diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp | 338 |
1 files changed, 65 insertions, 273 deletions
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp index 449a3d9fc8d4..c55a658dc375 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -11,13 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXTargetMachine.h" +#include "PTX.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -26,6 +25,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -37,8 +37,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -46,7 +44,7 @@ using namespace llvm; namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, - bool useCFI, + bool useCFI, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *MAB, @@ -67,29 +65,16 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; - - // Copied from LLVMTargetMachine.cpp - void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - } - - void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - //if (VerifyMachineCode) - // PM.add(createMachineVerifierPass(Banner)); - } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), @@ -98,276 +83,83 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, TLInfo(*this) { } +void PTX32TargetMachine::anchor() { } + PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } +void PTX64TargetMachine::anchor() { } + PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } -bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createPTXISelDag(*this, OptLevel)); - return false; +namespace llvm { +/// PTX Code Generator Pass Configuration Options. +class PTXPassConfig : public TargetPassConfig { +public: + PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + PTXTargetMachine &getPTXTargetMachine() const { + return getTM<PTXTargetMachine>(); + } + + bool addInstSelector(); + FunctionPass *createTargetRegisterAllocator(bool); + void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + bool addPostRegAlloc(); + void addMachineLateOptimization(); + bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) { + PTXPassConfig *PassConfig = new PTXPassConfig(this, PM); + PassConfig->disablePass(PrologEpilogCodeInserterID); + return PassConfig; } -bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // PTXMFInfoExtract must after register allocation! - //PM.add(createPTXMFInfoExtract(*this, OptLevel)); +bool PTXPassConfig::addInstSelector() { + PM.add(createPTXISelDag(getPTXTargetMachine(), getOptLevel())); return false; } -bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { - // This is mostly based on LLVMTargetMachine::addPassesToEmitFile - - // Add common CodeGen passes. - MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) - return true; - assert(Context != 0 && "Failed to get MCContext"); - - if (hasMCSaveTempLabels()) - Context->setAllowTemporaryLabels(false); - - const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - OwningPtr<MCStreamer> AsmStreamer; - - switch (FileType) { - default: return true; - case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); - - // Create a code emitter if asked to show the encoding. - MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - - MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, - true, /* verbose asm */ - hasMCUseLoc(), - hasMCUseCFI(), - InstPrinter, - MCE, MAB, - false /* show MC encoding */); - AsmStreamer.reset(S); - break; - } - case CGFT_ObjectFile: { - llvm_unreachable("Object file emission is not supported with PTX"); - } - case CGFT_Null: - // The Null output is intended for use for performance analysis and testing, - // not real users. - AsmStreamer.reset(createNullStreamer(*Context)); - break; - } - - // MC Logging - //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) - return true; - - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.take(); +FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) { + return createPTXRegisterAllocator(); +} - PM.add(Printer); +// Modify the optimized compilation path to bypass optimized register alloction. +void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addFastRegAlloc(RegAllocPass); +} - PM.add(createGCInfoDeleter()); +bool PTXPassConfig::addPostRegAlloc() { + // PTXMFInfoExtract must after register allocation! + //PM.add(createPTXMFInfoExtract(getPTXTargetMachine())); return false; } -bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool DisableVerify, - MCContext *&OutContext) { - // Add standard LLVM codegen passes. - // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some - // modifications for the PTX target. - - // Standard LLVM-Level Passes. - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); - - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); - //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); - } - - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - - PM.add(createLowerInvokePass(getTargetLowering())); - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - - if (OptLevel != CodeGenOpt::None) - PM.add(createCodeGenPreparePass(getTargetLowering())); +/// Add passes that optimize machine instructions after register allocation. +void PTXPassConfig::addMachineLateOptimization() { + if (addPass(BranchFolderPassID) != &NoPassID) + printAndVerify("After BranchFolding"); - PM.add(createStackProtectorPass(getTargetLowering())); - - addPreISel(PM, OptLevel); - - //PM.add(createPrintFunctionPass("\n\n" - // "*** Final LLVM Code input to ISel ***\n", - // &dbgs())); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Standard Lower-Level Passes. - - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getRegisterInfo(), - &getTargetLowering()->getObjFileLowering()); - PM.add(MMI); - OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. - - // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); - - // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) - return true; - - // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection"); - - // Expand pseudo-instructions emitted by ISel. - PM.add(createExpandISelPseudosPass()); - - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - - // Optimize PHIs before DCE: removing dead PHI cycles may make more - // instructions dead. - if (OptLevel != CodeGenOpt::None) - PM.add(createOptimizePHIsPass()); - - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - PM.add(createLocalStackSlotAllocationPass()); - - if (OptLevel != CodeGenOpt::None) { - // With optimization, dead code should already be eliminated. However - // there is one known exception: lowered code for arguments that are only - // used by tail calls, where the tail calls reuse the incoming stack - // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass"); - - PM.add(createMachineLICMPass()); - PM.add(createMachineCSEPass()); - PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); - - PM.add(createPeepholeOptimizerPass()); - printAndVerify(PM, "After codegen peephole optimization pass"); - } - - // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes"); - - // Perform register allocation. - PM.add(createPTXRegisterAllocator()); - printAndVerify(PM, "After Register Allocation"); - - // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - PM.add(createStackSlotColoringPass(false)); - - // FIXME: Post-RA LICM has asserts that fire on virtual registers. - // Run post-ra machine LICM to hoist reloads / remats. - //if (!DisablePostRAMachineLICM) - // PM.add(createMachineLICMPass(false)); - - printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); - } - - // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PostRegAlloc passes"); - - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - PM.add(createPrologEpilogCodeInserter()); - printAndVerify(PM, "After PrologEpilogCodeInserter"); - - // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (OptLevel != CodeGenOpt::None) { - PM.add(createPostRAScheduler(OptLevel)); - printAndVerify(PM, "After PostRAScheduler"); - } - - // Branch folding must be run after regalloc and prolog/epilog insertion. - if (OptLevel != CodeGenOpt::None) { - PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printNoVerify(PM, "After BranchFolding"); - } - - // Tail duplication. - if (OptLevel != CodeGenOpt::None) { - PM.add(createTailDuplicatePass(false)); - printNoVerify(PM, "After TailDuplicate"); - } - - PM.add(createGCMachineCodeAnalysisPass()); - - //if (PrintGCInfo) - // PM.add(createGCInfoPrinter(dbgs())); - - if (OptLevel != CodeGenOpt::None) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); - } - - if (addPreEmitPass(PM, OptLevel)) - printNoVerify(PM, "After PreEmit passes"); - - PM.add(createPTXMFInfoExtract(*this, OptLevel)); - PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + if (addPass(TailDuplicateID) != &NoPassID) + printAndVerify("After TailDuplicate"); +} - return false; +bool PTXPassConfig::addPreEmitPass() { + PM.add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel())); + PM.add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel())); + return true; } |