diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-20 11:41:25 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-20 11:41:25 +0000 |
commit | d9484dd61cc151c4f34c31e07f693fefa66316b5 (patch) | |
tree | ab0560b3da293f1fafd3269c59692e929418f5c2 /contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | |
parent | 79e0962d4c3cf1f0acf359a9d69cb3ac68c414c4 (diff) | |
parent | d8e91e46262bc44006913e6796843909f1ac7bcd (diff) | |
download | src-d9484dd61cc151c4f34c31e07f693fefa66316b5.tar.gz src-d9484dd61cc151c4f34c31e07f693fefa66316b5.zip |
Merge llvm trunk r351319, resolve conflicts, and update FREEBSD-Xlist.
Notes
Notes:
svn path=/projects/clang800-import/; revision=343210
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 95 |
1 file changed, 55 insertions, 40 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2205819c444f..e8cefdbf74b9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -45,6 +45,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" #include <memory> @@ -105,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole( cl::desc("Enable SDWA peepholer"), cl::init(true)); +static cl::opt<bool> EnableDPPCombine( + "amdgpu-dpp-combine", + cl::desc("Enable DPP combiner"), + cl::init(false)); + // Enable address space based alias analysis static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), @@ -137,6 +143,20 @@ static cl::opt<bool> EnableLowerKernelArguments( cl::init(true), cl::Hidden); +// Enable atomic optimization +static cl::opt<bool> EnableAtomicOptimizations( + "amdgpu-atomic-optimizations", + cl::desc("Enable atomic optimizations"), + cl::init(false), + cl::Hidden); + +// Enable Mode register optimization +static cl::opt<bool> EnableSIModeRegisterPass( + "amdgpu-mode-register", + cl::desc("Enable mode register pass"), + cl::init(true), + cl::Hidden); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); @@ -150,18 +170,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeR600VectorRegMergerPass(*PR); initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelPass(*PR); + initializeGCNDPPCombinePass(*PR); initializeSILowerI1CopiesPass(*PR); initializeSIFixSGPRCopiesPass(*PR); initializeSIFixVGPRCopiesPass(*PR); + initializeSIFixupVectorISelPass(*PR); initializeSIFoldOperandsPass(*PR); initializeSIPeepholeSDWAPass(*PR); 
initializeSIShrinkInstructionsPass(*PR); initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSILoadStoreOptimizerPass(*PR); + initializeAMDGPUFixFunctionBitcastsPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAnnotateKernelFeaturesPass(*PR); initializeAMDGPUAnnotateUniformValuesPass(*PR); initializeAMDGPUArgumentUsageInfoPass(*PR); + initializeAMDGPUAtomicOptimizerPass(*PR); initializeAMDGPULowerKernelArgumentsPass(*PR); initializeAMDGPULowerKernelAttributesPass(*PR); initializeAMDGPULowerIntrinsicsPass(*PR); @@ -172,6 +196,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowPass(*PR); initializeSIInsertWaitcntsPass(*PR); + initializeSIModeRegisterPass(*PR); initializeSIWholeQuadModePass(*PR); initializeSILowerControlFlowPass(*PR); initializeSIInsertSkipsPass(*PR); @@ -182,6 +207,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSIFormMemoryClausesPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); initializeAMDGPUAAWrapperPassPass(*PR); + initializeAMDGPUExternalAAWrapperPass(*PR); initializeAMDGPUUseNativeCallsPass(*PR); initializeAMDGPUSimplifyLibCallsPass(*PR); initializeAMDGPUInlinerPass(*PR); @@ -292,12 +318,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { return Reloc::PIC_; } -static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) { - if (CM) - return *CM; - return CodeModel::Small; -} - AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -306,9 +326,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OptLevel) : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), - getEffectiveCodeModel(CM), OptLevel), + getEffectiveCodeModel(CM, CodeModel::Small), OptLevel), TLOF(createTLOF(getTargetTriple())) { - AS = 
AMDGPU::getAMDGPUAS(TT); initAsmInfo(); } @@ -331,13 +350,6 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { FSAttr.getValueAsString(); } -static ImmutablePass *createAMDGPUExternalAAWrapperPass() { - return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) { - if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>()) - AAR.addAAResult(WrapperPass->getResult()); - }); -} - /// Predicate for Internalize pass. static bool mustPreserveGV(const GlobalValue &GV) { if (const Function *F = dyn_cast<Function>(&GV)) @@ -360,17 +372,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { Builder.Inliner = createAMDGPUFunctionInliningPass(); } - if (Internalize) { - // If we're generating code, we always have the whole program available. The - // relocations expected for externally visible functions aren't supported, - // so make sure every non-entry function is hidden. - Builder.addExtension( - PassManagerBuilder::EP_EnabledOnOptLevel0, - [](const PassManagerBuilder &, legacy::PassManagerBase &PM) { - PM.add(createInternalizePass(mustPreserveGV)); - }); - } - Builder.addExtension( PassManagerBuilder::EP_ModuleOptimizerEarly, [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &, @@ -613,20 +614,23 @@ void AMDGPUPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); + addPass(createAtomicExpandPass()); + + // This must occur before inlining, as the inliner will not look through + // bitcast calls. + addPass(createAMDGPUFixFunctionBitcastsPass()); + addPass(createAMDGPULowerIntrinsicsPass()); - if (TM.getTargetTriple().getArch() == Triple::r600 || - !EnableAMDGPUFunctionCalls) { - // Function calls are not supported, so make sure we inline everything. 
- addPass(createAMDGPUAlwaysInlinePass()); - addPass(createAlwaysInlinerLegacyPass()); - // We need to add the barrier noop pass, otherwise adding the function - // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a nodule with two - // functions, then we will generate code for the first function - // without ever running any passes on the second. - addPass(createBarrierNoopPass()); - } + // Function calls are not supported, so make sure we inline everything. + addPass(createAMDGPUAlwaysInlinePass()); + addPass(createAlwaysInlinerLegacyPass()); + // We need to add the barrier noop pass, otherwise adding the function + // inlining pass will cause all of the PassConfigs passes to be run + // one function at a time, which means if we have a nodule with two + // functions, then we will generate code for the first function + // without ever running any passes on the second. + addPass(createBarrierNoopPass()); if (TM.getTargetTriple().getArch() == Triple::amdgcn) { // TODO: May want to move later or split into an early and late one. @@ -690,6 +694,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() { } bool AMDGPUPassConfig::addPreISel() { + addPass(createLowerSwitchPass()); addPass(createFlattenCFGPass()); return false; } @@ -759,6 +764,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); + if (EnableAtomicOptimizations) { + addPass(createAMDGPUAtomicOptimizerPass()); + } + // FIXME: We need to run a pass to propagate the attributes when calls are // supported. addPass(createAMDGPUAnnotateKernelFeaturesPass()); @@ -789,6 +798,8 @@ void GCNPassConfig::addMachineSSAOptimization() { // // XXX - Can we get away without running DeadMachineInstructionElim again? 
addPass(&SIFoldOperandsID); + if (EnableDPPCombine) + addPass(&GCNDPPCombineID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); if (EnableSDWAPeephole) { @@ -811,8 +822,10 @@ bool GCNPassConfig::addILPOpts() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); - addPass(createSILowerI1CopiesPass()); addPass(&SIFixSGPRCopiesID); + addPass(createSILowerI1CopiesPass()); + addPass(createSIFixupVectorISelPass()); + addPass(createSIAddIMGInitPass()); return false; } @@ -878,7 +891,8 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { void GCNPassConfig::addPostRegAlloc() { addPass(&SIFixVGPRCopiesID); - addPass(&SIOptimizeExecMaskingID); + if (getOptLevel() > CodeGenOpt::None) + addPass(&SIOptimizeExecMaskingID); TargetPassConfig::addPostRegAlloc(); } @@ -889,6 +903,7 @@ void GCNPassConfig::addPreEmitPass() { addPass(createSIMemoryLegalizerPass()); addPass(createSIInsertWaitcntsPass()); addPass(createSIShrinkInstructionsPass()); + addPass(createSIModeRegisterPass()); // The hazard recognizer that runs as part of the post-ra scheduler does not // guarantee to be able handle all hazards correctly. This is because if there |