author    Dimitry Andric <dim@FreeBSD.org>  2019-01-20 11:41:25 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2019-01-20 11:41:25 +0000
commit d9484dd61cc151c4f34c31e07f693fefa66316b5 (patch)
tree   ab0560b3da293f1fafd3269c59692e929418f5c2 /contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent 79e0962d4c3cf1f0acf359a9d69cb3ac68c414c4 (diff)
parent d8e91e46262bc44006913e6796843909f1ac7bcd (diff)
Merge llvm trunk r351319, resolve conflicts, and update FREEBSD-Xlist.
Notes: svn path=/projects/clang800-import/; revision=343210
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 95
1 file changed, 55 insertions(+), 40 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2205819c444f..e8cefdbf74b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -45,6 +45,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>
@@ -105,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole(
cl::desc("Enable SDWA peepholer"),
cl::init(true));
+static cl::opt<bool> EnableDPPCombine(
+ "amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(false));
+
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
@@ -137,6 +143,20 @@ static cl::opt<bool> EnableLowerKernelArguments(
cl::init(true),
cl::Hidden);
+// Enable atomic optimization
+static cl::opt<bool> EnableAtomicOptimizations(
+ "amdgpu-atomic-optimizations",
+ cl::desc("Enable atomic optimizations"),
+ cl::init(false),
+ cl::Hidden);
+
+// Enable Mode register optimization
+static cl::opt<bool> EnableSIModeRegisterPass(
+ "amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
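The three cl::opt flags added above (amdgpu-dpp-combine, amdgpu-atomic-optimizations, amdgpu-mode-register) become ordinary llc command-line options, so they can be toggled without rebuilding. A hedged usage sketch; the flag names come from this diff, while the triple, CPU, and file names are only illustrative:

    llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine \
        -amdgpu-atomic-optimizations kernel.ll -o kernel.s
    llc -mtriple=amdgcn -amdgpu-mode-register=0 kernel.ll -o kernel.s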
@@ -150,18 +170,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
+ initializeSIFixupVectorISelPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
@@ -172,6 +196,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
+ initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
@@ -182,6 +207,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
+ initializeAMDGPUExternalAAWrapperPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
@@ -292,12 +318,6 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return Reloc::PIC_;
}
-static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
- if (CM)
- return *CM;
- return CodeModel::Small;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -306,9 +326,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM), OptLevel),
+ getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
TLOF(createTLOF(getTargetTriple())) {
- AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
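The constructor change above drops the file-local getEffectiveCodeModel helper (deleted in the previous hunk) in favor of a shared two-argument overload provided by common LLVM target code. For this call site its effect reduces to the sketch below; the real helper also rejects code models a target cannot support, a detail this diff does not show and which is omitted here:

    // Sketch only: prefer the explicitly requested code model, otherwise
    // fall back to the target's default (CodeModel::Small at this call site).
    static CodeModel::Model
    getEffectiveCodeModel(Optional<CodeModel::Model> CM, CodeModel::Model Default) {
      return CM ? *CM : Default;
    }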
@@ -331,13 +350,6 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
- return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
- if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- });
-}
-
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
@@ -360,17 +372,6 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.Inliner = createAMDGPUFunctionInliningPass();
}
- if (Internalize) {
- // If we're generating code, we always have the whole program available. The
- // relocations expected for externally visible functions aren't supported,
- // so make sure every non-entry function is hidden.
- Builder.addExtension(
- PassManagerBuilder::EP_EnabledOnOptLevel0,
- [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
- PM.add(createInternalizePass(mustPreserveGV));
- });
- }
-
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -613,20 +614,23 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAtomicExpandPass());
+
+ // This must occur before inlining, as the inliner will not look through
+ // bitcast calls.
+ addPass(createAMDGPUFixFunctionBitcastsPass());
+
addPass(createAMDGPULowerIntrinsicsPass());
- if (TM.getTargetTriple().getArch() == Triple::r600 ||
- !EnableAMDGPUFunctionCalls) {
- // Function calls are not supported, so make sure we inline everything.
- addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerLegacyPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a module with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
- }
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerLegacyPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+ // inlining pass will cause all of the PassConfigs passes to be run
+ // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
// TODO: May want to move later or split into an early and late one.
@@ -690,6 +694,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
}
bool AMDGPUPassConfig::addPreISel() {
+ addPass(createLowerSwitchPass());
addPass(createFlattenCFGPass());
return false;
}
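addPreISel now runs LowerSwitch before FlattenCFG. LowerSwitch rewrites IR switch instructions into chains of compare-and-branch, presumably so the later AMDGPU control-flow passes only have to reason about conditional branches (an inference; the diff itself states no rationale). In C terms the transformation is roughly:

    /* Conceptually, LowerSwitch turns this ...                */
    switch (x) { case 0: y = f(); break; case 1: y = g(); break; default: y = h(); }
    /* ... into an equivalent compare-and-branch chain:        */
    if (x == 0) y = f(); else if (x == 1) y = g(); else y = h();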
@@ -759,6 +764,10 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ if (EnableAtomicOptimizations) {
+ addPass(createAMDGPUAtomicOptimizerPass());
+ }
+
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
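When amdgpu-atomic-optimizations is enabled, the AMDGPUAtomicOptimizer pass added here can replace per-lane atomics on a shared location with a single per-wavefront atomic whose result each lane reconstructs locally. Below is a runnable C++ model of that idea; every name in it is illustrative, and it is an analogy to the transformation, not the pass's code:

    #include <atomic>
    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::atomic<uint32_t> counter{100};  // shared location all lanes increment
      uint64_t exec = 0b1011;              // hypothetical active-lane mask
      // Issue one atomic add for the whole wavefront instead of one per lane.
      uint32_t total = std::bitset<64>(exec).count();
      uint32_t base = counter.fetch_add(total);
      for (int lane = 0; lane < 4; ++lane) {
        if (!((exec >> lane) & 1))
          continue;
        // Each lane recovers the value it would have observed from the count
        // of active lanes below it (a prefix count over the exec mask).
        uint32_t before = std::bitset<64>(exec & ((1ull << lane) - 1)).count();
        std::printf("lane %d sees old value %u\n", lane, base + before);
      }
    }

With the mask 0b1011, lanes 0, 1, and 3 print 100, 101, and 102, and the counter ends at 103, exactly as if each active lane had issued its own fetch_add(1).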
@@ -789,6 +798,8 @@ void GCNPassConfig::addMachineSSAOptimization() {
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
+ if (EnableDPPCombine)
+ addPass(&GCNDPPCombineID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
if (EnableSDWAPeephole) {
@@ -811,8 +822,10 @@ bool GCNPassConfig::addILPOpts() {
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
- addPass(createSILowerI1CopiesPass());
addPass(&SIFixSGPRCopiesID);
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixupVectorISelPass());
+ addPass(createSIAddIMGInitPass());
return false;
}
@@ -878,7 +891,8 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
- addPass(&SIOptimizeExecMaskingID);
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -889,6 +903,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
addPass(createSIShrinkInstructionsPass());
+ addPass(createSIModeRegisterPass());
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able to handle all hazards correctly. This is because if there