author:    Dimitry Andric <dim@FreeBSD.org>  2019-12-20 19:53:05 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2019-12-20 19:53:05 +0000
commit:    0b57cec536236d46e3dba9bd041533462f33dbb7 (patch)
tree:      56229dbdbbf76d18580f72f789003db17246c8d9  /contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
parent:    718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff)
Move all sources from the llvm project into contrib/llvm-project.
This uses the new layout of the upstream repository, which was recently migrated to GitHub, and converted into a "monorepo". That is, most of the earlier separate sub-projects with their own branches and tags were consolidated into one top-level directory, and are now branched and tagged together. Updating the vendor area to match this layout is next.
Notes: svn path=/head/; revision=355940
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp  538
1 file changed, 0 insertions(+), 538 deletions(-)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
deleted file mode 100644
index 87e63fcc4a04..000000000000
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ /dev/null
@@ -1,538 +0,0 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass inserts branches on the 0 exec mask over divergent branches
-/// when it's expected that jumping over the untaken control flow will
-/// be cheaper than having every workitem no-op through it.
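-///
-/// For illustration (schematic, not verbatim output of this pass): a
-/// sufficiently long divergent region such as
-///
-///   s_and_saveexec_b64 s[0:1], vcc   ; enter divergent region
-///   ; ...many lane-predicated instructions...
-///
-/// gets a skip branch so a wave with no active lanes jumps over the body:
-///
-///   s_and_saveexec_b64 s[0:1], vcc
-///   s_cbranch_execz   BB_JOIN        ; inserted skip
-///   ; ...many lane-predicated instructions...
-/// BB_JOIN: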
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <iterator>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-insert-skips"
-
-static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold",
- cl::desc("Number of instructions before jumping over divergent control flow"),
- cl::init(12), cl::Hidden);
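-
-// For example (assumed invocation, not part of this file), the threshold can
-// be tuned when running llc:
-//   llc -march=amdgcn -amdgpu-skip-threshold=4 input.ll
-// Lower values insert skips over shorter divergent regions.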
-
-namespace {
-
-class SIInsertSkips : public MachineFunctionPass {
-private:
- const SIRegisterInfo *TRI = nullptr;
- const SIInstrInfo *TII = nullptr;
- unsigned SkipThreshold = 0;
-
- bool shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
-
- bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
-
- void kill(MachineInstr &MI);
-
- MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
-
- bool optimizeVccBranch(MachineInstr &MI) const;
-
-public:
- static char ID;
-
- SIInsertSkips() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- StringRef getPassName() const override {
- return "SI insert s_cbranch_execz instructions";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char SIInsertSkips::ID = 0;
-
-INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
- "SI insert s_cbranch_execz instructions", false, false)
-
-char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
-
-static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
- if (MI.isMetaInstruction())
- return true;
-
- // Handle target specific opcodes.
- switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- return true;
- default:
- return false;
- }
-}
-
-bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(*I))
- continue;
-
- // FIXME: Since this is required for correctness, this should be inserted
- // during SILowerControlFlow.
-
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it become infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction *MF = MBB.getParent();
-
- if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
- !shouldSkip(MBB, MBB.getParent()->back()))
- return false;
-
- MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
-
- const DebugLoc &DL = MI.getDebugLoc();
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(&NextBB);
-
- MachineBasicBlock::iterator Insert = SkipBB->begin();
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addImm(1) // vm
- .addImm(0) // compr
- .addImm(0); // en
-
- // ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
-
- return true;
-}
-
-void SIInsertSkips::kill(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- switch (MI.getOpcode()) {
- case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
- unsigned Opcode = 0;
-
- // The opcodes are inverted because the inline immediate has to be
- // the first operand, e.g. from "x < imm" to "imm > x"
- switch (MI.getOperand(2).getImm()) {
- case ISD::SETOEQ:
- case ISD::SETEQ:
- Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
- break;
- case ISD::SETOGT:
- case ISD::SETGT:
- Opcode = AMDGPU::V_CMPX_LT_F32_e64;
- break;
- case ISD::SETOGE:
- case ISD::SETGE:
- Opcode = AMDGPU::V_CMPX_LE_F32_e64;
- break;
- case ISD::SETOLT:
- case ISD::SETLT:
- Opcode = AMDGPU::V_CMPX_GT_F32_e64;
- break;
- case ISD::SETOLE:
- case ISD::SETLE:
- Opcode = AMDGPU::V_CMPX_GE_F32_e64;
- break;
- case ISD::SETONE:
- case ISD::SETNE:
- Opcode = AMDGPU::V_CMPX_LG_F32_e64;
- break;
- case ISD::SETO:
- Opcode = AMDGPU::V_CMPX_O_F32_e64;
- break;
- case ISD::SETUO:
- Opcode = AMDGPU::V_CMPX_U_F32_e64;
- break;
- case ISD::SETUEQ:
- Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
- break;
- case ISD::SETUGT:
- Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
- break;
- case ISD::SETUGE:
- Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
- break;
- case ISD::SETULT:
- Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
- break;
- case ISD::SETULE:
- Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
- break;
- case ISD::SETUNE:
- Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
- break;
- default:
- llvm_unreachable("invalid ISD:SET cond code");
- }
-
- const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
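-    // gfx10's V_CMPX writes only EXEC and has no SDst operand, so switch to
-    // the no-sdst encoding of the chosen compare there.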
- if (ST.hasNoSdstCMPX())
- Opcode = AMDGPU::getVCMPXNoSDstOp(Opcode);
-
- assert(MI.getOperand(0).isReg());
-
- if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
- MI.getOperand(0).getReg())) {
- Opcode = AMDGPU::getVOPe32(Opcode);
- BuildMI(MBB, &MI, DL, TII->get(Opcode))
- .add(MI.getOperand(1))
- .add(MI.getOperand(0));
- } else {
- auto I = BuildMI(MBB, &MI, DL, TII->get(Opcode));
- if (!ST.hasNoSdstCMPX())
- I.addReg(AMDGPU::VCC, RegState::Define);
-
- I.addImm(0) // src0 modifiers
- .add(MI.getOperand(1))
- .addImm(0) // src1 modifiers
- .add(MI.getOperand(0));
-
- I.addImm(0); // omod
- }
- break;
- }
- case AMDGPU::SI_KILL_I1_TERMINATOR: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- const MachineOperand &Op = MI.getOperand(0);
- int64_t KillVal = MI.getOperand(1).getImm();
- assert(KillVal == 0 || KillVal == -1);
-
- // Kill all threads if Op0 is an immediate and equal to the Kill value.
- if (Op.isImm()) {
- int64_t Imm = Op.getImm();
- assert(Imm == 0 || Imm == -1);
-
- if (Imm == KillVal)
- BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
- : AMDGPU::S_MOV_B64), Exec)
- .addImm(0);
- break;
- }
-
- unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
- if (ST.isWave32())
- Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32;
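-    // Clear the exec bits of the killed lanes: EXEC &= ~Op when KillVal is
-    // -1 (kill where Op is set), EXEC &= Op when KillVal is 0 (kill where
-    // Op is clear).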
- BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
- .addReg(Exec)
- .add(Op);
- break;
- }
- default:
- llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
- }
-}
-
-MachineBasicBlock *SIInsertSkips::insertSkipBlock(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
- MachineFunction *MF = MBB.getParent();
-
- MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, SkipBB);
- MBB.addSuccessor(SkipBB);
-
- return SkipBB;
-}
-
-// Returns true if a branch over the block was inserted.
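-// Schematically (illustrative, not verbatim MIR):
-//   SI_MASK_BRANCH %bb.join
-// becomes, when the region up to %bb.join is long enough to be worth
-// skipping:
-//   SI_MASK_BRANCH %bb.join
-//   S_CBRANCH_EXECZ %bb.join   ; inserted immediately after the pseudo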
-bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
-
- if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
- return false;
-
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
-
- BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(DestBB);
-
- return true;
-}
-
-bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
- // Match:
- // sreg = -1
- // vcc = S_AND_B64 exec, sreg
- // S_CBRANCH_VCC[N]Z
- // =>
- // S_CBRANCH_EXEC[N]Z
- bool Changed = false;
- MachineBasicBlock &MBB = *MI.getParent();
- const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
- const bool IsWave32 = ST.isWave32();
- const unsigned CondReg = TRI->getVCC();
- const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
-
- MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
- E = MBB.rend();
- bool ReadsCond = false;
- unsigned Threshold = 5;
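-  // Walk backwards at most Threshold instructions looking for the S_AND that
-  // defines the condition register; give up if EXEC or the condition
-  // register is clobbered first.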
- for (++A ; A != E ; ++A) {
- if (!--Threshold)
- return false;
- if (A->modifiesRegister(ExecReg, TRI))
- return false;
- if (A->modifiesRegister(CondReg, TRI)) {
- if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
- return false;
- break;
- }
- ReadsCond |= A->readsRegister(CondReg, TRI);
- }
- if (A == E)
- return false;
-
- MachineOperand &Op1 = A->getOperand(1);
- MachineOperand &Op2 = A->getOperand(2);
- if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
- TII->commuteInstruction(*A);
- Changed = true;
- }
- if (Op1.getReg() != ExecReg)
- return Changed;
- if (Op2.isImm() && Op2.getImm() != -1)
- return Changed;
-
- unsigned SReg = AMDGPU::NoRegister;
- if (Op2.isReg()) {
- SReg = Op2.getReg();
- auto M = std::next(A);
- bool ReadsSreg = false;
- for ( ; M != E ; ++M) {
- if (M->definesRegister(SReg, TRI))
- break;
- if (M->modifiesRegister(SReg, TRI))
- return Changed;
- ReadsSreg |= M->readsRegister(SReg, TRI);
- }
- if (M == E ||
- !M->isMoveImmediate() ||
- !M->getOperand(1).isImm() ||
- M->getOperand(1).getImm() != -1)
- return Changed;
-    // If SReg is only used by the AND, fold the -1 immediate into the AND
-    // itself and delete the S_MOV that produced it.
- if (!ReadsSreg && Op2.isKill()) {
- A->getOperand(2).ChangeToImmediate(-1);
- M->eraseFromParent();
- }
- }
-
- if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
- MI.killsRegister(CondReg, TRI))
- A->eraseFromParent();
-
- bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
- if (SReg == ExecReg) {
- if (IsVCCZ) {
- MI.eraseFromParent();
- return true;
- }
- MI.setDesc(TII->get(AMDGPU::S_BRANCH));
- } else {
- MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
- : AMDGPU::S_CBRANCH_EXECNZ));
- }
-
- MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
- MI.addImplicitDefUseOperands(*MBB.getParent());
-
- return true;
-}
-
-bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
- TRI = &TII->getRegisterInfo();
- SkipThreshold = SkipThresholdFlag;
-
- bool HaveKill = false;
- bool MadeChange = false;
-
-  // Track the convergence points of pending divergent branches (the exec
-  // mask nesting depth).
- SmallVector<MachineBasicBlock *, 16> ExecBranchStack;
-
- MachineFunction::iterator NextBB;
-
- MachineBasicBlock *EmptyMBBAtEnd = nullptr;
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; BI = NextBB) {
- NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
- bool HaveSkipBlock = false;
-
- if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
- // Reached convergence point for last divergent branch.
- ExecBranchStack.pop_back();
- }
-
- if (HaveKill && ExecBranchStack.empty()) {
- HaveKill = false;
-
- // TODO: Insert skip if exec is 0?
- }
-
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
-
- MachineInstr &MI = *I;
-
- switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
- MadeChange |= skipMaskBranch(MI, MBB);
- break;
-
- case AMDGPU::S_BRANCH:
- // Optimize out branches to the next block.
- // FIXME: Shouldn't this be handled by BranchFolding?
- if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
- MI.eraseFromParent();
- } else if (HaveSkipBlock) {
-        // A skip block has been inserted after the current block; remove
-        // this unconditional branch so the S_CBRANCH_EXECNZ appended by
-        // skipIfDead is reached, letting a live wave skip the two
-        // instructions performing the kill (export + endpgm).
- MI.eraseFromParent();
- }
- break;
-
- case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
- case AMDGPU::SI_KILL_I1_TERMINATOR:
- MadeChange = true;
- kill(MI);
-
- if (ExecBranchStack.empty()) {
- if (NextBB != BE && skipIfDead(MI, *NextBB)) {
- HaveSkipBlock = true;
- NextBB = std::next(BI);
- BE = MF.end();
- }
- } else {
- HaveKill = true;
- }
-
- MI.eraseFromParent();
- break;
-
- case AMDGPU::SI_RETURN_TO_EPILOG:
- // FIXME: Should move somewhere else
- assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
-
- // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
- // because external bytecode will be appended at the end.
- if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
- // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
- // the end and jump there.
- if (!EmptyMBBAtEnd) {
- EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(), EmptyMBBAtEnd);
- }
-
- MBB.addSuccessor(EmptyMBBAtEnd);
- BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(EmptyMBBAtEnd);
- I->eraseFromParent();
- }
- break;
-
- case AMDGPU::S_CBRANCH_VCCZ:
- case AMDGPU::S_CBRANCH_VCCNZ:
- MadeChange |= optimizeVccBranch(MI);
- break;
-
- default:
- break;
- }
- }
- }
-
- return MadeChange;
-}