Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86AtomicExpandPass.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86AtomicExpandPass.cpp | 283
1 files changed, 283 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86AtomicExpandPass.cpp b/contrib/llvm/lib/Target/X86/X86AtomicExpandPass.cpp
new file mode 100644
index 000000000000..3dcadb16760b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86AtomicExpandPass.cpp
@@ -0,0 +1,283 @@
+//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions which
+// cannot be implemented as a single instruction with cmpxchg-based loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-expand"
+
+namespace {
+  class X86AtomicExpandPass : public FunctionPass {
+    const X86TargetMachine *TM;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit X86AtomicExpandPass(const X86TargetMachine *TM)
+      : FunctionPass(ID), TM(TM) {}
+
+    bool runOnFunction(Function &F) override;
+    bool expandAtomicInsts(Function &F);
+
+    bool needsCmpXchgNb(Type *MemType);
+
+    /// There are four kinds of atomic operations. Two never need expanding:
+    /// cmpxchg is what we expand the others *to*, and loads are easily handled
+    /// by ISelLowering. Atomicrmw and store can need expanding in some
+    /// circumstances.
+    bool shouldExpand(Instruction *Inst);
+
+    /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
+    /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
+    bool shouldExpandStore(StoreInst *SI);
+
+    /// Only some atomicrmw instructions need expanding -- some operations
+    /// (e.g. max) have absolutely no architectural support; some (e.g. or)
+    /// have limited support but can't return the previous value; some
+    /// (e.g. add) have complete support in the instruction set.
+    ///
+    /// Also, naturally, 128-bit operations always need to be expanded.
+    bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
+
+    bool expandAtomicRMW(AtomicRMWInst *AI);
+    bool expandAtomicStore(StoreInst *SI);
+  };
+}
+
+char X86AtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
+  return new X86AtomicExpandPass(TM);
+}
+
+bool X86AtomicExpandPass::runOnFunction(Function &F) {
+  SmallVector<Instruction *, 1> AtomicInsts;
+
+  // Changing control-flow while iterating through it is a bad idea, so gather
+  // a list of all atomic instructions before we start.
+  for (BasicBlock &BB : F)
+    for (Instruction &Inst : BB) {
+      if (isa<AtomicRMWInst>(&Inst) ||
+          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+        AtomicInsts.push_back(&Inst);
+    }
+
+  bool MadeChange = false;
+  for (Instruction *Inst : AtomicInsts) {
+    if (!shouldExpand(Inst))
+      continue;
+
+    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+      MadeChange |= expandAtomicRMW(AI);
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+      MadeChange |= expandAtomicStore(SI);
+
+    assert(MadeChange && "Atomic inst not expanded when it should be?");
+    Inst->eraseFromParent();
+  }
+
+  return MadeChange;
+}
+
+/// Returns true if the operand type is 1 step up from the native width, and
+/// the corresponding cmpxchg8b or cmpxchg16b instruction is available
+/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
+bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
+  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+
+  if (OpWidth == 64)
+    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+  if (OpWidth == 128)
+    return Subtarget.hasCmpxchg16b();
+
+  return false;
+}
+
+bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
+  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+
+  if (needsCmpXchgNb(AI->getType()))
+    return true;
+
+  if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
+    return false;
+
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+  switch (Op) {
+  default:
+    llvm_unreachable("Unknown atomic operation");
+  case AtomicRMWInst::Xchg:
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+    // It's better to use xadd, xsub or xchg for these in all cases.
+    return false;
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Xor:
+    // If the atomicrmw's result isn't actually used, we can just add a "lock"
+    // prefix to a normal instruction for these operations.
+    return !AI->use_empty();
+  case AtomicRMWInst::Nand:
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin:
+    // These always require a non-trivial set of data operations on x86. We
+    // must use a cmpxchg loop.
+    return true;
+  }
+}
+
+bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
+  if (needsCmpXchgNb(SI->getValueOperand()->getType()))
+    return true;
+
+  return false;
+}
+
+bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
+  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+    return shouldExpandAtomicRMW(AI);
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return shouldExpandStore(SI);
+  return false;
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+                              Value *Loaded, Value *Inc) {
+  Value *NewVal;
+  switch (Op) {
+  case AtomicRMWInst::Xchg:
+    return Inc;
+  case AtomicRMWInst::Add:
+    return Builder.CreateAdd(Loaded, Inc, "new");
+  case AtomicRMWInst::Sub:
+    return Builder.CreateSub(Loaded, Inc, "new");
+  case AtomicRMWInst::And:
+    return Builder.CreateAnd(Loaded, Inc, "new");
+  case AtomicRMWInst::Nand:
+    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+  case AtomicRMWInst::Or:
+    return Builder.CreateOr(Loaded, Inc, "new");
+  case AtomicRMWInst::Xor:
+    return Builder.CreateXor(Loaded, Inc, "new");
+  case AtomicRMWInst::Max:
+    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  case AtomicRMWInst::Min:
+    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  case AtomicRMWInst::UMax:
+    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  case AtomicRMWInst::UMin:
+    NewVal = Builder.CreateICmpULE(Loaded, Inc);
+    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  default:
+    break;
+  }
+  llvm_unreachable("Unknown atomic op");
+}
+
+bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+  AtomicOrdering Order =
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+  Value *Addr = AI->getPointerOperand();
+  BasicBlock *BB = AI->getParent();
+  Function *F = BB->getParent();
+  LLVMContext &Ctx = F->getContext();
+
+  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+  //
+  // The standard expansion we produce is:
+  //     [...]
+  //     %init_loaded = load atomic iN* %addr
+  //     br label %loop
+  // loop:
+  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+  //     %new = some_op iN %loaded, %incr
+  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
+  //     %success = extractvalue { iN, i1 } %pair, 1
+  //     br i1 %success, label %atomicrmw.end, label %loop
+  // atomicrmw.end:
+  //     [...]
+  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+  // This grabs the DebugLoc from AI.
+  IRBuilder<> Builder(AI);
+
+  // The split call above "helpfully" added a branch at the end of BB (to the
+  // wrong place), but we want a load. It's easiest to just remove
+  // the branch entirely.
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+  Loaded->addIncoming(InitLoaded, BB);
+
+  Value *NewVal =
+      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+  Value *Pair = Builder.CreateAtomicCmpXchg(
+      Addr, Loaded, NewVal, Order,
+      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+  Loaded->addIncoming(NewLoaded, LoopBB);
+
+  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+  Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+  AI->replaceAllUsesWith(NewLoaded);
+
+  return true;
+}
+
+bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+  // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
+  // this in terms of the usual expansion to "atomicrmw xchg".
+  IRBuilder<> Builder(SI);
+  AtomicOrdering Order =
+      SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
+  AtomicRMWInst *AI =
+      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+                              SI->getValueOperand(), Order);
+
+  // Now we have an appropriate swap instruction, lower it as usual.
+  if (shouldExpandAtomicRMW(AI)) {
+    expandAtomicRMW(AI);
+    AI->eraseFromParent();
+    return true;
+  }
+
+  // The plain xchg is already legal; the original store still gets erased by
+  // the caller, so report that a change was made.
+  return true;
+}
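For reference, the following is an illustrative sketch (not part of the committed file) of the rewrite expandAtomicRMW performs, specializing the generic "some_op iN" expansion from the comment above to an i32 nand; the block and value names follow those created by the pass, but the surrounding function, %ptr and %val are hypothetical, and the IR uses LLVM 3.5-era typed-pointer syntax.

Input:

    %old = atomicrmw nand i32* %ptr, i32 %val seq_cst

After the pass (the original atomicrmw is erased and uses of %old become %new_loaded):

    entry:
      %init_loaded = load i32* %ptr
      br label %atomicrmw.start

    atomicrmw.start:
      %loaded = phi i32 [ %init_loaded, %entry ], [ %new_loaded, %atomicrmw.start ]
      %tmp = and i32 %loaded, %val
      %new = xor i32 %tmp, -1                      ; nand = not(and), per performAtomicOp
      %pair = cmpxchg i32* %ptr, i32 %loaded, i32 %new seq_cst seq_cst
      %new_loaded = extractvalue { i32, i1 } %pair, 0
      %success = extractvalue { i32, i1 } %pair, 1
      br i1 %success, label %atomicrmw.end, label %atomicrmw.start

    atomicrmw.end:
      ; [...] continues with %new_loaded in place of %old

Note that the loop retries only when another thread changed the value between the load (or previous cmpxchg) and the cmpxchg, so the expansion is lock-free rather than wait-free, matching the behaviour of the __sync_fetch_and_* libcalls it replaces for natively supported widths.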