Diffstat (limited to 'llvm/lib/CodeGen/AtomicExpandPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/AtomicExpandPass.cpp | 345
1 file changed, 187 insertions(+), 158 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 37a50cde6391..a5030305435c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -89,7 +89,7 @@ namespace {
         AtomicRMWInst *I,
         TargetLoweringBase::AtomicExpansionKind ExpansionKind);
     AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
-    void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+    bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
     void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
     void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
 
@@ -105,7 +105,7 @@ namespace {
     bool isIdempotentRMW(AtomicRMWInst *RMWI);
     bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
 
-    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
+    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                                  Value *PointerOperand, Value *ValueOperand,
                                  Value *CASExpected, AtomicOrdering Ordering,
                                  AtomicOrdering Ordering2,
@@ -152,47 +152,15 @@ static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
   return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
 }
 
-// Helper functions to retrieve the alignment of atomic instructions.
-static unsigned getAtomicOpAlign(LoadInst *LI) {
-  unsigned Align = LI->getAlignment();
-  // In the future, if this IR restriction is relaxed, we should
-  // return DataLayout::getABITypeAlignment when there's no align
-  // value.
-  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
-  return Align;
-}
-
-static unsigned getAtomicOpAlign(StoreInst *SI) {
-  unsigned Align = SI->getAlignment();
-  // In the future, if this IR restriction is relaxed, we should
-  // return DataLayout::getABITypeAlignment when there's no align
-  // value.
-  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
-  return Align;
-}
-
-static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
-  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
-  // default alignment for load/store, the default here is to assume
-  // it has NATURAL alignment, not DataLayout-specified alignment.
-  const DataLayout &DL = RMWI->getModule()->getDataLayout();
-  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
-}
-
-static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
-  // TODO(PR27168): same comment as above.
-  const DataLayout &DL = CASI->getModule()->getDataLayout();
-  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
-}
-
 // Determine if a particular atomic operation has a supported size,
 // and is of appropriate alignment, to be passed through for target
 // lowering. (Versus turning into a __atomic libcall)
 template <typename Inst>
 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
   unsigned Size = getAtomicOpSize(I);
-  unsigned Align = getAtomicOpAlign(I);
-  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+  Align Alignment = I->getAlign();
+  return Alignment >= Size &&
+         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
 }
 
 bool AtomicExpand::runOnFunction(Function &F) {
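Note: throughout this patch, raw unsigned byte alignments are replaced with the llvm::Align type. A minimal sketch of the Align behavior relied on above (assuming the llvm/Support/Alignment.h API of this era; the snippet is illustrative, not part of the patch):

    #include "llvm/Support/Alignment.h"
    #include <cassert>
    using llvm::Align;

    int main() {
      Align A(4);                  // value must be a power of two; asserts if not
      assert(A.value() == 4);      // underlying byte value
      assert(A >= 4 && !(A >= 8)); // compares directly against byte sizes,
                                   // as in "Alignment >= Size" above
      return 0;
    }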
@@ -383,7 +351,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
 
   auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
-  NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
+  NewLI->setAlignment(LI->getAlign());
   NewLI->setVolatile(LI->isVolatile());
   NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
   LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
@@ -470,7 +438,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
 
   StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
-  NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
+  NewSI->setAlignment(SI->getAlign());
   NewSI->setVolatile(SI->isVolatile());
   NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
   LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
@@ -570,8 +538,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     unsigned ValueSize = getAtomicOpSize(AI);
     if (ValueSize < MinCASSize) {
-      llvm_unreachable(
-          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+      expandPartwordAtomicRMW(AI,
+                              TargetLoweringBase::AtomicExpansionKind::LLSC);
     } else {
       auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
         return performAtomicOp(AI->getOperation(), Builder, Loaded,
@@ -608,16 +576,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
 
 namespace {
 
-/// Result values from createMaskInstrs helper.
 struct PartwordMaskValues {
-  Type *WordType;
-  Type *ValueType;
-  Value *AlignedAddr;
-  Value *ShiftAmt;
-  Value *Mask;
-  Value *Inv_Mask;
+  // These three fields are guaranteed to be set by createMaskInstrs.
+  Type *WordType = nullptr;
+  Type *ValueType = nullptr;
+  Value *AlignedAddr = nullptr;
+
+  // The remaining fields can be null.
+  Value *ShiftAmt = nullptr;
+  Value *Mask = nullptr;
+  Value *Inv_Mask = nullptr;
 };
 
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
+  auto PrintObj = [&O](auto *V) {
+    if (V)
+      O << *V;
+    else
+      O << "nullptr";
+    O << '\n';
+  };
+  O << "PartwordMaskValues {\n";
+  O << "  WordType: ";
+  PrintObj(PMV.WordType);
+  O << "  ValueType: ";
+  PrintObj(PMV.ValueType);
+  O << "  AlignedAddr: ";
+  PrintObj(PMV.AlignedAddr);
+  O << "  ShiftAmt: ";
+  PrintObj(PMV.ShiftAmt);
+  O << "  Mask: ";
+  PrintObj(PMV.Mask);
+  O << "  Inv_Mask: ";
+  PrintObj(PMV.Inv_Mask);
+  O << "}\n";
+  return O;
+}
+
 } // end anonymous namespace
 
 /// This is a helper function which builds instructions to provide
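Note: the operator<< added above is purely a debugging aid. A typical use inside this pass would look like the following sketch (the surrounding variables are hypothetical, not from the patch):

    // Hypothetical debugging use of the printer defined above.
    PartwordMaskValues PMV =
        createMaskInstrs(Builder, AI, ValueTy, Addr, MinWordSize);
    LLVM_DEBUG(dbgs() << PMV); // prints each field, or "nullptr" if unset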
@@ -638,48 +633,74 @@ struct PartwordMaskValues {
 /// Inv_Mask: The inverse of Mask.
 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                            Type *ValueType, Value *Addr,
-                                           unsigned WordSize) {
-  PartwordMaskValues Ret;
+                                           unsigned MinWordSize) {
+  PartwordMaskValues PMV;
 
-  BasicBlock *BB = I->getParent();
-  Function *F = BB->getParent();
   Module *M = I->getModule();
-
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = M->getContext();
   const DataLayout &DL = M->getDataLayout();
-
   unsigned ValueSize = DL.getTypeStoreSize(ValueType);
 
-  assert(ValueSize < WordSize);
+  PMV.ValueType = ValueType;
+  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
+                                         : ValueType;
+  if (PMV.ValueType == PMV.WordType) {
+    PMV.AlignedAddr = Addr;
+    return PMV;
+  }
 
-  Ret.ValueType = ValueType;
-  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+  assert(ValueSize < MinWordSize);
 
   Type *WordPtrType =
-      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+      PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
 
   Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
-  Ret.AlignedAddr = Builder.CreateIntToPtr(
-      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+  PMV.AlignedAddr = Builder.CreateIntToPtr(
+      Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
       "AlignedAddr");
 
-  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
   if (DL.isLittleEndian()) {
     // turn bytes into bits
-    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
   } else {
     // turn bytes into bits, and count from the other side.
-    Ret.ShiftAmt =
-        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+    PMV.ShiftAmt = Builder.CreateShl(
+        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
   }
 
-  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
-  Ret.Mask = Builder.CreateShl(
-      ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
+  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
+  PMV.Mask = Builder.CreateShl(
+      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
       "Mask");
-
-  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+  return PMV;
+}
+
+static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+                                 const PartwordMaskValues &PMV) {
+  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+  if (PMV.WordType == PMV.ValueType)
+    return WideWord;
+
+  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
+  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
+  return Trunc;
+}
 
-  return Ret;
+static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+                                Value *Updated, const PartwordMaskValues &PMV) {
+  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
+  if (PMV.WordType == PMV.ValueType)
+    return Updated;
+
+  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
+  Value *Shift =
+      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
+  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
+  Value *Or = Builder.CreateOr(And, Shift, "inserted");
+  return Or;
 }
 
 /// Emit IR to implement a masked version of a given atomicrmw
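Note: as a sanity check on the mask arithmetic that createMaskInstrs emits, here is a standalone sketch (not part of the patch) computing the same quantities for a little-endian i16 access at byte offset 2 within a 4-byte word:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Addr = 0x1002;   // hypothetical address of the i16
      unsigned MinWordSize = 4; // bytes per native cmpxchg word
      unsigned ValueSize = 2;   // store size of i16

      uint64_t AlignedAddr = Addr & ~(uint64_t)(MinWordSize - 1);
      uint64_t PtrLSB = Addr & (MinWordSize - 1);
      uint64_t ShiftAmt = PtrLSB << 3; // turn bytes into bits (little-endian)
      uint32_t Mask = ((1u << (ValueSize * 8)) - 1) << ShiftAmt;
      uint32_t Inv_Mask = ~Mask;

      assert(AlignedAddr == 0x1000 && ShiftAmt == 16);
      assert(Mask == 0xFFFF0000u && Inv_Mask == 0x0000FFFFu);
      return 0;
    }

On a big-endian target the shift instead counts from the other side, as the emitted XOR above shows.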
@@ -719,13 +740,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
     // Finally, comparison ops will operate on the full value, so
     // truncate down to the original size, and expand out again after
     // doing the operation.
-    Value *Loaded_Shiftdown = Builder.CreateTrunc(
-        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
-    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
-    Value *NewVal_Shiftup = Builder.CreateShl(
-        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
-    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
-    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
+    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
     return FinalVal;
   }
   default:
@@ -738,12 +755,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
 ///
 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
 /// way as a typical atomicrmw expansion. The only difference here is
-/// that the operation inside of the loop must operate only upon a
+/// that the operation inside of the loop may operate upon only a
 /// part of the value.
 void AtomicExpand::expandPartwordAtomicRMW(
     AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
-  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
-
   AtomicOrdering MemOpOrder = AI->getOrdering();
 
   IRBuilder<> Builder(AI);
@@ -761,13 +776,18 @@ void AtomicExpand::expandPartwordAtomicRMW(
         ValOperand_Shifted, AI->getValOperand(), PMV);
   };
 
-  // TODO: When we're ready to support LLSC conversions too, use
-  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
-  Value *OldResult =
-      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
-                           PerformPartwordOp, createCmpXchgInstFun);
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  Value *OldResult;
+  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
+    OldResult =
+        insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+                             PerformPartwordOp, createCmpXchgInstFun);
+  } else {
+    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
+    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+                                  MemOpOrder, PerformPartwordOp);
+  }
+
+  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
 }
@@ -800,14 +820,13 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
   AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
                                                  NewOperand, AI->getOrdering());
 
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
   return NewAI;
 }
 
-void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
   // The basic idea here is that we're expanding a cmpxchg of a
   // smaller memory size up to a word-sized cmpxchg. To do this, we
   // need to add a retry-loop for strong cmpxchg, so that
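Note: the extract/operate/insert sequence used by the partword expansions corresponds to the following scalar computation (a sketch for the little-endian case, not part of the patch):

    #include <cstdint>

    // One iteration of a partword 'atomicrmw add i8' performed on the
    // containing 32-bit word, mirroring extractMaskedValue /
    // performAtomicOp / insertMaskedValue.
    uint32_t partwordAddOnce(uint32_t Loaded, uint8_t Inc, unsigned ShiftAmt,
                             uint32_t Inv_Mask) {
      uint8_t Extracted = (uint8_t)(Loaded >> ShiftAmt); // extractMaskedValue
      uint8_t NewVal = (uint8_t)(Extracted + Inc);       // the RMW operation
      uint32_t Widened = (uint32_t)NewVal << ShiftAmt;   // insertMaskedValue:
      return (Loaded & Inv_Mask) | Widened;              // merge into the word
    }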
@@ -923,14 +942,14 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
   // partword.cmpxchg.end:
   Builder.SetInsertPoint(CI);
 
-  Value *FinalOldVal = Builder.CreateTrunc(
-      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
   Res = Builder.CreateInsertValue(Res, Success, 1);
 
   CI->replaceAllUsesWith(Res);
   CI->eraseFromParent();
+  return true;
 }
 
 void AtomicExpand::expandAtomicOpToLLSC(
@@ -965,8 +984,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
   Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
       Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
 }
@@ -987,9 +1005,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
   Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
       Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
       CI->getSuccessOrdering());
-  Value *FinalOldVal = Builder.CreateTrunc(
-      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
-
+  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
   Value *Success = Builder.CreateICmpEQ(
@@ -1126,24 +1142,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //
   // The full expansion we produce is:
   //     [...]
+  //     %aligned.addr = ...
   // cmpxchg.start:
-  //     %unreleasedload = @load.linked(%addr)
-  //     %should_store = icmp eq %unreleasedload, %desired
-  //     br i1 %should_store, label %cmpxchg.fencedstore,
+  //     %unreleasedload = @load.linked(%aligned.addr)
+  //     %unreleasedload.extract = extract value from %unreleasedload
+  //     %should_store = icmp eq %unreleasedload.extract, %desired
+  //     br i1 %should_store, label %cmpxchg.releasingstore,
   //                          label %cmpxchg.nostore
   // cmpxchg.releasingstore:
   //     fence?
   //     br label cmpxchg.trystore
   // cmpxchg.trystore:
-  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
+  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
   //                            [%releasedload, %cmpxchg.releasedload]
-  //     %stored = @store_conditional(%new, %addr)
+  //     %updated.new = insert %new into %loaded.trystore
+  //     %stored = @store_conditional(%updated.new, %aligned.addr)
   //     %success = icmp eq i32 %stored, 0
   //     br i1 %success, label %cmpxchg.success,
   //                     label %cmpxchg.releasedload/%cmpxchg.failure
   // cmpxchg.releasedload:
-  //     %releasedload = @load.linked(%addr)
-  //     %should_store = icmp eq %releasedload, %desired
+  //     %releasedload = @load.linked(%aligned.addr)
+  //     %releasedload.extract = extract value from %releasedload
+  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
   // cmpxchg.success:
@@ -1159,9 +1179,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //     fence?
   //     br label %cmpxchg.end
   // cmpxchg.end:
-  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
-  //                   [%loaded.trystore, %cmpxchg.trystore]
+  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
+  //                        [%loaded.trystore, %cmpxchg.trystore]
   //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+  //     %loaded = extract value from %loaded.exit
   //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
   //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
   //     [...]
@@ -1187,13 +1208,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.SetInsertPoint(BB);
   if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
     TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+
+  PartwordMaskValues PMV =
+      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+                       TLI->getMinCmpXchgSizeInBits() / 8);
   Builder.CreateBr(StartBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(StartBB);
-  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *UnreleasedLoad =
+      TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+  Value *UnreleasedLoadExtract =
+      extractMaskedValue(Builder, UnreleasedLoad, PMV);
   Value *ShouldStore = Builder.CreateICmpEQ(
-      UnreleasedLoad, CI->getCompareOperand(), "should_store");
+      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
@@ -1205,8 +1233,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(TryStoreBB);
 
   Builder.SetInsertPoint(TryStoreBB);
-  Value *StoreSuccess = TLI->emitStoreConditional(
-      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  PHINode *LoadedTryStore =
+      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+  Value *NewValueInsert =
+      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
+  Value *StoreSuccess =
+      TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
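Note: the control flow built here matches the expansion comment earlier. Roughly, in a C-level sketch (ll, sc, extract, and insert are hypothetical stand-ins for emitLoadLinked, emitStoreConditional, extractMaskedValue, and insertMaskedValue; not part of the patch):

    #include <cstdint>

    uint16_t extract(uint32_t Word);              // mask helpers, as above
    uint32_t insert(uint32_t Word, uint16_t V);
    uint32_t ll(uint32_t *AlignedAddr);           // load-linked
    int sc(uint32_t *AlignedAddr, uint32_t V);    // store-conditional, 0 = ok

    bool strongCmpXchg(uint32_t *AlignedAddr, uint16_t Expected,
                       uint16_t Desired, uint16_t *OldOut) {
      for (;;) {
        uint32_t Loaded = ll(AlignedAddr);          // cmpxchg.start/releasedload
        if (extract(Loaded) != Expected) {          // should_store
          *OldOut = extract(Loaded);                // cmpxchg.nostore -> failure
          return false;
        }
        uint32_t Updated = insert(Loaded, Desired); // cmpxchg.trystore
        if (sc(AlignedAddr, Updated) == 0) {        // cmpxchg.success
          *OldOut = extract(Loaded);
          return true;
        }                                           // sc failed: retry
      }
    }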
@@ -1216,13 +1249,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
   if (HasReleasedLoadBB) {
-    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
-    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
-                                       "should_store");
+    SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
+    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
+                                       CI->getCompareOperand(), "should_store");
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
     Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+    // Update PHI node in TryStoreBB.
+    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
     Builder.CreateUnreachable();
 
@@ -1234,6 +1270,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(ExitBB);
 
   Builder.SetInsertPoint(NoStoreBB);
+  PHINode *LoadedNoStore =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
+  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
+  if (HasReleasedLoadBB)
+    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
+
   // In the failing case, where we don't execute the store-conditional, the
   // target might want to balance out the load-linked with a dedicated
   // instruction (e.g., on ARM, clearing the exclusive monitor).
@@ -1241,6 +1283,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(FailureBB);
 
   Builder.SetInsertPoint(FailureBB);
+  PHINode *LoadedFailure =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
+  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
+  if (CI->isWeak())
+    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
   if (ShouldInsertFencesForAtomic)
     TLI->emitTrailingFence(Builder, CI, FailureOrder);
   Builder.CreateBr(ExitBB);
@@ -1250,32 +1297,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
   // PHI.
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+  PHINode *LoadedExit =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
+  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
+  LoadedExit->addIncoming(LoadedFailure, FailureBB);
+  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
   Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
   Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
 
-  // Setup the builder so we can create any PHIs we need.
-  Value *Loaded;
-  if (!HasReleasedLoadBB)
-    Loaded = UnreleasedLoad;
-  else {
-    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
-    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
-    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
-    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
-    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
-    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
-    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
-
-    Loaded = ExitLoaded;
-  }
+  // This is the "exit value" from the cmpxchg expansion. It may be of
+  // a type wider than the one in the cmpxchg instruction.
+  Value *LoadedFull = LoadedExit;
+
+  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
+  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
 
   // Look for any users of the cmpxchg that are just comparing the loaded value
   // against the desired one, and replace them with the CFG-derived version.
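Note: the loaded.failure PHI gets an incoming value from cmpxchg.trystore only for a weak cmpxchg, since only a weak cmpxchg is allowed to report failure directly when the store-conditional fails; a strong cmpxchg retries instead. This is the same spurious-failure behavior visible at the C++ level (illustrative only, not from the patch):

    #include <atomic>

    // compare_exchange_weak may fail spuriously (e.g., a failed
    // store-conditional), so callers normally retry in a loop.
    int fetchAddViaWeakCas(std::atomic<int> &A, int Inc) {
      int Old = A.load();
      while (!A.compare_exchange_weak(Old, Old + Inc)) {
        // Old was reloaded with the currently stored value; try again.
      }
      return Old;
    }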
@@ -1377,7 +1412,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
   Builder.SetInsertPoint(BB);
   LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
   // Atomics require at least natural alignment.
-  InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
+  InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8));
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
@@ -1414,11 +1449,9 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
     llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
   case TargetLoweringBase::AtomicExpansionKind::None:
     if (ValueSize < MinCASSize)
-      expandPartwordCmpXchg(CI);
+      return expandPartwordCmpXchg(CI);
     return false;
   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    assert(ValueSize >= MinCASSize &&
-           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
     return expandAtomicCmpXchg(CI);
   }
   case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
@@ -1449,7 +1482,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
 // must be one of the potentially-specialized sizes, and the value
 // type must actually exist in C on the target (otherwise, the
 // function wouldn't actually be defined.)
-static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
+static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                   const DataLayout &DL) {
   // TODO: "LargestSize" is an approximation for "largest type that
   // you can express in C". It seems to be the case that int128 is
@@ -1459,7 +1492,7 @@ static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
   // really be some more reliable way in LLVM of determining integer
   // sizes which are valid in the target's C ABI...
   unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
-  return Align >= Size &&
+  return Alignment >= Size &&
          (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
 }
@@ -1469,10 +1502,9 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
       RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
       RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
   unsigned Size = getAtomicOpSize(I);
-  unsigned Align = getAtomicOpAlign(I);
 
   bool expanded = expandAtomicOpToLibcall(
-      I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
   (void)expanded;
   assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load");
@@ -1483,11 +1515,10 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
       RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
       RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
   unsigned Size = getAtomicOpSize(I);
-  unsigned Align = getAtomicOpAlign(I);
 
   bool expanded = expandAtomicOpToLibcall(
-      I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
-      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
+      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
   (void)expanded;
   assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store");
 }
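Note: under canUseSizedAtomicCall, a naturally aligned power-of-two-sized operation maps to a size-specialized libcall such as __atomic_load_8, while anything else falls back to the generic __atomic_load. A standalone sketch of the predicate with a plain integer alignment (illustrative, mirroring the logic above):

    // Mirrors canUseSizedAtomicCall with a plain unsigned alignment.
    static bool canUseSizedCallSketch(unsigned Size, unsigned Alignment,
                                      unsigned LargestSize) {
      return Alignment >= Size &&
             (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
             Size <= LargestSize;
    }
    // canUseSizedCallSketch(8, 8, 16) -> true  (use __atomic_load_8)
    // canUseSizedCallSketch(3, 1, 16) -> false (use generic __atomic_load)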
@@ -1498,10 +1529,9 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
       RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
       RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
   unsigned Size = getAtomicOpSize(I);
-  unsigned Align = getAtomicOpAlign(I);
 
   bool expanded = expandAtomicOpToLibcall(
-      I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
       I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
       Libcalls);
   (void)expanded;
@@ -1571,13 +1601,12 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
   ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
 
   unsigned Size = getAtomicOpSize(I);
-  unsigned Align = getAtomicOpAlign(I);
 
   bool Success = false;
   if (!Libcalls.empty())
     Success = expandAtomicOpToLibcall(
-        I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
-        I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
+        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
 
   // The expansion failed: either there were no libcalls at all for
   // the operation (min/max), or there were only size-specialized
@@ -1608,7 +1637,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
 // 'I' are extracted from the Instruction subclass by the
 // caller. Depending on the particular call, some will be null.
 bool AtomicExpand::expandAtomicOpToLibcall(
-    Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
+    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
     Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
     AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
   assert(Libcalls.size() == 6);
@@ -1619,10 +1648,10 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   IRBuilder<> Builder(I);
   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
 
-  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
+  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
 
-  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
+  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
 
   // TODO: the "order" argument type is "int", not int32. So
   // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
@@ -1712,7 +1741,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   // 'expected' argument, if present.
   if (CASExpected) {
     AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
-    AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
+    AllocaCASExpected->setAlignment(AllocaAlignment);
     unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
 
     AllocaCASExpected_i8 =
@@ -1731,7 +1760,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
       Args.push_back(IntValue);
     } else {
       AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
-      AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
+      AllocaValue->setAlignment(AllocaAlignment);
       AllocaValue_i8 =
           Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
       Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
@@ -1743,7 +1772,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   // 'ret' argument.
   if (!CASExpected && HasResult && !UseSizedLibcall) {
     AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
-    AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
+    AllocaResult->setAlignment(AllocaAlignment);
     unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
     AllocaResult_i8 =
         Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
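Note: for reference, the generic (non-sized) libcall targeted by the cmpxchg path has the following C signature in the GCC/Clang atomic library ABI (a hedged sketch; consult the libatomic documentation before relying on it):

    #include <stdbool.h>
    #include <stddef.h>

    // Generic __atomic compare-exchange entry point: 'expected' is updated
    // with the observed value on failure; memory orders are passed as ints.
    bool __atomic_compare_exchange(size_t size, void *object, void *expected,
                                   void *desired, int success_order,
                                   int failure_order);

The temporary allocas built above are what allow arbitrary-sized values to be passed through the void* 'expected', 'desired', and result pointers of this interface.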