Diffstat (limited to 'llvm/lib/CodeGen/AtomicExpandPass.cpp')
-rw-r--r--    llvm/lib/CodeGen/AtomicExpandPass.cpp    345
1 file changed, 187 insertions(+), 158 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 37a50cde6391..a5030305435c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -89,7 +89,7 @@ namespace {
AtomicRMWInst *I,
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
- void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
@@ -105,7 +105,7 @@ namespace {
bool isIdempotentRMW(AtomicRMWInst *RMWI);
bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
- bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
Value *PointerOperand, Value *ValueOperand,
Value *CASExpected, AtomicOrdering Ordering,
AtomicOrdering Ordering2,
@@ -152,47 +152,15 @@ static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}
-// Helper functions to retrieve the alignment of atomic instructions.
-static unsigned getAtomicOpAlign(LoadInst *LI) {
- unsigned Align = LI->getAlignment();
- // In the future, if this IR restriction is relaxed, we should
- // return DataLayout::getABITypeAlignment when there's no align
- // value.
- assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
- return Align;
-}
-
-static unsigned getAtomicOpAlign(StoreInst *SI) {
- unsigned Align = SI->getAlignment();
- // In the future, if this IR restriction is relaxed, we should
- // return DataLayout::getABITypeAlignment when there's no align
- // value.
- assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
- return Align;
-}
-
-static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
- // TODO(PR27168): This instruction has no alignment attribute, but unlike the
- // default alignment for load/store, the default here is to assume
- // it has NATURAL alignment, not DataLayout-specified alignment.
- const DataLayout &DL = RMWI->getModule()->getDataLayout();
- return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
-}
-
-static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
- // TODO(PR27168): same comment as above.
- const DataLayout &DL = CASI->getModule()->getDataLayout();
- return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
-}
-
// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
- return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+ Align Alignment = I->getAlign();
+ return Alignment >= Size &&
+ Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
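Editorial note, not part of the patch: with this change the predicate compares an llvm::Align directly against the byte size. A minimal standalone sketch of the same check, assuming a hypothetical 8-byte inline-atomic limit:

#include "llvm/Support/Alignment.h"

// Mirrors atomicSizeSupported: the access must be at least naturally
// aligned and no wider than what the target can lower inline (the
// 8-byte limit here is an assumption for illustration).
static bool sizeSupported(unsigned SizeInBytes, llvm::Align Alignment,
                          unsigned MaxAtomicBytes = 8) {
  return Alignment >= SizeInBytes && SizeInBytes <= MaxAtomicBytes;
}

// sizeSupported(4, llvm::Align(4)) -> true  (lowered inline)
// sizeSupported(4, llvm::Align(2)) -> false (expanded to a __atomic libcall)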
bool AtomicExpand::runOnFunction(Function &F) {
@@ -383,7 +351,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
- NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
+ NewLI->setAlignment(LI->getAlign());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
@@ -470,7 +438,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
- NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
+ NewSI->setAlignment(SI->getAlign());
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
@@ -570,8 +538,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(AI);
if (ValueSize < MinCASSize) {
- llvm_unreachable(
- "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
return performAtomicOp(AI->getOperation(), Builder, Loaded,
@@ -608,16 +576,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
namespace {
-/// Result values from createMaskInstrs helper.
struct PartwordMaskValues {
- Type *WordType;
- Type *ValueType;
- Value *AlignedAddr;
- Value *ShiftAmt;
- Value *Mask;
- Value *Inv_Mask;
+ // These three fields are guaranteed to be set by createMaskInstrs.
+ Type *WordType = nullptr;
+ Type *ValueType = nullptr;
+ Value *AlignedAddr = nullptr;
+ // The remaining fields can be null.
+ Value *ShiftAmt = nullptr;
+ Value *Mask = nullptr;
+ Value *Inv_Mask = nullptr;
};
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
+ auto PrintObj = [&O](auto *V) {
+ if (V)
+ O << *V;
+ else
+ O << "nullptr";
+ O << '\n';
+ };
+ O << "PartwordMaskValues {\n";
+ O << " WordType: ";
+ PrintObj(PMV.WordType);
+ O << " ValueType: ";
+ PrintObj(PMV.ValueType);
+ O << " AlignedAddr: ";
+ PrintObj(PMV.AlignedAddr);
+ O << " ShiftAmt: ";
+ PrintObj(PMV.ShiftAmt);
+ O << " Mask: ";
+ PrintObj(PMV.Mask);
+ O << " Inv_Mask: ";
+ PrintObj(PMV.Inv_Mask);
+ O << "}\n";
+ return O;
+}
+
} // end anonymous namespace
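Editorial aside: the operator<< above is only a debugging aid (hence LLVM_ATTRIBUTE_UNUSED). A hypothetical use from inside this pass, assuming the file's existing "atomic-expand" DEBUG_TYPE and with Builder, I, ValueTy, and Addr standing in for the instruction being expanded, would be:

// Dump the computed masking values for an instruction being expanded.
PartwordMaskValues PMV =
    createMaskInstrs(Builder, I, ValueTy, Addr,
                     TLI->getMinCmpXchgSizeInBits() / 8);
LLVM_DEBUG(dbgs() << "Partword expansion of " << *I << ":\n" << PMV);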
/// This is a helper function which builds instructions to provide
@@ -638,48 +633,74 @@ struct PartwordMaskValues {
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Type *ValueType, Value *Addr,
- unsigned WordSize) {
- PartwordMaskValues Ret;
+ unsigned MinWordSize) {
+ PartwordMaskValues PMV;
- BasicBlock *BB = I->getParent();
- Function *F = BB->getParent();
Module *M = I->getModule();
-
- LLVMContext &Ctx = F->getContext();
+ LLVMContext &Ctx = M->getContext();
const DataLayout &DL = M->getDataLayout();
-
unsigned ValueSize = DL.getTypeStoreSize(ValueType);
- assert(ValueSize < WordSize);
+ PMV.ValueType = ValueType;
+ PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
+ : ValueType;
+ if (PMV.ValueType == PMV.WordType) {
+ PMV.AlignedAddr = Addr;
+ return PMV;
+ }
- Ret.ValueType = ValueType;
- Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+ assert(ValueSize < MinWordSize);
Type *WordPtrType =
- Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+ PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
- Ret.AlignedAddr = Builder.CreateIntToPtr(
- Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+ PMV.AlignedAddr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
"AlignedAddr");
- Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+ Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
if (DL.isLittleEndian()) {
// turn bytes into bits
- Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
} else {
// turn bytes into bits, and count from the other side.
- Ret.ShiftAmt =
- Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+ PMV.ShiftAmt = Builder.CreateShl(
+ Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
}
- Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
- Ret.Mask = Builder.CreateShl(
- ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
+ PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
+ PMV.Mask = Builder.CreateShl(
+ ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
"Mask");
- Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+ PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+ return PMV;
+}
+
+static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+ const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return WideWord;
+
+ Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
+ Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
+ return Trunc;
+}
- return Ret;
+static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+ Value *Updated, const PartwordMaskValues &PMV) {
+ assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+ assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
+ if (PMV.WordType == PMV.ValueType)
+ return Updated;
+
+ Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
+ Value *Shift =
+ Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
+ Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
+ Value *Or = Builder.CreateOr(And, Shift, "inserted");
+ return Or;
}
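For intuition, an editorial sketch that is not part of the patch: the aligned-address, shift, and mask arithmetic from createMaskInstrs, plus the extractMaskedValue/insertMaskedValue round trip, written over plain integers for a 1-byte value inside a 4-byte word on a little-endian target (all concrete numbers are invented for illustration):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Addr = 0x1003;    // address of the i8 operand
  const unsigned MinWordSize = 4;  // bytes per cmpxchg/LL-SC word
  const unsigned ValueSize = 1;    // bytes in the original operand

  uint64_t AlignedAddr = Addr & ~(uint64_t)(MinWordSize - 1);    // 0x1000
  uint64_t PtrLSB = Addr & (MinWordSize - 1);                    // 3
  // Big-endian targets first XOR PtrLSB with (MinWordSize - ValueSize).
  unsigned ShiftAmt = unsigned(PtrLSB) * 8;                      // 24
  uint32_t Mask = ((1u << (ValueSize * 8)) - 1) << ShiftAmt;     // 0xFF000000
  uint32_t InvMask = ~Mask;                                      // 0x00FFFFFF

  // Extract the byte from a loaded wide word, then insert a new byte.
  uint32_t Wide = 0xAABBCCDD;
  uint8_t Extracted = uint8_t(Wide >> ShiftAmt);                 // 0xAA
  uint32_t Inserted = (Wide & InvMask) | (uint32_t{0x42} << ShiftAmt);
  assert(Extracted == 0xAA && Inserted == 0x42BBCCDD);
  (void)AlignedAddr;
  return 0;
}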
/// Emit IR to implement a masked version of a given atomicrmw
@@ -719,13 +740,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
// Finally, comparison ops will operate on the full value, so
// truncate down to the original size, and expand out again after
// doing the operation.
- Value *Loaded_Shiftdown = Builder.CreateTrunc(
- Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
- Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
- Value *NewVal_Shiftup = Builder.CreateShl(
- Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
- Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
- Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+ Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+ Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
return FinalVal;
}
default:
@@ -738,12 +755,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
-/// that the operation inside of the loop must operate only upon a
+/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
- assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
-
AtomicOrdering MemOpOrder = AI->getOrdering();
IRBuilder<> Builder(AI);
@@ -761,13 +776,18 @@ void AtomicExpand::expandPartwordAtomicRMW(
ValOperand_Shifted, AI->getValOperand(), PMV);
};
- // TODO: When we're ready to support LLSC conversions too, use
- // insertRMWLLSCLoop here for ExpansionKind==LLSC.
- Value *OldResult =
- insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
- PerformPartwordOp, createCmpXchgInstFun);
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ Value *OldResult;
+ if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
+ OldResult =
+ insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+ PerformPartwordOp, createCmpXchgInstFun);
+ } else {
+ assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
+ OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+ MemOpOrder, PerformPartwordOp);
+ }
+
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
}
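Editorial context for the new LLSC branch, hedged: a target can now legally request LL/SC expansion for atomicrmw operations narrower than its cmpxchg width. A hypothetical TargetLowering override (MyTargetLowering is invented; the hook itself exists in TargetLoweringBase) might look like:

// Ask AtomicExpandPass to build an LL/SC loop for every atomicrmw,
// including i8/i16 operations; before this patch the sub-word case
// hit the llvm_unreachable removed from tryExpandAtomicRMW above.
TargetLowering::AtomicExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  return AtomicExpansionKind::LLSC;
}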
@@ -800,14 +820,13 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
NewOperand, AI->getOrdering());
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
return NewAI;
}
-void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// The basic idea here is that we're expanding a cmpxchg of a
// smaller memory size up to a word-sized cmpxchg. To do this, we
// need to add a retry-loop for strong cmpxchg, so that
@@ -923,14 +942,14 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
// partword.cmpxchg.end:
Builder.SetInsertPoint(CI);
- Value *FinalOldVal = Builder.CreateTrunc(
- Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Res = Builder.CreateInsertValue(Res, Success, 1);
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
+ return true;
}
void AtomicExpand::expandAtomicOpToLLSC(
@@ -965,8 +984,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
AI->getOrdering());
- Value *FinalOldResult = Builder.CreateTrunc(
- Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
AI->replaceAllUsesWith(FinalOldResult);
AI->eraseFromParent();
}
@@ -987,9 +1005,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
CI->getSuccessOrdering());
- Value *FinalOldVal = Builder.CreateTrunc(
- Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
-
+ Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
Value *Success = Builder.CreateICmpEQ(
@@ -1126,24 +1142,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
//
// The full expansion we produce is:
// [...]
+ // %aligned.addr = ...
// cmpxchg.start:
- // %unreleasedload = @load.linked(%addr)
- // %should_store = icmp eq %unreleasedload, %desired
- // br i1 %should_store, label %cmpxchg.fencedstore,
+ // %unreleasedload = @load.linked(%aligned.addr)
+ // %unreleasedload.extract = extract value from %unreleasedload
+ // %should_store = icmp eq %unreleasedload.extract, %desired
+ // br i1 %should_store, label %cmpxchg.releasingstore,
// label %cmpxchg.nostore
// cmpxchg.releasingstore:
// fence?
// br label cmpxchg.trystore
// cmpxchg.trystore:
- // %loaded.trystore = phi [%unreleasedload, %releasingstore],
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
// [%releasedload, %cmpxchg.releasedload]
- // %stored = @store_conditional(%new, %addr)
+ // %updated.new = insert %new into %loaded.trystore
+ // %stored = @store_conditional(%updated.new, %aligned.addr)
// %success = icmp eq i32 %stored, 0
// br i1 %success, label %cmpxchg.success,
// label %cmpxchg.releasedload/%cmpxchg.failure
// cmpxchg.releasedload:
- // %releasedload = @load.linked(%addr)
- // %should_store = icmp eq %releasedload, %desired
+ // %releasedload = @load.linked(%aligned.addr)
+ // %releasedload.extract = extract value from %releasedload
+ // %should_store = icmp eq %releasedload.extract, %desired
// br i1 %should_store, label %cmpxchg.trystore,
// label %cmpxchg.failure
// cmpxchg.success:
@@ -1159,9 +1179,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
- // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
- // [%loaded.trystore, %cmpxchg.trystore]
+ // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %loaded = extract value from %loaded.exit
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
@@ -1187,13 +1208,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(BB);
if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+ TLI->getMinCmpXchgSizeInBits() / 8);
Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(StartBB);
- Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *UnreleasedLoad =
+ TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ Value *UnreleasedLoadExtract =
+ extractMaskedValue(Builder, UnreleasedLoad, PMV);
Value *ShouldStore = Builder.CreateICmpEQ(
- UnreleasedLoad, CI->getCompareOperand(), "should_store");
+ UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
@@ -1205,8 +1233,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TLI->emitStoreConditional(
- Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ PHINode *LoadedTryStore =
+ Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+ LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ Value *NewValueInsert =
+ insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1216,13 +1249,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
if (HasReleasedLoadBB) {
- SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
- "should_store");
+ SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+ Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
+ CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ // Update PHI node in TryStoreBB.
+ LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
Builder.CreateUnreachable();
@@ -1234,6 +1270,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(NoStoreBB);
+ PHINode *LoadedNoStore =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
+ LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
+ if (HasReleasedLoadBB)
+ LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
+
// In the failing case, where we don't execute the store-conditional, the
// target might want to balance out the load-linked with a dedicated
// instruction (e.g., on ARM, clearing the exclusive monitor).
@@ -1241,6 +1283,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(FailureBB);
Builder.SetInsertPoint(FailureBB);
+ PHINode *LoadedFailure =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
+ LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
+ if (CI->isWeak())
+ LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
if (ShouldInsertFencesForAtomic)
TLI->emitTrailingFence(Builder, CI, FailureOrder);
Builder.CreateBr(ExitBB);
@@ -1250,32 +1297,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
// PHI.
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ PHINode *LoadedExit =
+ Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
+ LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
+ LoadedExit->addIncoming(LoadedFailure, FailureBB);
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
- // Setup the builder so we can create any PHIs we need.
- Value *Loaded;
- if (!HasReleasedLoadBB)
- Loaded = UnreleasedLoad;
- else {
- Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
- PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
- TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
- Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
- PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
- NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
- Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
- PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
- ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
- ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
-
- Loaded = ExitLoaded;
- }
+ // This is the "exit value" from the cmpxchg expansion. It may be of
+ // a type wider than the one in the cmpxchg instruction.
+ Value *LoadedFull = LoadedExit;
+
+ Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
+ Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
@@ -1377,7 +1412,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
+ InitLoaded->setAlignment(Align(ResultTy->getPrimitiveSizeInBits() / 8));
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1414,11 +1449,9 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
case TargetLoweringBase::AtomicExpansionKind::None:
if (ValueSize < MinCASSize)
- expandPartwordCmpXchg(CI);
+ return expandPartwordCmpXchg(CI);
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
- assert(ValueSize >= MinCASSize &&
- "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
return expandAtomicCmpXchg(CI);
}
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
@@ -1449,7 +1482,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
-static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
+static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
const DataLayout &DL) {
// TODO: "LargestSize" is an approximation for "largest type that
// you can express in C". It seems to be the case that int128 is
@@ -1459,7 +1492,7 @@ static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
// really be some more reliable way in LLVM of determining integer
// sizes which are valid in the target's C ABI...
unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
- return Align >= Size &&
+ return Alignment >= Size &&
(Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
Size <= LargestSize;
}
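A worked reading of the predicate above (editorial, with the 64-bit assumption made explicit): when the largest legal integer type is at least 64 bits, LargestSize is 16, so

// canUseSizedAtomicCall(8, Align(8), DL)   -> true  (use __atomic_*_8)
// canUseSizedAtomicCall(8, Align(4), DL)   -> false (under-aligned)
// canUseSizedAtomicCall(3, Align(4), DL)   -> false (no 3-byte variant)
// canUseSizedAtomicCall(16, Align(16), DL) -> true here, false when the
//                                             largest legal integer is 32 bits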
@@ -1469,10 +1502,9 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+ I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
(void)expanded;
assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
@@ -1483,11 +1515,10 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
- I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
(void)expanded;
assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}
@@ -1498,10 +1529,9 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool expanded = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
Libcalls);
(void)expanded;
@@ -1571,13 +1601,12 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
unsigned Size = getAtomicOpSize(I);
- unsigned Align = getAtomicOpAlign(I);
bool Success = false;
if (!Libcalls.empty())
Success = expandAtomicOpToLibcall(
- I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
- I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
+ nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
// The expansion failed: either there were no libcalls at all for
// the operation (min/max), or there were only size-specialized
@@ -1608,7 +1637,7 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpand::expandAtomicOpToLibcall(
- Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
+ Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
assert(Libcalls.size() == 6);
@@ -1619,10 +1648,10 @@ bool AtomicExpand::expandAtomicOpToLibcall(
IRBuilder<> Builder(I);
IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
- bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
+ bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
- unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
+ const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
// TODO: the "order" argument type is "int", not int32. So
// getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
@@ -1712,7 +1741,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
- AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaCASExpected->setAlignment(AllocaAlignment);
unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
AllocaCASExpected_i8 =
@@ -1731,7 +1760,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Args.push_back(IntValue);
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
- AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaValue->setAlignment(AllocaAlignment);
AllocaValue_i8 =
Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
@@ -1743,7 +1772,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'ret' argument.
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
- AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
+ AllocaResult->setAlignment(AllocaAlignment);
unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
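For reference, hedged and not taken from this patch: the temporaries above feed the generic, non-size-specialized __atomic calls, which pass operands and results through memory. The CAS entry point, as documented for the GCC/compiler-rt libatomic runtime (quoted from memory, so treat the exact signature as an assumption), is roughly:

#include <cstddef>

// Generic CAS libcall used when canUseSizedAtomicCall returns false;
// "expected" is read as the comparison value and updated in place with
// the observed value when the exchange fails.
extern "C" bool __atomic_compare_exchange(std::size_t size, void *object,
                                          void *expected, void *desired,
                                          int success_order,
                                          int failure_order);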