Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 125
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def | 2
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/ExpandVectorPredication.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 50
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 31
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 46
-rw-r--r--  llvm/lib/CodeGen/LiveIntervals.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineBasicBlock.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/ModuloSchedule.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/PrologEpilogInserter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 3
-rw-r--r--  llvm/lib/CodeGen/SelectOptimize.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 128
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 93
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 38
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 168
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 99
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 160
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 6
33 files changed, 677 insertions(+), 390 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4a31bf85446b..94612a51d2e1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1816,6 +1816,11 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
assert(MAI->hasVisibilityOnlyWithLinkage() &&
"Visibility should be handled with emitLinkage() on AIX.");
+
+ // Linkage for alias of global variable has been emitted.
+ if (isa<GlobalVariable>(GA.getAliaseeObject()))
+ return;
+
emitLinkage(&GA, Name);
// If it's a function, also emit linkage for aliases of function entry
// point.
@@ -2860,7 +2865,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
AsmPrinter &AP,
const Constant *BaseCV = nullptr,
- uint64_t Offset = 0);
+ uint64_t Offset = 0,
+ AsmPrinter::AliasMapTy *AliasList = nullptr);
static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);
@@ -2914,9 +2920,21 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
return -1;
}
-static void emitGlobalConstantDataSequential(const DataLayout &DL,
- const ConstantDataSequential *CDS,
- AsmPrinter &AP) {
+static void emitGlobalAliasInline(AsmPrinter &AP, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ if (AliasList) {
+ auto AliasIt = AliasList->find(Offset);
+ if (AliasIt != AliasList->end()) {
+ for (const GlobalAlias *GA : AliasIt->second)
+ AP.OutStreamer->emitLabel(AP.getSymbol(GA));
+ AliasList->erase(Offset);
+ }
+ }
+}
+
+static void emitGlobalConstantDataSequential(
+ const DataLayout &DL, const ConstantDataSequential *CDS, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
@@ -2933,17 +2951,20 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
// Otherwise, emit the values in successive locations.
unsigned ElementByteSize = CDS->getElementByteSize();
if (isa<IntegerType>(CDS->getElementType())) {
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
if (AP.isVerbose())
AP.OutStreamer->getCommentOS()
- << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i));
- AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
+ << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(I));
+ AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(I),
ElementByteSize);
}
} else {
Type *ET = CDS->getElementType();
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);
+ }
}
unsigned Size = DL.getTypeAllocSize(CDS->getType());
@@ -2956,7 +2977,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
static void emitGlobalConstantArray(const DataLayout &DL,
const ConstantArray *CA, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, DL);
@@ -2964,44 +2986,75 @@ static void emitGlobalConstantArray(const DataLayout &DL,
if (Value != -1) {
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
AP.OutStreamer->emitFill(Bytes, Value);
- }
- else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
- emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
- Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ } else {
+ for (unsigned I = 0, E = CA->getNumOperands(); I != E; ++I) {
+ emitGlobalConstantImpl(DL, CA->getOperand(I), AP, BaseCV, Offset,
+ AliasList);
+ Offset += DL.getTypeAllocSize(CA->getOperand(I)->getType());
}
}
}
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP);
+
static void emitGlobalConstantVector(const DataLayout &DL,
- const ConstantVector *CV, AsmPrinter &AP) {
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
+ const ConstantVector *CV, AsmPrinter &AP,
+ AsmPrinter::AliasMapTy *AliasList) {
+ Type *ElementType = CV->getType()->getElementType();
+ uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType);
+ uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType);
+ uint64_t EmittedSize;
+ if (ElementSizeInBits != ElementAllocSizeInBits) {
+ // If the allocation size of an element is different from the size in bits,
+ // printing each element separately will insert incorrect padding.
+ //
+ // The general algorithm here is complicated; instead of writing it out
+ // here, just use the existing code in ConstantFolding.
+ Type *IntT =
+ IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType()));
+ ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant(
+ ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL));
+ if (!CI) {
+ report_fatal_error(
+ "Cannot lower vector global with unusual element type");
+ }
+ emitGlobalAliasInline(AP, 0, AliasList);
+ emitGlobalConstantLargeInt(CI, AP);
+ EmittedSize = DL.getTypeStoreSize(CV->getType());
+ } else {
+ for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) {
+      emitGlobalAliasInline(AP, DL.getTypeAllocSize(ElementType) * I,
+                            AliasList);
+ emitGlobalConstantImpl(DL, CV->getOperand(I), AP);
+ }
+ EmittedSize =
+ DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements();
+ }
unsigned Size = DL.getTypeAllocSize(CV->getType());
- unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
- CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->emitZeros(Padding);
}
static void emitGlobalConstantStruct(const DataLayout &DL,
const ConstantStruct *CS, AsmPrinter &AP,
- const Constant *BaseCV, uint64_t Offset) {
+ const Constant *BaseCV, uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
// Print the fields in successive locations. Pad to align if needed!
unsigned Size = DL.getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL.getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
- const Constant *Field = CS->getOperand(i);
+ for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
+ const Constant *Field = CS->getOperand(I);
// Print the actual field value.
- emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar,
+ AliasList);
// Check if padding is needed and insert one or more 0s.
uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
- uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- - Layout->getElementOffset(i)) - FieldSize;
+ uint64_t PadSize = ((I == E - 1 ? Size : Layout->getElementOffset(I + 1)) -
+ Layout->getElementOffset(I)) -
+ FieldSize;
SizeSoFar += FieldSize + PadSize;
// Insert padding - this may include padding to increase the size of the
@@ -3211,7 +3264,9 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
AsmPrinter &AP, const Constant *BaseCV,
- uint64_t Offset) {
+ uint64_t Offset,
+ AsmPrinter::AliasMapTy *AliasList) {
+ emitGlobalAliasInline(AP, Offset, AliasList);
uint64_t Size = DL.getTypeAllocSize(CV->getType());
// Globals with sub-elements such as combinations of arrays and structs
@@ -3251,13 +3306,13 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
- return emitGlobalConstantDataSequential(DL, CDS, AP);
+ return emitGlobalConstantDataSequential(DL, CDS, AP, AliasList);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset, AliasList);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset, AliasList);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -3276,7 +3331,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return emitGlobalConstantVector(DL, V, AP);
+ return emitGlobalConstantVector(DL, V, AP, AliasList);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
@@ -3292,15 +3347,21 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV,
+ AliasMapTy *AliasList) {
uint64_t Size = DL.getTypeAllocSize(CV->getType());
if (Size)
- emitGlobalConstantImpl(DL, CV, *this);
+ emitGlobalConstantImpl(DL, CV, *this, nullptr, 0, AliasList);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
OutStreamer->emitIntValue(0, 1);
}
+ if (!AliasList)
+ return;
+ for (const auto &AliasPair : *AliasList)
+ report_fatal_error("Aliases with offset " + Twine(AliasPair.first) +
+ " were not emitted.");
}
void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
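Aside: a minimal standalone sketch of the bookkeeping the AliasMapTy hunks above add. Labels for aliases into a global are emitted at the offset of the sub-constant they point at, and any offset left unvisited is an error. std::map and printf stand in for AsmPrinter::AliasMapTy and MCStreamer::emitLabel; all names here are illustrative.

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    using AliasMap = std::map<uint64_t, std::vector<std::string>>;

    // Mirrors emitGlobalAliasInline: print the labels attached to Offset,
    // then drop them so leftovers can be diagnosed at the end.
    static void emitAliasesAt(AliasMap &Aliases, uint64_t Offset) {
      auto It = Aliases.find(Offset);
      if (It == Aliases.end())
        return;
      for (const std::string &Name : It->second)
        std::printf("%s:\n", Name.c_str());
      Aliases.erase(It);
    }

    int main() {
      AliasMap Aliases{{0, {".Lbase"}}, {8, {".Lelt1", ".Lelt1_alt"}}};
      const uint64_t EltSize = 8;
      for (uint64_t I = 0; I != 3; ++I) {
        emitAliasesAt(Aliases, I * EltSize); // label precedes the element data
        std::printf("  .quad %llu\n", (unsigned long long)I);
      }
      // The real code calls report_fatal_error for any alias not emitted.
      return Aliases.empty() ? 0 : 1;
    }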
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
index 28a02390fccb..c872d0dd2dfa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -51,5 +51,5 @@ HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
HANDLE_DIE_HASH_ATTR(DW_AT_type)
-
+HANDLE_DIE_HASH_ATTR(DW_AT_linkage_name)
#undef HANDLE_DIE_HASH_ATTR
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 5ce6fbb5f647..ad9dc517539a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1646,6 +1646,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
// No atomic libcalls are available for max/min/umax/umin.
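Aside: with no libcalls available, FMax/FMin take the usual fallback path of a compare-exchange loop. A rough standalone model of that expansion using std::atomic (std::max stands in for the real maxnum semantics; NaN handling is glossed over, and the real pass emits IR rather than calling std::atomic):

    #include <algorithm>
    #include <atomic>
    #include <cstdio>

    // atomicrmw fmax lowered as a CAS loop: recompute the max against the
    // freshly observed value until the exchange succeeds.
    static float atomicFMax(std::atomic<float> &Addr, float Val) {
      float Old = Addr.load();
      while (!Addr.compare_exchange_weak(Old, std::max(Old, Val)))
        ; // on failure, Old is reloaded; retry with the new maximum
      return Old; // atomicrmw yields the value previously in memory
    }

    int main() {
      std::atomic<float> A{1.0f};
      std::printf("old=%f new=%f\n", atomicFMax(A, 2.5f), A.load());
    }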
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 7883a48d121c..59932a542bbc 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -120,8 +120,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
// Fallback to whether the intrinsic is speculatable.
Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
- return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
- cast<Operator>(&VPI));
+ return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
}
//// } Helpers
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 081c8b125f17..b06043fb4c31 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -500,6 +500,12 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
LLT DstTy = MRI.getType(DstRegs[0]);
LLT LCMTy = getCoverTy(SrcTy, PartTy);
+ if (PartTy.isVector() && LCMTy == PartTy) {
+ assert(DstRegs.size() == 1);
+ B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
+ return;
+ }
+
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
unsigned CoveringSize = LCMTy.getSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2c94f87804ac..ad0c0c8315dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -697,14 +697,16 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
return false;
Register SrcReg = MI.getOperand(1).getReg();
- GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
- if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
- !LoadMI->isSimple())
+ // Don't use getOpcodeDef() here since intermediate instructions may have
+ // multiple users.
+ GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
return false;
Register LoadReg = LoadMI->getDstReg();
- LLT LoadTy = MRI.getType(LoadReg);
+ LLT RegTy = MRI.getType(LoadReg);
Register PtrReg = LoadMI->getPointerReg();
+ unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
unsigned MaskSizeBits = MaskVal.countTrailingOnes();
@@ -715,7 +717,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
// If the mask covers the whole destination register, there's nothing to
// extend
- if (MaskSizeBits >= LoadTy.getSizeInBits())
+ if (MaskSizeBits >= RegSize)
return false;
// Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +727,26 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
const MachineMemOperand &MMO = LoadMI->getMMO();
LegalityQuery::MemDesc MemDesc(MMO);
- MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadMI->isSimple())
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
return false;
MatchInfo = [=](MachineIRBuilder &B) {
B.setInstrAndDebugLoc(*LoadMI);
auto &MF = B.getMF();
auto PtrInfo = MMO.getPointerInfo();
- auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ LoadMI->eraseFromParent();
};
return true;
}
@@ -805,21 +816,24 @@ bool CombinerHelper::matchSextInRegOfLoad(
MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT RegTy = MRI.getType(DstReg);
+
// Only supports scalars for now.
- if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ if (RegTy.isVector())
return false;
Register SrcReg = MI.getOperand(1).getReg();
auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
- if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
- !LoadDef->isSimple())
+ if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
return false;
+ uint64_t MemBits = LoadDef->getMemSizeInBits();
+
// If the sign extend extends from a narrower width than the load's width,
// then we can narrow the load width when we combine to a G_SEXTLOAD.
// Avoid widening the load at all.
- unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
- LoadDef->getMemSizeInBits());
+ unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
// Don't generate G_SEXTLOADs with a < 1 byte width.
if (NewSizeBits < 8)
@@ -831,7 +845,15 @@ bool CombinerHelper::matchSextInRegOfLoad(
const MachineMemOperand &MMO = LoadDef->getMMO();
LegalityQuery::MemDesc MMDesc(MMO);
- MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+
+ // Don't modify the memory access size if this is atomic/volatile, but we can
+ // still adjust the opcode to indicate the high bit behavior.
+ if (LoadDef->isSimple())
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
+ return false;
+
+ // TODO: Could check if it's legal with the reduced or original memory size.
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
{MRI.getType(LoadDef->getDstReg()),
MRI.getType(LoadDef->getPointerReg())},
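Aside: the soundness of the narrowing in matchCombineLoadWithAndMask, shown on concrete bytes (little-endian assumed). Masking a wide load with a trailing-ones mask reads exactly the bytes a narrower zero-extending load would, which is why the combine may shrink the access; the new isSimple() guards keep atomic and volatile accesses at their original width and only retag the opcode.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      unsigned char Mem[4] = {0x78, 0x56, 0x34, 0x12}; // 0x12345678, LE
      uint32_t Wide;
      std::memcpy(&Wide, Mem, 4);
      uint32_t Masked = Wide & 0xFFFFu; // G_AND with a trailing-ones mask
      uint16_t Narrow;
      std::memcpy(&Narrow, Mem, 2);     // the narrowed G_ZEXTLOAD
      std::printf("%08x %08x\n", Masked, (uint32_t)Narrow); // both 00005678
      return Masked == Narrow ? 0 : 1;
    }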
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index a2af66d28f4a..947facc87b71 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2076,9 +2076,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
- getStackGuard(GuardVal, MIRBuilder);
+ Register GuardVal;
+ if (TLI.useLoadStackGuardNode()) {
+ GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ getStackGuard(GuardVal, MIRBuilder);
+ } else
+ GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
int FI = getOrCreateFrameIndex(*Slot);
@@ -2883,6 +2888,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::FSub:
Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
break;
+ case AtomicRMWInst::FMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
+ break;
+ case AtomicRMWInst::FMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
+ break;
}
MIRBuilder.buildAtomicRMW(
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 19ebf46191a9..0d9580e25606 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -473,6 +473,23 @@ MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
return buildInstr(ExtOp, Res, Op);
}
+MachineInstrBuilder MachineIRBuilder::buildBoolExtInReg(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsVector,
+ bool IsFP) {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVector, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return buildSExtInReg(Res, Op, 1);
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return buildZExtInReg(Res, Op, 1);
+ case TargetLoweringBase::UndefinedBooleanContent:
+ return buildCopy(Res, Op);
+ }
+
+ llvm_unreachable("unexpected BooleanContent");
+}
+
MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
const DstOp &Res,
const SrcOp &Op) {
@@ -938,6 +955,20 @@ MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
}
MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
return buildInstr(TargetOpcode::G_FENCE)
.addImm(Ordering)
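Aside: what the three getBooleanContents cases of buildBoolExtInReg amount to, modeled on plain integers (the UndefinedBooleanContent case is a plain copy and is omitted):

    #include <cstdint>
    #include <cstdio>

    // ZeroOrNegativeOneBooleanContent: replicate bit 0 across the register.
    static int32_t sextInReg1(uint32_t V) { return -(int32_t)(V & 1); }
    // ZeroOrOneBooleanContent: clear everything above bit 0.
    static uint32_t zextInReg1(uint32_t V) { return V & 1; }

    int main() {
      uint32_t Dirty = 0xABCDEF01; // bit 0 set, garbage above it
      std::printf("sext: %d  zext: %u\n", sextInReg1(Dirty), zextInReg1(Dirty));
      // prints "sext: -1  zext: 1"
    }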
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index b3f38a3b53f3..55f3ad796291 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -310,10 +310,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
Extracts.push_back(Extract);
continue;
}
- auto *BI = dyn_cast<BinaryOperator>(User);
- if (BI && BI->hasOneUse()) {
- if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
- BinOpShuffles.insert(SVI);
+ if (auto *BI = dyn_cast<BinaryOperator>(User)) {
+ if (all_of(BI->users(),
+ [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
+ for (auto *SVI : BI->users())
+ BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
continue;
}
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 30ca8bd871e8..43c12c67939e 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -536,6 +536,17 @@ public:
// What was the old variable value?
ValueIDNum OldValue = VarLocs[MLoc.asU64()];
+ clobberMloc(MLoc, OldValue, Pos, MakeUndef);
+ }
+ /// Overload that takes an explicit value \p OldValue for when the value in
+ /// \p MLoc has changed and the TransferTracker's locations have not been
+ /// updated yet.
+ void clobberMloc(LocIdx MLoc, ValueIDNum OldValue,
+ MachineBasicBlock::iterator Pos, bool MakeUndef = true) {
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
// Examine the remaining variable locations: if we can find the same value
@@ -1730,9 +1741,35 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
if (EmulateOldLDV && !SrcRegOp->isKill())
return false;
+ // Before we update MTracker, remember which values were present in each of
+ // the locations about to be overwritten, so that we can recover any
+ // potentially clobbered variables.
+ DenseMap<LocIdx, ValueIDNum> ClobberedLocs;
+ if (TTracker) {
+ for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+ LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+ auto MLocIt = TTracker->ActiveMLocs.find(ClobberedLoc);
+ // If ActiveMLocs isn't tracking this location or there are no variables
+ // using it, don't bother remembering.
+ if (MLocIt == TTracker->ActiveMLocs.end() || MLocIt->second.empty())
+ continue;
+ ValueIDNum Value = MTracker->readReg(*RAI);
+ ClobberedLocs[ClobberedLoc] = Value;
+ }
+ }
+
// Copy MTracker info, including subregs if available.
InstrRefBasedLDV::performCopy(SrcReg, DestReg);
+ // The copy might have clobbered variables based on the destination register.
+ // Tell TTracker about it, passing the old ValueIDNum to search for
+ // alternative locations (or else terminating those variables).
+ if (TTracker) {
+ for (auto LocVal : ClobberedLocs) {
+      TTracker->clobberMloc(LocVal.first, LocVal.second, MI.getIterator(),
+                            false);
+ }
+ }
+
// Only produce a transfer of DBG_VALUE within a block where old LDV
// would have. We might make use of the additional value tracking in some
// other way, later.
@@ -1744,15 +1781,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
if (EmulateOldLDV && SrcReg != DestReg)
MTracker->defReg(SrcReg, CurBB, CurInst);
- // Finally, the copy might have clobbered variables based on the destination
- // register. Tell TTracker about it, in case a backup location exists.
- if (TTracker) {
- for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
- LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
- TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false);
- }
- }
-
return true;
}
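Aside: the shape of the fix above as a toy model, with plain ints standing in for LocIdx and ValueIDNum. The values sitting in soon-to-be-clobbered locations must be read before performCopy() rewrites the location table, because clobberMloc() needs the old value to search for backup locations.

    #include <cstdio>
    #include <map>

    int main() {
      std::map<int, int> MLocTable = {{0, 100}, {1, 101}}; // loc -> value
      // 1. Snapshot what the destination location currently holds.
      std::map<int, int> Clobbered = {{0, MLocTable[0]}};
      // 2. Perform the copy; the destination's old value is now gone, so
      //    reading the table here (as the old code did) finds the new one.
      MLocTable[0] = MLocTable[1];
      // 3. Recover variables using the snapshot, not the updated table.
      for (auto [Loc, OldVal] : Clobbered)
        std::printf("loc %d previously held value %d\n", Loc, OldVal);
    }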
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 7d825a8bf853..1242ce20b732 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1049,12 +1049,17 @@ public:
// we may end up with a main range not covering all subranges.
// This is extremely rare case, so let's check and reconstruct the
// main range.
- for (LiveInterval::SubRange &S : LI.subranges()) {
- if (LI.covers(S))
- continue;
- LI.clear();
- LIS.constructMainRangeFromSubranges(LI);
- break;
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none() || LI.covers(S))
+ continue;
+ LI.clear();
+ LIS.constructMainRangeFromSubranges(LI);
+ break;
+ }
}
continue;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 40ae7053ea09..0c94e1f7e474 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -742,7 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID) {
- MBB->setSectionID(SectionID.getValue());
+ MBB->setSectionID(SectionID.value());
MF.setBBSectionsType(BasicBlockSection::List);
}
return false;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c186d0ba9969..02c44fa85cd9 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -451,7 +451,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (IrrLoopHeaderWeight && IsStandalone) {
if (Indexes) OS << '\t';
OS.indent(2) << "; Irreducible loop header weight: "
- << IrrLoopHeaderWeight.getValue() << '\n';
+ << IrrLoopHeaderWeight.value() << '\n';
}
}
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 81c97ba6a086..867a7ed584b2 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -106,8 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// We don't want to proceed further for cold functions
// or functions of unknown hotness. Lukewarm functions have no prefix.
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") ||
- SectionPrefix.getValue().equals("unknown"))) {
+ if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
+ SectionPrefix.value().equals("unknown"))) {
return false;
}
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4e00a211713e..5f80445a5a34 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -93,8 +93,11 @@ cl::opt<bool> VerifyScheduling(
cl::opt<bool> ViewMISchedDAGs(
"view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
+cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
const bool ViewMISchedDAGs = false;
+const bool PrintDAGs = false;
#endif // NDEBUG
} // end namespace llvm
@@ -112,10 +115,6 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
-static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
- cl::desc("Print schedule DAGs"));
-#else
-static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index db04f2bcc095..7a008bae726e 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -293,6 +293,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addUsedIfAvailable<LiveStacks>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 3245d9649be1..581168b31384 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1448,7 +1448,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
if (InitReg) {
- auto I = Phis.find({LoopReg, InitReg.getValue()});
+ auto I = Phis.find({LoopReg, InitReg.value()});
if (I != Phis.end())
return I->second;
} else {
@@ -1469,10 +1469,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
return R;
// Found a phi taking undef as input, so rewrite it to take InitReg.
MachineInstr *MI = MRI.getVRegDef(R);
- MI->getOperand(1).setReg(InitReg.getValue());
- Phis.insert({{LoopReg, InitReg.getValue()}, R});
+ MI->getOperand(1).setReg(InitReg.value());
+ Phis.insert({{LoopReg, InitReg.value()}, R});
const TargetRegisterClass *ConstrainRegClass =
- MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value()));
assert(ConstrainRegClass && "Expected a valid constrained register class!");
(void)ConstrainRegClass;
UndefPhis.erase(I);
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 1a0f296d5fdc..89a43c4f57f6 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -554,7 +554,7 @@ static void updateLiveness(MachineFunction &MF) {
}
}
-/// Insert restore code for the callee-saved registers used in the function.
+/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI) {
MachineFunction &MF = *SaveBlock.getParent();
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index d57b0ca6d53d..d6a3997e4b70 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -148,9 +148,6 @@ protected:
/// Run or not the local reassignment heuristic. This information is
/// obtained from the TargetSubtargetInfo.
const bool EnableLocalReassign;
-
-private:
- unsigned NextCascade = 1;
};
/// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index c199b6a6cca8..d627519a34aa 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -870,8 +870,8 @@ bool SelectOptimize::computeLoopCosts(
ORE->emit(ORmissL);
return false;
}
- IPredCost += Scaled64::get(ILatency.getValue());
- INonPredCost += Scaled64::get(ILatency.getValue());
+ IPredCost += Scaled64::get(ILatency.value());
+ INonPredCost += Scaled64::get(ILatency.value());
// For a select that can be converted to branch,
// compute its cost as a branch (non-predicated cost).
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa688d9dda3c..2654c00929d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2392,12 +2392,14 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
SDLoc DL(N);
- auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
- SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
- if (SDValue NewC =
- DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
- {ConstantOp, DAG.getConstant(1, DL, VT)}))
+ if (SDValue NewC = DAG.FoldConstantArithmetic(
+ IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+ {ConstantOp, DAG.getConstant(1, DL, VT)})) {
+ SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
+ Not.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+ }
+
return SDValue();
}
@@ -3760,6 +3762,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // If there's no chance of borrowing from adjacent bits, then sub is xor:
+ // sub C0, X --> xor X, C0
+ if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
+ if (!C0->isOpaque()) {
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
+ if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ }
+ }
+
return SDValue();
}
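Aside: why the single test against MaybeOnes in the new visitSUB fold suffices. A borrow can only arise where X might have a set bit that C0 lacks, and MaybeOnes is the bitwise-maximal X, so if subtracting it borrows nowhere, no concrete X can borrow either. An exhaustive 8-bit check of one instance:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t C0 = 0xAA;
      const uint8_t KnownZero = (uint8_t)~C0; // X's set bits lie under C0's
      const uint8_t MaybeOnes = (uint8_t)~KnownZero;
      // The combiner's guard, evaluated once:
      if ((uint8_t)(C0 - MaybeOnes) != (uint8_t)(C0 ^ MaybeOnes))
        return 1;
      // ...implies the identity for every admissible X:
      for (unsigned X = 0; X < 256; ++X)
        if ((X & KnownZero) == 0 && (uint8_t)(C0 - X) != (uint8_t)(C0 ^ X))
          return 1; // never reached
      std::puts("sub C0, X == xor X, C0 for all X with (X & KnownZero) == 0");
    }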
@@ -4550,13 +4563,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold (urem X, -1) -> select(FX == -1, 0, FX)
// Freeze the numerator to avoid a miscompile with an undefined value.
- if (!isSigned && N1C && N1C->isAllOnes()) {
+ if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
@@ -4581,9 +4593,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- if (N1.getOpcode() == ISD::SHL &&
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
+  // TODO: We should sink the following into isKnownToBeAPowerOfTwo
+  // using an OrZero parameter analogous to our handling in ValueTracking.
+ if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
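Aside: the scalar identity behind both the existing shl case and the new lshr case. Shifting a power of two keeps it a power of two, and x urem 2^k == x & (2^k - 1). A quick check:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Pow2 = 16, X = 1234;
      for (uint32_t Y = 0; Y < 4; ++Y) {
        uint32_t Dshl = Pow2 << Y, Dlshr = Pow2 >> Y; // both stay powers of two
        if (X % Dshl != (X & (Dshl - 1)) || X % Dlshr != (X & (Dlshr - 1)))
          return 1;
      }
      std::puts("urem x, (shift pow2, y) == and x, ((shift pow2, y) - 1)");
    }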
@@ -9288,31 +9303,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
- if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
- if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
- SDValue Shl = N0.getOperand(0);
- // Determine what the truncate's type would be and ask the target if that
- // is a free operation.
- LLVMContext &Ctx = *DAG.getContext();
- unsigned ShiftAmt = N1C->getZExtValue();
- EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
- if (VT.isVector())
- TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
-
- // TODO: The simple type check probably belongs in the default hook
- // implementation and/or target-specific overrides (because
- // non-simple types likely require masking when legalized), but that
- // restriction may conflict with other transforms.
- if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
- TLI.isTruncateFree(VT, TruncVT)) {
- SDLoc DL(N);
- SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
- SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
- trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
- return DAG.getSExtOrTrunc(Add, DL, VT);
+ // sra (sub AddC, (shl X, N1C)), N1C -->
+  //   sext (sub AddC', (trunc X to (width - N1C)))
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+ N0.hasOneUse()) {
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+ if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+ Shl.hasOneUse()) {
+ // TODO: AddC does not need to be a splat.
+ if (ConstantSDNode *AddC =
+ isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+ // Determine what the truncate's type would be and ask the target if
+ // that is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but
+ // that restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+ TruncVT.getScalarSizeInBits()),
+ DL, TruncVT);
+ SDValue Add;
+ if (IsAdd)
+ Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ else
+ Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
}
}
}
@@ -11025,6 +11053,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
return V;
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -13243,18 +13274,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
- // See if we can simplify the input to this truncate through knowledge that
- // only the low bits are being used.
- // For example "trunc (or (shl x, 8), y)" // -> trunc y
- // Currently we only perform this optimization on scalars because vectors
- // may have different active low bits.
- if (!VT.isVector()) {
- APInt Mask =
- APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
- if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
- }
-
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -13341,6 +13360,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ APInt Mask =
+ APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+ if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+ }
+
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -24514,8 +24545,9 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
- SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
+ Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
+ OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+ SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f464208cd9dc..6c136bdfc652 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2915,6 +2915,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ case ISD::STACKMAP:
+ Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
+ break;
}
if (!Res.getNode())
@@ -3042,3 +3045,17 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
ST->getMemOperand());
}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Op = N->getOperand(OpNo);
+ NewOps[OpNo] = GetSoftPromotedHalf(Op);
+ SDValue NewNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we replaced the node ourselves.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 69fd83bcd7b3..343722a97c3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -1723,6 +1724,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
+ case ISD::STACKMAP:
+ Res = PromoteIntOp_STACKMAP(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2255,16 +2259,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
- EVT EltVT = Op.getValueType().getVectorElementType();
- EVT VT = N->getValueType(0);
+ EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType();
+ EVT InVT = Op.getValueType();
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ResVT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
- if (VT.bitsGE(EltVT))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+  // An i1 vecreduce_xor is equivalent to vecreduce_add; use that instead if
+  // vecreduce_xor is not legal.
+ if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT))
+ Opcode = ISD::VECREDUCE_ADD;
+
+  // An i1 vecreduce_or is equivalent to vecreduce_umax; use that instead if
+  // vecreduce_or is not legal.
+ else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+ Opcode = ISD::VECREDUCE_UMAX;
+
+  // An i1 vecreduce_and is equivalent to vecreduce_umin; use that instead if
+  // vecreduce_and is not legal.
+ else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+ Opcode = ISD::VECREDUCE_UMIN;
+
+ if (ResVT.bitsGE(EltVT))
+ return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
// Result size must be >= element size. If this is not the case after
// promotion, also promote the result type and then truncate.
- SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+ SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce);
}
SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
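Aside: the three rewrites above rest on i1 arithmetic identities; since the promoted reduction is truncated back to i1 at the end, only the low bit of the result matters. Exhaustively over the two-element domain:

    #include <cstdio>

    int main() {
      for (int A = 0; A <= 1; ++A)
        for (int B = 0; B <= 1; ++B) {
          bool XorIsAdd = (A ^ B) == ((A + B) & 1); // low bit of the sum
          bool OrIsUMax = (A | B) == (A > B ? A : B);
          bool AndIsUMin = (A & B) == (A < B ? A : B);
          if (!(XorIsAdd && OrIsUMax && AndIsUMin))
            return 1;
        }
      std::puts("i1: xor == add (mod 2), or == umax, and == umin");
    }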
@@ -2304,6 +2332,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Operand = N->getOperand(OpNo);
+  EVT NVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+ NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -4653,6 +4690,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ case ISD::STACKMAP:
+ Res = ExpandIntOp_STACKMAP(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -5481,3 +5521,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+ assert(OpNo > 1);
+
+ SDValue Op = N->getOperand(OpNo);
+ SDLoc DL = SDLoc(N);
+ SmallVector<SDValue> NewOps;
+
+ // Copy operands before the one being expanded.
+ for (unsigned I = 0; I < OpNo; I++)
+ NewOps.push_back(N->getOperand(I));
+
+ if (Op->getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op);
+ EVT Ty = Op.getValueType();
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+ } else {
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
+ return SDValue();
+ }
+ } else {
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ return SDValue();
+ }
+
+ // Copy remaining operands.
+ for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+ NewOps.push_back(N->getOperand(I));
+
+ SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we have replaced the node already.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index de320290bda9..2807b7f5ae68 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -402,6 +402,7 @@ private:
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
+ SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -493,6 +494,7 @@ private:
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
@@ -741,6 +743,7 @@ private:
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index fa555be00ded..143abc08eeea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5627,7 +5627,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
- unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
@@ -5639,7 +5638,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// See if a widened result type would be legal, if so widen the node.
// FIXME: This isn't safe for StrictFP. Other optimization here is needed.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- InVT.getVectorNumElements());
+ InVT.getVectorElementCount());
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
@@ -5665,6 +5664,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT InEltVT = InVT.getVectorElementType();
// Unroll the convert into some scalar code and create a nasty build vector.
+ unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
if (N->isStrictFPOpcode()) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
@@ -6055,7 +6055,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
if (VT.getScalarType() == MVT::i1)
SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- SVT.getVectorNumElements());
+ SVT.getVectorElementCount());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
SVT, InOp0, InOp1, N->getOperand(2));
@@ -6063,7 +6063,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
- VT.getVectorNumElements());
+ VT.getVectorElementCount());
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getVectorIdxConstant(0, dl));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b3b8756ae9ba..c8d0f5faf647 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -60,7 +60,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
@@ -3271,6 +3270,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -3506,6 +3506,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::USUBO:
case ISD::SSUBO:
+ case ISD::SUBCARRY:
+ case ISD::SSUBO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3520,6 +3522,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
+ // TODO: Compute influence of the carry operand.
+ if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY)
+ break;
+
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
@@ -3529,6 +3535,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::UADDO:
case ISD::SADDO:
case ISD::ADDCARRY:
+ case ISD::SADDO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3548,7 +3555,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::ADDCARRY)
+ else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY)
// TODO: Compute known bits for the carry operand. Not sure if it is worth
// the trouble (how often will we find a known carry bit). And I haven't
// tested this very much yet, but something like this might work:
@@ -3862,6 +3869,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
return true;
+  // vscale(power-of-two) is a power-of-two for some targets.
+ if (Val.getOpcode() == ISD::VSCALE &&
+ getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0)))
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
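Aside: the new VSCALE case relies on powers of two being closed under multiplication. If the target hook guarantees vscale is a power of two and the node's constant operand is one as well, so is their product. Sketch of the closure property:

    #include <cstdint>
    #include <cstdio>

    static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

    int main() {
      for (uint64_t VScale : {1u, 2u, 16u})   // plausible vscale values
        for (uint64_t Mul : {1u, 4u, 8u})     // the VSCALE node's operand
          if (!isPow2(VScale * Mul))
            return 1;
      std::puts("pow2 * pow2 is pow2");
    }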
@@ -4108,8 +4121,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
+ case ISD::SADDO_CARRY:
+ case ISD::ADDCARRY:
case ISD::SSUBO:
case ISD::USUBO:
+ case ISD::SSUBO_CARRY:
+ case ISD::SUBCARRY:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -4123,6 +4140,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits;
break;
case ISD::SETCC:
+ case ISD::SETCCCARRY:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -7505,6 +7523,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_UMAX ||
Opcode == ISD::ATOMIC_LOAD_FADD ||
Opcode == ISD::ATOMIC_LOAD_FSUB ||
+ Opcode == ISD::ATOMIC_LOAD_FMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FMIN ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -10739,19 +10759,19 @@ namespace {
} // end anonymous namespace
-static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
-static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true>> VTMutex;
-
/// getValueTypeList - Return a pointer to the specified value type.
///
const EVT *SDNode::getValueTypeList(EVT VT) {
+ static std::set<EVT, EVT::compareRawBits> EVTs;
+ static EVTArray SimpleVTArray;
+ static sys::SmartMutex<true> VTMutex;
+
if (VT.isExtended()) {
- sys::SmartScopedLock<true> Lock(*VTMutex);
- return &(*EVTs->insert(VT).first);
+ sys::SmartScopedLock<true> Lock(VTMutex);
+ return &(*EVTs.insert(VT).first);
}
assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!");
- return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy];
}
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
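The ManagedStatic removal above relies on C++11 function-local statics, whose initialization is guaranteed to run exactly once even with concurrent callers; a mutex is then only needed to guard mutation of the pool. A minimal sketch of the pattern, with illustrative names:

    #include <mutex>
    #include <set>

    const int *internValue(int V) {
      static std::set<int> Pool;  // initialization is thread-safe since C++11
      static std::mutex PoolLock; // still required to serialize inserts
      std::lock_guard<std::mutex> Lock(PoolLock);
      return &*Pool.insert(V).first; // set nodes are stable across inserts
    }

    int main() { return internValue(42) == internValue(42) ? 0 : 1; }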
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 37d05cdba76d..fe3c38ec590d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -703,7 +703,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
@@ -800,11 +800,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
- ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
+ ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
- ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
+ ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
@@ -831,10 +831,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
- MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(),
- CallConv.getValue(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT =
+ isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), CallConv.value(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -914,10 +914,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
- MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
- *DAG.getContext(),
- CallConv.getValue(), RegVTs[Value])
- : RegVTs[Value];
+ MVT RegisterVT =
+ isABIMangled() ? TLI.getRegisterTypeForCallingConv(
+ *DAG.getContext(), CallConv.value(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -1309,7 +1309,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
- << DDI.getDI() << "\nBy stripping back to:\n " << V);
+ << *DDI.getDI() << "\nBy stripping back to:\n " << *V);
return;
}
}
@@ -1321,7 +1321,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
@@ -3747,13 +3747,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
-void SelectionDAGBuilder::visitInsertValue(const User &I) {
- ArrayRef<unsigned> Indices;
- if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
- Indices = IV->getIndices();
- else
- Indices = cast<ConstantExpr>(&I)->getIndices();
-
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
@@ -4616,6 +4611,8 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
+ case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
+ case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
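For readers new to the FMax/FMin RMW operations mapped above: on targets without a native instruction they are typically expanded to a compare-exchange loop. The sketch below shows those semantics in plain C++ (std::max stands in for the IR's maxnum, so NaN handling is glossed over; this is not the SelectionDAG code path itself):

    #include <algorithm>
    #include <atomic>
    #include <cassert>

    // atomicrmw fmax as a CAS loop; returns the previous value, as the IR does.
    static float atomicFMax(std::atomic<float> &A, float V) {
      float Old = A.load(std::memory_order_relaxed);
      while (!A.compare_exchange_weak(Old, std::max(Old, V),
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
        // On failure, Old is refreshed with the current value; retry with it.
      }
      return Old;
    }

    int main() {
      std::atomic<float> A(1.0f);
      assert(atomicFMax(A, 2.5f) == 1.0f && A.load() == 2.5f);
    }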
@@ -8410,52 +8407,6 @@ public:
return false;
}
-
- /// getCallOperandValEVT - Return the EVT of the Value* that this operand
- /// corresponds to. If there is no Value* for this operand, it returns
- /// MVT::Other.
- EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL,
- llvm::Type *ParamElemType) const {
- if (!CallOperandVal) return MVT::Other;
-
- if (isa<BasicBlock>(CallOperandVal))
- return TLI.getProgramPointerTy(DL);
-
- llvm::Type *OpTy = CallOperandVal->getType();
-
- // FIXME: code duplicated from TargetLowering::ParseConstraints().
- // If this is an indirect operand, the operand is a pointer to the
- // accessed type.
- if (isIndirect) {
- OpTy = ParamElemType;
- assert(OpTy && "Indirect operand must have elementtype attribute");
- }
-
- // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
- if (StructType *STy = dyn_cast<StructType>(OpTy))
- if (STy->getNumElements() == 1)
- OpTy = STy->getElementType(0);
-
- // If OpTy is not a single value, it may be a struct/union that we
- // can tile with integers.
- if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = DL.getTypeSizeInBits(OpTy);
- switch (BitSize) {
- default: break;
- case 1:
- case 8:
- case 16:
- case 32:
- case 64:
- case 128:
- OpTy = IntegerType::get(Context, BitSize);
- break;
- }
- }
-
- return TLI.getAsmOperandValueType(DL, OpTy, true);
- }
};
@@ -8722,37 +8673,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- unsigned ResNo = 0; // ResNo - The result number of the next output.
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- // Compute the value type for each operand.
- if (OpInfo.hasArg()) {
- OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
+ if (OpInfo.CallOperandVal)
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- Type *ParamElemTy = Call.getParamElementType(ArgNo);
- EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
- DAG.getDataLayout(), ParamElemTy);
- OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
- ArgNo++;
- } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
- // The return value of the call is this value. As such, there is no
- // corresponding argument.
- assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
- OpInfo.ConstraintVT = TLI.getSimpleValueType(
- DAG.getDataLayout(), STy->getElementType(ResNo));
- } else {
- assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
- DAG.getDataLayout(), Call.getType()).getSimpleVT();
- }
- ++ResNo;
- } else {
- OpInfo.ConstraintVT = MVT::Other;
- }
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
@@ -8865,7 +8791,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- const char *RegName = TRI.getName(RegError.getValue());
+ const char *RegName = TRI.getName(RegError.value());
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
@@ -9385,9 +9311,9 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
}
}
-/// Lower llvm.experimental.stackmap directly to its target opcode.
+/// Lower llvm.experimental.stackmap.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
- // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
@@ -9412,29 +9338,45 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
- // Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
- SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
- Ops.push_back(DAG.getTargetConstant(
- cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
- MVT::i32));
-
- // Push live variables for the stack map.
- addStackMapLiveVars(CI, 2, DL, Ops, *this);
-
- // We are not pushing any register mask info here on the operands list,
- // because the stackmap doesn't clobber anything.
-
- // Push the chain and the glue flag.
+ // Add the STACKMAP operands, starting with DAG housekeeping.
Ops.push_back(Chain);
Ops.push_back(InFlag);
+ // Add the <id> and <numShadowBytes> operands.
+ //
+ // These do not require legalization and can be emitted directly as target
+ // constant nodes.
+ SDValue ID = getValue(CI.getArgOperand(0));
+ assert(ID.getValueType() == MVT::i64);
+ SDValue IDConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType());
+ Ops.push_back(IDConst);
+
+ SDValue Shad = getValue(CI.getArgOperand(1));
+ assert(Shad.getValueType() == MVT::i32);
+ SDValue ShadConst = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType());
+ Ops.push_back(ShadConst);
+
+ // Add the live variables.
+ for (unsigned I = 2; I < CI.arg_size(); I++) {
+ SDValue Op = getValue(CI.getArgOperand(I));
+
+ // Frame-index operands (e.g. lowered allocas) are pointer-typed and
+ // already legal, so they can be emitted directly as target nodes.
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout())));
+ } else {
+ // Otherwise emit a target-independent node to be legalized.
+ Ops.push_back(Op);
+ }
+ }
+
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
- Chain = SDValue(SM, 0);
+ Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
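A note on operand order: the builder now pushes the chain and glue first, while the machine node produced during selection expects them last; Select_STACKMAP in SelectionDAGISel.cpp (further down in this patch) performs that reshuffle. A toy model of the reordering, with illustrative names only:

    #include <cstdio>
    #include <string>
    #include <vector>

    // [chain, glue, id, shadow, lives...] -> [id, shadow, lives..., chain, glue]
    static std::vector<std::string> reorder(const std::vector<std::string> &N) {
      std::vector<std::string> Ops(N.begin() + 2, N.end());
      Ops.push_back(N[0]); // chain moves to the back
      Ops.push_back(N[1]); // glue follows it
      return Ops;
    }

    int main() {
      for (const std::string &Op : reorder({"chain", "glue", "id", "shadow", "x"}))
        printf("%s ", Op.c_str()); // id shadow x chain glue
      printf("\n");
    }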
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 72cca3d9b001..4a3ab00614b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -529,7 +529,7 @@ private:
void visitShuffleVector(const User &I);
void visitExtractValue(const ExtractValueInst &I);
- void visitInsertValue(const User &I);
+ void visitInsertValue(const InsertValueInst &I);
void visitLandingPad(const LandingPadInst &LP);
void visitGetElementPtr(const User &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index bbfc6e5ef64f..9df0b64c26c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -486,6 +486,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ case ISD::STACKMAP: return "stackmap";
// Vector Predication
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2b63359c2b1b..7f453f081982 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -51,6 +50,7 @@
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -64,7 +64,6 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
@@ -345,47 +344,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
-/// may trap on it. In this case we have to split the edge so that the path
-/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass domtree
-/// and loop info to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves these analyses.
-/// This is required for correctness, so it must be done at -O0.
-///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
- LoopInfo *LI) {
- // Loop for blocks with phi nodes.
- for (BasicBlock &BB : Fn) {
- PHINode *PN = dyn_cast<PHINode>(BB.begin());
- if (!PN) continue;
-
- ReprocessBlock:
- // For each block with a PHI node, check to see if any of the input values
- // are potentially trapping constant expressions. Constant expressions are
- // the only potentially trapping value that can occur as the argument to a
- // PHI.
- for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (!C || !C->canTrap()) continue;
-
- // The only case we have to worry about is when the edge is critical.
- // Since this block has a PHI Node, we assume it has multiple input
- // edges: check to see if the pred has multiple successors.
- BasicBlock *Pred = PN->getIncomingBlock(i);
- if (Pred->getTerminator()->getNumSuccessors() == 1)
- continue;
-
- // Okay, we have to split this edge.
- SplitCriticalEdge(
- Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
- CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
- goto ReprocessBlock;
- }
- }
-}
-
static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
MachineModuleInfo &MMI) {
// Only needed for MSVC
@@ -445,10 +403,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
@@ -456,8 +410,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
-
CurDAG->init(*MF, *ORE, this, LibInfo,
getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
FuncInfo->set(Fn, *MF, CurDAG);
@@ -2241,6 +2193,52 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
+ std::vector<SDValue> Ops;
+ auto *It = N->op_begin();
+ SDLoc DL(N);
+
+ // Stash the chain and glue operands so we can move them to the end.
+ SDValue Chain = *It++;
+ SDValue InFlag = *It++;
+
+ // <id> operand.
+ SDValue ID = *It++;
+ assert(ID.getValueType() == MVT::i64);
+ Ops.push_back(ID);
+
+ // <numShadowBytes> operand.
+ SDValue Shad = *It++;
+ assert(Shad.getValueType() == MVT::i32);
+ Ops.push_back(Shad);
+
+ // Live variable operands.
+ for (; It != N->op_end(); It++) {
+ SDNode *OpNode = It->getNode();
+ SDValue O;
+
+ // FrameIndex nodes should already have been emitted as TargetFrameIndex
+ // nodes at DAG-construction time.
+ assert(OpNode->getOpcode() != ISD::FrameIndex);
+
+ if (OpNode->getOpcode() == ISD::Constant) {
+ Ops.push_back(
+ CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ O = CurDAG->getTargetConstant(
+ cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType());
+ } else {
+ O = *It;
+ }
+ Ops.push_back(O);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
+}
+
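The StackMaps::ConstantOp sentinel pushed above tells the downstream stackmap emitter that the operand following it is an immediate rather than a register. The shape of that tagging scheme, sketched with a made-up sentinel value:

    #include <cstdint>
    #include <vector>

    enum : uint64_t { ConstantOpMarker = 0xFFFF }; // stand-in sentinel only

    static void pushLiveVar(std::vector<uint64_t> &Ops, bool IsConstant,
                            uint64_t V) {
      if (IsConstant)
        Ops.push_back(ConstantOpMarker); // announce "next operand is immediate"
      Ops.push_back(V);
    }

    int main() {
      std::vector<uint64_t> Ops;
      pushLiveVar(Ops, /*IsConstant=*/true, 7);  // emits marker, then 7
      pushLiveVar(Ops, /*IsConstant=*/false, 3); // emits 3 alone
      return Ops.size() == 3 ? 0 : 1;
    }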
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2795,6 +2793,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ARITH_FENCE:
Select_ARITH_FENCE(NodeToMatch);
return;
+ case ISD::STACKMAP:
+ Select_STACKMAP(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 19a52fde44c1..3061158eea30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -531,14 +531,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
for (const Value *V : SI.Bases) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt) {
- assert(Opt.getValue() &&
+ assert(Opt.value() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : SI.Ptrs) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt) {
- assert(Opt.getValue() &&
+ assert(Opt.value() &&
"non gc managed derived pointer found in statepoint");
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a6b471ea22b7..66389a57f780 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
+ // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
+ if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ (Op0.getOperand(0).isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
+ Op0->hasOneUse()) {
+ unsigned NumSubElts =
+ Op0.getOperand(1).getValueType().getVectorNumElements();
+ unsigned SubIdx = Op0.getConstantOperandVal(2);
+ APInt DemandedSub =
+ APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
+ KnownBits KnownSubMask =
+ TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
+ if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
+ SDValue NewAnd =
+ TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
+ SDValue NewInsert =
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
+ Op0.getOperand(1), Op0.getOperand(2));
+ return TLO.CombineTo(Op, NewInsert);
+ }
+ }
+
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
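A concrete lane-level instance of the fold added above, played out on plain arrays: when the mask is all-ones over the lanes covered by the inserted subvector, masking after the insert is the same as inserting into the masked base:

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::array<uint8_t, 4> C{0xAA, 0xBB, 0xCC, 0xDD}; // base vector
      std::array<uint8_t, 2> X{0x12, 0x34};             // subvector at index 1
      std::array<uint8_t, 4> M{0x0F, 0xFF, 0xFF, 0x0F}; // all-ones on lanes 1..2

      // LHS: AND(INSERT_SUBVECTOR(C, X, 1), M)
      std::array<uint8_t, 4> LHS = C;
      LHS[1] = X[0]; LHS[2] = X[1];
      for (int i = 0; i < 4; ++i) LHS[i] &= M[i];

      // RHS: INSERT_SUBVECTOR(AND(C, M), X, 1)
      std::array<uint8_t, 4> RHS = C;
      for (int i = 0; i < 4; ++i) RHS[i] &= M[i];
      RHS[1] = X[0]; RHS[2] = X[1];

      assert(LHS == RHS);
    }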
@@ -1371,20 +1394,6 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
- // Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
- SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
- Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
- Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- if (DemandedOp0 || DemandedOp1) {
- Op0 = DemandedOp0 ? DemandedOp0 : Op0;
- Op1 = DemandedOp1 ? DemandedOp1 : Op1;
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
- return TLO.CombineTo(Op, NewOp);
- }
- }
-
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -1402,6 +1411,20 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known &= Known2;
break;
}
@@ -1418,6 +1441,19 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
+ return TLO.CombineTo(Op, Op0);
+ if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
+ return TLO.CombineTo(Op, Op1);
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
@@ -1432,19 +1468,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- // If all of the demanded bits are known zero on one side, return the other.
- // These bits cannot contribute to the result of the 'or'.
- if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
- return TLO.CombineTo(Op, Op0);
- if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
- return TLO.CombineTo(Op, Op1);
- // If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
- return true;
- // If the operation can be done in a smaller type, do so.
- if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
-
Known |= Known2;
break;
}
@@ -1461,20 +1484,6 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
- // Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
- SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
- Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
- Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
- if (DemandedOp0 || DemandedOp1) {
- Op0 = DemandedOp0 ? DemandedOp0 : Op0;
- Op1 = DemandedOp1 ? DemandedOp1 : Op1;
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
- return TLO.CombineTo(Op, NewOp);
- }
- }
-
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1519,6 +1528,20 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known ^= Known2;
break;
}
@@ -1972,9 +1995,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1);
if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1);
break;
}
case ISD::UMAX: {
@@ -1985,9 +2008,9 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1);
if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+ return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
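The umin/umax folds above compare value ranges recovered from known bits: the smallest value an operand can take is its known-one mask, and the largest is the complement of its known-zero mask. A compact stand-in for the comparison (not LLVM's KnownBits API):

    #include <cassert>
    #include <cstdint>

    struct Known {
      uint32_t Zero, One; // bits proven 0 / proven 1
      uint32_t minValue() const { return One; }
      uint32_t maxValue() const { return ~Zero; }
    };

    int main() {
      Known A{0xFFFFFF00u, 0x00000000u}; // some value in [0x00, 0xFF]
      Known B{0x00000000u, 0x00000100u}; // bit 8 proven set, so >= 0x100
      // max(A) < min(B) proves umin(A, B) == A for every concrete pair.
      assert(A.maxValue() < B.minValue());
    }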
@@ -2486,9 +2509,7 @@ bool TargetLowering::SimplifyDemandedBits(
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp =
- TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
- return TLO.CombineTo(Op, NewOp);
+ Op->setFlags(Flags);
}
return true;
}
@@ -3031,15 +3052,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::VSELECT: {
+ SDValue Sel = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
// Try to transform the select condition based on the current demanded
// elements.
- // TODO: If a condition element is undef, we can choose from one arm of the
- // select (and if one arm is undef, then we can propagate that to the
- // result).
- // TODO - add support for constant vselect masks (see IR version of this).
- APInt UnusedUndef, UnusedZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
- UnusedZero, TLO, Depth + 1))
+ APInt UndefSel, UndefZero;
+ if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
+ Depth + 1))
return true;
// See if we can simplify either vselect operand.
@@ -3047,15 +3068,24 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
KnownUndef = UndefLHS & UndefRHS;
KnownZero = ZeroLHS & ZeroRHS;
+
+ // If the selected element is known to be zero whichever arm is chosen,
+ // the corresponding condition element is not demanded.
+ APInt DemandedSel = DemandedElts & ~KnownZero;
+ if (DemandedSel != DemandedElts)
+ if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
+ Depth + 1))
+ return true;
+
break;
}
case ISD::VECTOR_SHUFFLE: {
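The extra simplification pass over the condition exploits lanes whose result is known zero in both arms: the condition can never influence them. A lane-by-lane demonstration on plain arrays (lane 2 below):

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> LHS{1, 2, 0, 4}, RHS{5, 6, 0, 8}; // lane 2 zero in both
      for (int CondLane2 = 0; CondLane2 <= 1; ++CondLane2) {
        std::array<bool, 4> Cond{true, false, CondLane2 != 0, true};
        std::array<int, 4> Res;
        for (int i = 0; i < 4; ++i)
          Res[i] = Cond[i] ? LHS[i] : RHS[i];
        assert(Res[2] == 0); // condition lane 2 cannot change the result
      }
    }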
@@ -5239,17 +5269,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case 32:
case 64:
case 128:
- OpInfo.ConstraintVT =
- MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ OpTy = IntegerType::get(OpTy->getContext(), BitSize);
break;
}
- } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
- OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
- } else {
- OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
+ EVT VT = getAsmOperandValueType(DL, OpTy, true);
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
ArgNo++;
}
}
@@ -7833,7 +7859,7 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
- for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
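The new loop bound runs the shift amounts 1, 2, 4, ... while they are still smaller than the element width, which also handles non-power-of-two widths. For reference, the whole expansion as a standalone 32-bit function: smear the highest set bit downward, then count leading zeros as the popcount of the complement:

    #include <cassert>
    #include <cstdint>

    static unsigned ctlz32(uint32_t X) {
      for (unsigned Shift = 1; Shift < 32; Shift <<= 1)
        X |= X >> Shift; // smear the top set bit into all lower positions
      return __builtin_popcount(~X); // leading zeros of the original value
    }

    int main() {
      assert(ctlz32(0) == 32 && ctlz32(1) == 31 && ctlz32(0x80000000u) == 0);
    }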
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f3d68bd9c92d..2badbe34ae6a 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -449,9 +449,6 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
- if (Name == ".llvm.offloading")
- return SectionKind::getExclude();
-
if (Name.empty() || Name[0] != '.') return K;
// Default implementation based on some magic section names.
@@ -501,6 +498,9 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (hasPrefix(Name, ".preinit_array"))
return ELF::SHT_PREINIT_ARRAY;
+ if (hasPrefix(Name, ".llvm.offloading"))
+ return ELF::SHT_LLVM_OFFLOADING;
+
if (K.isBSS() || K.isThreadBSS())
return ELF::SHT_NOBITS;