Diffstat (limited to 'llvm/lib/CodeGen')
33 files changed, 677 insertions, 390 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4a31bf85446b..94612a51d2e1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1816,6 +1816,11 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
   if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
     assert(MAI->hasVisibilityOnlyWithLinkage() &&
            "Visibility should be handled with emitLinkage() on AIX.");
+
+    // Linkage for alias of global variable has been emitted.
+    if (isa<GlobalVariable>(GA.getAliaseeObject()))
+      return;
+
     emitLinkage(&GA, Name);
     // If it's a function, also emit linkage for aliases of function entry
     // point.
@@ -2860,7 +2865,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
 
 static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
                                    AsmPrinter &AP,
                                    const Constant *BaseCV = nullptr,
-                                   uint64_t Offset = 0);
+                                   uint64_t Offset = 0,
+                                   AsmPrinter::AliasMapTy *AliasList = nullptr);
 
 static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
 static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);
@@ -2914,9 +2920,21 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
   return -1;
 }
 
-static void emitGlobalConstantDataSequential(const DataLayout &DL,
-                                             const ConstantDataSequential *CDS,
-                                             AsmPrinter &AP) {
+static void emitGlobalAliasInline(AsmPrinter &AP, uint64_t Offset,
+                                  AsmPrinter::AliasMapTy *AliasList) {
+  if (AliasList) {
+    auto AliasIt = AliasList->find(Offset);
+    if (AliasIt != AliasList->end()) {
+      for (const GlobalAlias *GA : AliasIt->second)
+        AP.OutStreamer->emitLabel(AP.getSymbol(GA));
+      AliasList->erase(Offset);
+    }
+  }
+}
+
+static void emitGlobalConstantDataSequential(
+    const DataLayout &DL, const ConstantDataSequential *CDS, AsmPrinter &AP,
+    AsmPrinter::AliasMapTy *AliasList) {
   // See if we can aggregate this into a .fill, if so, emit it as such.
   int Value = isRepeatedByteSequence(CDS, DL);
   if (Value != -1) {
@@ -2933,17 +2951,20 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
   // Otherwise, emit the values in successive locations.
   unsigned ElementByteSize = CDS->getElementByteSize();
   if (isa<IntegerType>(CDS->getElementType())) {
-    for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
       if (AP.isVerbose())
         AP.OutStreamer->getCommentOS()
-            << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i));
-      AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
+            << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(I));
+      AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(I),
                                    ElementByteSize);
     }
   } else {
     Type *ET = CDS->getElementType();
-    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      emitGlobalAliasInline(AP, ElementByteSize * I, AliasList);
       emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);
+    }
   }
 
   unsigned Size = DL.getTypeAllocSize(CDS->getType());
@@ -2956,7 +2977,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
 
 static void emitGlobalConstantArray(const DataLayout &DL,
                                     const ConstantArray *CA, AsmPrinter &AP,
-                                    const Constant *BaseCV, uint64_t Offset) {
+                                    const Constant *BaseCV, uint64_t Offset,
+                                    AsmPrinter::AliasMapTy *AliasList) {
   // See if we can aggregate some values.  Make sure it can be
   // represented as a series of bytes of the constant value.
   int Value = isRepeatedByteSequence(CA, DL);
@@ -2964,44 +2986,75 @@ static void emitGlobalConstantArray(const DataLayout &DL,
   if (Value != -1) {
     uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
     AP.OutStreamer->emitFill(Bytes, Value);
-  }
-  else {
-    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
-      emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
-      Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+  } else {
+    for (unsigned I = 0, E = CA->getNumOperands(); I != E; ++I) {
+      emitGlobalConstantImpl(DL, CA->getOperand(I), AP, BaseCV, Offset,
+                             AliasList);
+      Offset += DL.getTypeAllocSize(CA->getOperand(I)->getType());
     }
   }
 }
 
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP);
+
 static void emitGlobalConstantVector(const DataLayout &DL,
-                                     const ConstantVector *CV, AsmPrinter &AP) {
-  for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
-    emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
+                                     const ConstantVector *CV, AsmPrinter &AP,
+                                     AsmPrinter::AliasMapTy *AliasList) {
+  Type *ElementType = CV->getType()->getElementType();
+  uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType);
+  uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType);
+  uint64_t EmittedSize;
+  if (ElementSizeInBits != ElementAllocSizeInBits) {
+    // If the allocation size of an element is different from the size in bits,
+    // printing each element separately will insert incorrect padding.
+    //
+    // The general algorithm here is complicated; instead of writing it out
+    // here, just use the existing code in ConstantFolding.
+    Type *IntT =
+        IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType()));
+    ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant(
+        ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL));
+    if (!CI) {
+      report_fatal_error(
+          "Cannot lower vector global with unusual element type");
+    }
+    emitGlobalAliasInline(AP, 0, AliasList);
+    emitGlobalConstantLargeInt(CI, AP);
+    EmittedSize = DL.getTypeStoreSize(CV->getType());
+  } else {
+    for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) {
+      emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I, AliasList);
+      emitGlobalConstantImpl(DL, CV->getOperand(I), AP);
+    }
+    EmittedSize =
+        DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements();
+  }
 
   unsigned Size = DL.getTypeAllocSize(CV->getType());
-  unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
-                         CV->getType()->getNumElements();
   if (unsigned Padding = Size - EmittedSize)
     AP.OutStreamer->emitZeros(Padding);
 }
 
 static void emitGlobalConstantStruct(const DataLayout &DL,
                                      const ConstantStruct *CS, AsmPrinter &AP,
-                                     const Constant *BaseCV, uint64_t Offset) {
+                                     const Constant *BaseCV, uint64_t Offset,
+                                     AsmPrinter::AliasMapTy *AliasList) {
   // Print the fields in successive locations. Pad to align if needed!
   unsigned Size = DL.getTypeAllocSize(CS->getType());
   const StructLayout *Layout = DL.getStructLayout(CS->getType());
   uint64_t SizeSoFar = 0;
-  for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
-    const Constant *Field = CS->getOperand(i);
+  for (unsigned I = 0, E = CS->getNumOperands(); I != E; ++I) {
+    const Constant *Field = CS->getOperand(I);
 
     // Print the actual field value.
-    emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
+    emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar,
+                           AliasList);
 
     // Check if padding is needed and insert one or more 0s.
     uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
-    uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) -
-                        Layout->getElementOffset(i)) - FieldSize;
+    uint64_t PadSize = ((I == E - 1 ? Size : Layout->getElementOffset(I + 1)) -
+                        Layout->getElementOffset(I)) -
+                       FieldSize;
     SizeSoFar += FieldSize + PadSize;
 
     // Insert padding - this may include padding to increase the size of the
@@ -3211,7 +3264,9 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
 
 static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
                                    AsmPrinter &AP, const Constant *BaseCV,
-                                   uint64_t Offset) {
+                                   uint64_t Offset,
+                                   AsmPrinter::AliasMapTy *AliasList) {
+  emitGlobalAliasInline(AP, Offset, AliasList);
   uint64_t Size = DL.getTypeAllocSize(CV->getType());
 
   // Globals with sub-elements such as combinations of arrays and structs
@@ -3251,13 +3306,13 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
   }
 
   if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
-    return emitGlobalConstantDataSequential(DL, CDS, AP);
+    return emitGlobalConstantDataSequential(DL, CDS, AP, AliasList);
 
   if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
-    return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
+    return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset, AliasList);
 
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
-    return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
+    return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset, AliasList);
 
   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
     // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
@@ -3276,7 +3331,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
   }
 
   if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
-    return emitGlobalConstantVector(DL, V, AP);
+    return emitGlobalConstantVector(DL, V, AP, AliasList);
 
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
@@ -3292,15 +3347,21 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
-void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV,
+                                    AliasMapTy *AliasList) {
   uint64_t Size = DL.getTypeAllocSize(CV->getType());
   if (Size)
-    emitGlobalConstantImpl(DL, CV, *this);
+    emitGlobalConstantImpl(DL, CV, *this, nullptr, 0, AliasList);
   else if (MAI->hasSubsectionsViaSymbols()) {
     // If the global has zero size, emit a single byte so that two labels don't
     // look like they are at the same location.
     OutStreamer->emitIntValue(0, 1);
   }
+  if (!AliasList)
+    return;
+  for (const auto &AliasPair : *AliasList)
+    report_fatal_error("Aliases with offset " + Twine(AliasPair.first) +
+                       " were not emitted.");
 }
 
 void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
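The AliasMapTy plumbing above lets the printer drop alias labels at interior offsets of a global while the initializer is being streamed out, and to diagnose any alias whose offset was never reached. A stripped-down model of that idea in plain C++ (illustrative only; the AliasMap/emitAliasesAt names are made up and this is not the AsmPrinter API):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy model of AliasMapTy: offset into the global -> aliases at that offset.
using AliasMap = std::map<uint64_t, std::vector<std::string>>;

// Mirrors emitGlobalAliasInline(): before emitting the byte at Offset, place
// a label for every alias bound to that offset, then drop the entry so that
// leftover entries can be reported afterwards.
void emitAliasesAt(AliasMap &Aliases, uint64_t Offset) {
  auto It = Aliases.find(Offset);
  if (It == Aliases.end())
    return;
  for (const std::string &Name : It->second)
    std::cout << Name << ":\n"; // the label lands exactly at this offset
  Aliases.erase(It);
}

int main() {
  AliasMap Aliases = {{0, {"a"}}, {4, {"b", "c"}}};
  const unsigned char Init[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  for (uint64_t Off = 0; Off != sizeof(Init); ++Off) {
    emitAliasesAt(Aliases, Off);
    std::cout << "  .byte " << unsigned(Init[Off]) << "\n";
  }
  // Anything left in the map was never reached; the real code calls
  // report_fatal_error for exactly this case.
  return Aliases.empty() ? 0 : 1;
}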
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
index 28a02390fccb..c872d0dd2dfa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -51,5 +51,5 @@ HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
 HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
 HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
 HANDLE_DIE_HASH_ATTR(DW_AT_type)
-
+HANDLE_DIE_HASH_ATTR(DW_AT_linkage_name)
 #undef HANDLE_DIE_HASH_ATTR
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 5ce6fbb5f647..ad9dc517539a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1646,6 +1646,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
   case AtomicRMWInst::UMin:
+  case AtomicRMWInst::FMax:
+  case AtomicRMWInst::FMin:
   case AtomicRMWInst::FAdd:
   case AtomicRMWInst::FSub:
     // No atomic libcalls are available for max/min/umax/umin.
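Since no atomic libcall exists for fmax/fmin, targets without a native floating-point atomic max/min typically end up with a compare-exchange loop when AtomicExpand rewrites the operation. A minimal sketch of that shape in plain C++ (a model of the expansion, not the pass's actual output):

#include <atomic>
#include <cmath>

// Hypothetical expansion of `atomicrmw fmax` into a CAS loop.
float atomic_fmax(std::atomic<float> &Addr, float Val) {
  float Old = Addr.load(std::memory_order_relaxed);
  float Desired;
  do {
    Desired = std::fmax(Old, Val); // the RMW operation being made atomic
    // compare_exchange_weak reloads Old on failure, so the loop retries
    // against the freshly observed value until the exchange succeeds.
  } while (!Addr.compare_exchange_weak(Old, Desired,
                                       std::memory_order_seq_cst,
                                       std::memory_order_relaxed));
  return Old; // atomicrmw yields the previous value
}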
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 7883a48d121c..59932a542bbc 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -120,8 +120,7 @@ static bool maySpeculateLanes(VPIntrinsic &VPI) {
   // Fallback to whether the intrinsic is speculatable.
   Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
   unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
-  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
-                                                cast<Operator>(&VPI));
+  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, &VPI);
 }
 
 //// } Helpers
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 081c8b125f17..b06043fb4c31 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -500,6 +500,12 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   LLT DstTy = MRI.getType(DstRegs[0]);
   LLT LCMTy = getCoverTy(SrcTy, PartTy);
 
+  if (PartTy.isVector() && LCMTy == PartTy) {
+    assert(DstRegs.size() == 1);
+    B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
+    return;
+  }
+
   const unsigned DstSize = DstTy.getSizeInBits();
   const unsigned SrcSize = SrcTy.getSizeInBits();
   unsigned CoveringSize = LCMTy.getSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2c94f87804ac..ad0c0c8315dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -697,14 +697,16 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
     return false;
 
   Register SrcReg = MI.getOperand(1).getReg();
-  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
-  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
-      !LoadMI->isSimple())
+  // Don't use getOpcodeDef() here since intermediate instructions may have
+  // multiple users.
+  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
+  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
     return false;
 
   Register LoadReg = LoadMI->getDstReg();
-  LLT LoadTy = MRI.getType(LoadReg);
+  LLT RegTy = MRI.getType(LoadReg);
   Register PtrReg = LoadMI->getPointerReg();
+  unsigned RegSize = RegTy.getSizeInBits();
   uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
   unsigned MaskSizeBits = MaskVal.countTrailingOnes();
 
@@ -715,7 +717,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   // If the mask covers the whole destination register, there's nothing to
   // extend
-  if (MaskSizeBits >= LoadTy.getSizeInBits())
+  if (MaskSizeBits >= RegSize)
     return false;
 
   // Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +727,26 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   const MachineMemOperand &MMO = LoadMI->getMMO();
   LegalityQuery::MemDesc MemDesc(MMO);
-  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+
+  // Don't modify the memory access size if this is atomic/volatile, but we can
+  // still adjust the opcode to indicate the high bit behavior.
+  if (LoadMI->isSimple())
+    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+    return false;
+
+  // TODO: Could check if it's legal with the reduced or original memory size.
   if (!isLegalOrBeforeLegalizer(
-          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
     B.setInstrAndDebugLoc(*LoadMI);
     auto &MF = B.getMF();
     auto PtrInfo = MMO.getPointerInfo();
-    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
     B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+    LoadMI->eraseFromParent();
   };
   return true;
 }
@@ -805,21 +816,24 @@ bool CombinerHelper::matchSextInRegOfLoad(
     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
 
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT RegTy = MRI.getType(DstReg);
+
   // Only supports scalars for now.
-  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+  if (RegTy.isVector())
     return false;
 
   Register SrcReg = MI.getOperand(1).getReg();
   auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
-  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
-      !LoadDef->isSimple())
+  if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
     return false;
 
+  uint64_t MemBits = LoadDef->getMemSizeInBits();
+
   // If the sign extend extends from a narrower width than the load's width,
   // then we can narrow the load width when we combine to a G_SEXTLOAD.
   // Avoid widening the load at all.
-  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
-                                  LoadDef->getMemSizeInBits());
+  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
 
   // Don't generate G_SEXTLOADs with a < 1 byte width.
   if (NewSizeBits < 8)
@@ -831,7 +845,15 @@ bool CombinerHelper::matchSextInRegOfLoad(
 
   const MachineMemOperand &MMO = LoadDef->getMMO();
   LegalityQuery::MemDesc MMDesc(MMO);
-  MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+
+  // Don't modify the memory access size if this is atomic/volatile, but we can
+  // still adjust the opcode to indicate the high bit behavior.
+  if (LoadDef->isSimple())
+    MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+  else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
+    return false;
+
+  // TODO: Could check if it's legal with the reduced or original memory size.
   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                  {MRI.getType(LoadDef->getDstReg()),
                                   MRI.getType(LoadDef->getPointerReg())},
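The load-with-mask combine above rests on a simple value-level identity: masking a loaded value down to its low N bits equals zero-extending an N-bit truncation of it, which is exactly what a narrow G_ZEXTLOAD produces (the real combine also narrows the memory access itself, which is where the new atomic/volatile guard matters). A tiny self-contained C++ check of the identity (illustrative, not compiler code):

#include <cassert>
#include <cstdint>

int main() {
  // (G_AND (G_LOAD x), 0xFF)  ==  zext(trunc to 8 bits)  ==  G_ZEXTLOAD of 1 byte
  for (uint32_t Wide = 0; Wide < 0x20000; Wide += 0xABC) {
    uint32_t Masked = Wide & 0xFF;       // and with a low-bits mask
    uint32_t ZExt = (uint8_t)Wide;       // zero-extended truncation
    assert(Masked == ZExt);
  }
  return 0;
}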
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index a2af66d28f4a..947facc87b71 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2076,9 +2076,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     getStackGuard(getOrCreateVReg(CI), MIRBuilder);
     return true;
   case Intrinsic::stackprotector: {
+    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
-    getStackGuard(GuardVal, MIRBuilder);
+    Register GuardVal;
+    if (TLI.useLoadStackGuardNode()) {
+      GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+      getStackGuard(GuardVal, MIRBuilder);
+    } else
+      GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value.
 
     AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
     int FI = getOrCreateFrameIndex(*Slot);
@@ -2883,6 +2888,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
   case AtomicRMWInst::FSub:
     Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
    break;
+  case AtomicRMWInst::FMax:
+    Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
+    break;
+  case AtomicRMWInst::FMin:
+    Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
+    break;
   }
 
   MIRBuilder.buildAtomicRMW(
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 19ebf46191a9..0d9580e25606 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -473,6 +473,23 @@ MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
   return buildInstr(ExtOp, Res, Op);
 }
 
+MachineInstrBuilder MachineIRBuilder::buildBoolExtInReg(const DstOp &Res,
+                                                        const SrcOp &Op,
+                                                        bool IsVector,
+                                                        bool IsFP) {
+  const auto *TLI = getMF().getSubtarget().getTargetLowering();
+  switch (TLI->getBooleanContents(IsVector, IsFP)) {
+  case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+    return buildSExtInReg(Res, Op, 1);
+  case TargetLoweringBase::ZeroOrOneBooleanContent:
+    return buildZExtInReg(Res, Op, 1);
+  case TargetLoweringBase::UndefinedBooleanContent:
+    return buildCopy(Res, Op);
+  }
+
+  llvm_unreachable("unexpected BooleanContent");
+}
+
 MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
                                                       const DstOp &Res,
                                                       const SrcOp &Op) {
@@ -938,6 +955,20 @@ MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
 }
 
 MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr,
+                                     const SrcOp &Val, MachineMemOperand &MMO) {
+  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val,
+                        MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr,
+                                     const SrcOp &Val, MachineMemOperand &MMO) {
+  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val,
+                        MMO);
+}
+
+MachineInstrBuilder
 MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
   return buildInstr(TargetOpcode::G_FENCE)
     .addImm(Ordering)
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index b3f38a3b53f3..55f3ad796291 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -310,10 +310,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
       Extracts.push_back(Extract);
       continue;
     }
-    auto *BI = dyn_cast<BinaryOperator>(User);
-    if (BI && BI->hasOneUse()) {
-      if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
-        BinOpShuffles.insert(SVI);
+    if (auto *BI = dyn_cast<BinaryOperator>(User)) {
+      if (all_of(BI->users(),
+                 [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
+        for (auto *SVI : BI->users())
+          BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
         continue;
       }
     }
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 30ca8bd871e8..43c12c67939e 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -536,6 +536,17 @@ public:
     // What was the old variable value?
     ValueIDNum OldValue = VarLocs[MLoc.asU64()];
+    clobberMloc(MLoc, OldValue, Pos, MakeUndef);
+  }
+  /// Overload that takes an explicit value \p OldValue for when the value in
+  /// \p MLoc has changed and the TransferTracker's locations have not been
+  /// updated yet.
+  void clobberMloc(LocIdx MLoc, ValueIDNum OldValue,
+                   MachineBasicBlock::iterator Pos, bool MakeUndef = true) {
+    auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+    if (ActiveMLocIt == ActiveMLocs.end())
+      return;
+
     VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
 
     // Examine the remaining variable locations: if we can find the same value
@@ -1730,9 +1741,35 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
   if (EmulateOldLDV && !SrcRegOp->isKill())
     return false;
 
+  // Before we update MTracker, remember which values were present in each of
+  // the locations about to be overwritten, so that we can recover any
+  // potentially clobbered variables.
+  DenseMap<LocIdx, ValueIDNum> ClobberedLocs;
+  if (TTracker) {
+    for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
+      LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
+      auto MLocIt = TTracker->ActiveMLocs.find(ClobberedLoc);
+      // If ActiveMLocs isn't tracking this location or there are no variables
+      // using it, don't bother remembering.
+      if (MLocIt == TTracker->ActiveMLocs.end() || MLocIt->second.empty())
+        continue;
+      ValueIDNum Value = MTracker->readReg(*RAI);
+      ClobberedLocs[ClobberedLoc] = Value;
+    }
+  }
+
   // Copy MTracker info, including subregs if available.
   InstrRefBasedLDV::performCopy(SrcReg, DestReg);
 
+  // The copy might have clobbered variables based on the destination register.
+  // Tell TTracker about it, passing the old ValueIDNum to search for
+  // alternative locations (or else terminating those variables).
+  if (TTracker) {
+    for (auto LocVal : ClobberedLocs) {
+      TTracker->clobberMloc(LocVal.first, LocVal.second, MI.getIterator(), false);
+    }
+  }
+
   // Only produce a transfer of DBG_VALUE within a block where old LDV
   // would have. We might make use of the additional value tracking in some
   // other way, later.
@@ -1744,15 +1781,6 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
   if (EmulateOldLDV && SrcReg != DestReg)
     MTracker->defReg(SrcReg, CurBB, CurInst);
 
-  // Finally, the copy might have clobbered variables based on the destination
-  // register. Tell TTracker about it, in case a backup location exists.
-  if (TTracker) {
-    for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) {
-      LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI);
-      TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false);
-    }
-  }
-
   return true;
 }
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 7d825a8bf853..1242ce20b732 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1049,12 +1049,17 @@ public:
         // we may end up with a main range not covering all subranges.
        // This is extremely rare case, so let's check and reconstruct the
        // main range.
-        for (LiveInterval::SubRange &S : LI.subranges()) {
-          if (LI.covers(S))
-            continue;
-          LI.clear();
-          LIS.constructMainRangeFromSubranges(LI);
-          break;
+        if (LI.hasSubRanges()) {
+          unsigned SubReg = MO.getSubReg();
+          LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+                                        : MRI.getMaxLaneMaskForVReg(Reg);
+          for (LiveInterval::SubRange &S : LI.subranges()) {
+            if ((S.LaneMask & LaneMask).none() || LI.covers(S))
+              continue;
+            LI.clear();
+            LIS.constructMainRangeFromSubranges(LI);
+            break;
+          }
         }
 
         continue;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 40ae7053ea09..0c94e1f7e474 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -742,7 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
   MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
   MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
   if (SectionID) {
-    MBB->setSectionID(SectionID.getValue());
+    MBB->setSectionID(SectionID.value());
     MF.setBBSectionsType(BasicBlockSection::List);
   }
   return false;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c186d0ba9969..02c44fa85cd9 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -451,7 +451,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
   if (IrrLoopHeaderWeight && IsStandalone) {
     if (Indexes) OS << '\t';
     OS.indent(2) << "; Irreducible loop header weight: "
-                 << IrrLoopHeaderWeight.getValue() << '\n';
+                 << IrrLoopHeaderWeight.value() << '\n';
   }
 }
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 81c97ba6a086..867a7ed584b2 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -106,8 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
   // We don't want to proceed further for cold functions
   // or functions of unknown hotness. Lukewarm functions have no prefix.
   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
-  if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") ||
-                        SectionPrefix.getValue().equals("unknown"))) {
+  if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
+                        SectionPrefix.value().equals("unknown"))) {
     return false;
   }
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4e00a211713e..5f80445a5a34 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -93,8 +93,11 @@ cl::opt<bool> VerifyScheduling(
 cl::opt<bool> ViewMISchedDAGs(
     "view-misched-dags", cl::Hidden,
     cl::desc("Pop up a window to show MISched dags after they are processed"));
+cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+                        cl::desc("Print schedule DAGs"));
 #else
 const bool ViewMISchedDAGs = false;
+const bool PrintDAGs = false;
 #endif // NDEBUG
 
 } // end namespace llvm
@@ -112,10 +115,6 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
                                           cl::desc("Only schedule this function"));
 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
                                         cl::desc("Only schedule this MBB#"));
-static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
-                               cl::desc("Print schedule DAGs"));
-#else
-static const bool PrintDAGs = false;
 #endif // NDEBUG
 
 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index db04f2bcc095..7a008bae726e 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -293,6 +293,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addUsedIfAvailable<LiveStacks>();
       AU.setPreservesAll();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 3245d9649be1..581168b31384 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1448,7 +1448,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
                              const TargetRegisterClass *RC) {
   // If the init register is not undef, try and find an existing phi.
   if (InitReg) {
-    auto I = Phis.find({LoopReg, InitReg.getValue()});
+    auto I = Phis.find({LoopReg, InitReg.value()});
     if (I != Phis.end())
       return I->second;
   } else {
@@ -1469,10 +1469,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
     return R;
   // Found a phi taking undef as input, so rewrite it to take InitReg.
   MachineInstr *MI = MRI.getVRegDef(R);
-  MI->getOperand(1).setReg(InitReg.getValue());
-  Phis.insert({{LoopReg, InitReg.getValue()}, R});
+  MI->getOperand(1).setReg(InitReg.value());
+  Phis.insert({{LoopReg, InitReg.value()}, R});
   const TargetRegisterClass *ConstrainRegClass =
-      MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+      MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value()));
   assert(ConstrainRegClass && "Expected a valid constrained register class!");
   (void)ConstrainRegClass;
   UndefPhis.erase(I);
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 1a0f296d5fdc..89a43c4f57f6 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -554,7 +554,7 @@ static void updateLiveness(MachineFunction &MF) {
   }
 }
 
-/// Insert restore code for the callee-saved registers used in the function.
+/// Insert spill code for the callee-saved registers used in the function.
 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                            ArrayRef<CalleeSavedInfo> CSI) {
   MachineFunction &MF = *SaveBlock.getParent();
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index d57b0ca6d53d..d6a3997e4b70 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -148,9 +148,6 @@ protected:
   /// Run or not the local reassignment heuristic. This information is
   /// obtained from the TargetSubtargetInfo.
   const bool EnableLocalReassign;
-
-private:
-  unsigned NextCascade = 1;
 };
 
 /// ImmutableAnalysis abstraction for fetching the Eviction Advisor. We model it
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index c199b6a6cca8..d627519a34aa 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -870,8 +870,8 @@ bool SelectOptimize::computeLoopCosts(
         ORE->emit(ORmissL);
         return false;
       }
-      IPredCost += Scaled64::get(ILatency.getValue());
-      INonPredCost += Scaled64::get(ILatency.getValue());
+      IPredCost += Scaled64::get(ILatency.value());
+      INonPredCost += Scaled64::get(ILatency.value());
 
       // For a select that can be converted to branch,
       // compute its cost as a branch (non-predicated cost).
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa688d9dda3c..2654c00929d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2392,12 +2392,14 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
   SDLoc DL(N);
-  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
-  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
-  if (SDValue NewC =
-          DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
-                                     {ConstantOp, DAG.getConstant(1, DL, VT)}))
+  if (SDValue NewC = DAG.FoldConstantArithmetic(
+          IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+          {ConstantOp, DAG.getConstant(1, DL, VT)})) {
+    SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
+                                   Not.getOperand(0), ShAmt);
     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+  }
+
   return SDValue();
 }
 
@@ -3760,6 +3762,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  // If there's no chance of borrowing from adjacent bits, then sub is xor:
+  // sub C0, X --> xor X, C0
+  if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
+    if (!C0->isOpaque()) {
+      const APInt &C0Val = C0->getAPIntValue();
+      const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
+      if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
+        return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+    }
+  }
+
   return SDValue();
 }
 
@@ -4550,13 +4563,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
   SDLoc DL(N);
 
   // fold (rem c1, c2) -> c1%c2
-  ConstantSDNode *N1C = isConstOrConstSplat(N1);
   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
     return C;
 
   // fold (urem X, -1) -> select(FX == -1, 0, FX)
   // Freeze the numerator to avoid a miscompile with an undefined value.
-  if (!isSigned && N1C && N1C->isAllOnes()) {
+  if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
    SDValue F0 = DAG.getFreeze(N0);
     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
@@ -4581,9 +4593,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
     AddToWorklist(Add.getNode());
     return DAG.getNode(ISD::AND, DL, VT, N0, Add);
   }
-  if (N1.getOpcode() == ISD::SHL &&
+  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+  // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
+  // TODO: We should sink the following into isKnownToBePowerOfTwo
+  // using a OrZero parameter analogous to our handling in ValueTracking.
+  if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
       DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
-    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
     AddToWorklist(Add.getNode());
@@ -9288,31 +9303,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
   // sra (add (shl X, N1C), AddC), N1C -->
   // sext (add (trunc X to (width - N1C)), AddC')
-  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
-      N0.getOperand(0).getOpcode() == ISD::SHL &&
-      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
-    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
-      SDValue Shl = N0.getOperand(0);
-      // Determine what the truncate's type would be and ask the target if that
-      // is a free operation.
-      LLVMContext &Ctx = *DAG.getContext();
-      unsigned ShiftAmt = N1C->getZExtValue();
-      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
-      if (VT.isVector())
-        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
-
-      // TODO: The simple type check probably belongs in the default hook
-      // implementation and/or target-specific overrides (because
-      // non-simple types likely require masking when legalized), but that
-      // restriction may conflict with other transforms.
-      if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
-          TLI.isTruncateFree(VT, TruncVT)) {
-        SDLoc DL(N);
-        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
-        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
-                          trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
-        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
-        return DAG.getSExtOrTrunc(Add, DL, VT);
+  // sra (sub AddC, (shl X, N1C)), N1C -->
+  // sext (sub AddC1',(trunc X to (width - N1C)))
+  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+      N0.hasOneUse()) {
+    bool IsAdd = N0.getOpcode() == ISD::ADD;
+    SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+    if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+        Shl.hasOneUse()) {
+      // TODO: AddC does not need to be a splat.
+      if (ConstantSDNode *AddC =
+              isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+        // Determine what the truncate's type would be and ask the target if
+        // that is a free operation.
+        LLVMContext &Ctx = *DAG.getContext();
+        unsigned ShiftAmt = N1C->getZExtValue();
+        EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+        if (VT.isVector())
+          TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+        // TODO: The simple type check probably belongs in the default hook
+        //       implementation and/or target-specific overrides (because
+        //       non-simple types likely require masking when legalized), but
+        //       that restriction may conflict with other transforms.
+        if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+            TLI.isTruncateFree(VT, TruncVT)) {
+          SDLoc DL(N);
+          SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+          SDValue ShiftC =
+              DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+                                  TruncVT.getScalarSizeInBits()),
+                              DL, TruncVT);
+          SDValue Add;
+          if (IsAdd)
+            Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+          else
+            Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+          return DAG.getSExtOrTrunc(Add, DL, VT);
+        }
       }
     }
   }
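The shift-through-add/sub rewrite above can be verified numerically for a concrete width: shifting X left by 24 leaves the low 24 bits zero, so adding a constant cannot carry into the top byte from X's contribution, and the arithmetic shift right then only sees the narrow sum. A self-contained C++ check (illustrative; assumes two's-complement arithmetic right shift, which C++20 guarantees):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t AddC = 0x5A123456; // arbitrary constant
  for (int32_t X = -1000; X <= 1000; X += 13) {
    // Original form: sra (add (shl X, 24), AddC), 24
    int32_t Wide = (int32_t)(((uint32_t)X << 24) + AddC) >> 24;

    // Narrow form: sext (add (trunc X to i8), AddC' = AddC >> 24)
    int8_t Narrow = (int8_t)((uint8_t)X + (uint8_t)(AddC >> 24));

    assert(Wide == (int32_t)Narrow);
  }
  return 0;
}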
@@ -11025,6 +11053,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
     return V;
 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
@@ -13243,18 +13274,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     }
   }
 
-  // See if we can simplify the input to this truncate through knowledge that
-  // only the low bits are being used.
-  // For example "trunc (or (shl x, 8), y)" // -> trunc y
-  // Currently we only perform this optimization on scalars because vectors
-  // may have different active low bits.
-  if (!VT.isVector()) {
-    APInt Mask =
-        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
-    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
-  }
-
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -13341,6 +13360,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" // -> trunc y
+  // Currently we only perform this optimization on scalars because vectors
+  // may have different active low bits.
+  if (!VT.isVector()) {
+    APInt Mask =
+        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+  }
+
   // fold (truncate (extract_subvector(ext x))) ->
   //      (extract_subvector x)
   // TODO: This can be generalized to cover cases where the truncate and extract
@@ -24514,8 +24545,9 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
   auto &Size0 = MUC0.NumBytes;
   auto &Size1 = MUC1.NumBytes;
   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
-      Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
-      SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
+      Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
+      OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+      SrcValOffset1 % *Size1 == 0) {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f464208cd9dc..6c136bdfc652 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2915,6 +2915,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
   case ISD::SELECT_CC:  Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
   case ISD::SETCC:      Res = SoftPromoteHalfOp_SETCC(N); break;
   case ISD::STORE:      Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+  case ISD::STACKMAP:
+    Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
+    break;
   }
 
   if (!Res.getNode())
@@ -3042,3 +3045,17 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
   return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
                       ST->getMemOperand());
 }
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
+  assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+  SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+  SDValue Op = N->getOperand(OpNo);
+  NewOps[OpNo] = GetSoftPromotedHalf(Op);
+  SDValue NewNode =
+      DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+  for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+    ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+  return SDValue(); // Signal that we replaced the node ourselves.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 69fd83bcd7b3..343722a97c3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
 
 #include "LegalizeTypes.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -1723,6 +1724,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
     break;
   case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
+  case ISD::STACKMAP:
+    Res = PromoteIntOp_STACKMAP(N, OpNo);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -2255,16 +2259,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
   SDLoc dl(N);
   SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
 
-  EVT EltVT = Op.getValueType().getVectorElementType();
-  EVT VT = N->getValueType(0);
+  EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType();
+  EVT InVT = Op.getValueType();
+  EVT EltVT = InVT.getVectorElementType();
+  EVT ResVT = N->getValueType(0);
+  unsigned Opcode = N->getOpcode();
 
-  if (VT.bitsGE(EltVT))
-    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+  // An i1 vecreduce_xor is equivalent to vecreduce_add, use that instead if
+  // vecreduce_xor is not legal
+  if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 &&
+      !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) &&
+      TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT))
+    Opcode = ISD::VECREDUCE_ADD;
+
+  // An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
+  // vecreduce_or is not legal
+  else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+    Opcode = ISD::VECREDUCE_UMAX;
+
+  // An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
+  // vecreduce_and is not legal
+  else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+    Opcode = ISD::VECREDUCE_UMIN;
+
+  if (ResVT.bitsGE(EltVT))
+    return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
 
   // Result size must be >= element size. If this is not the case after
   // promotion, also promote the result type and then truncate.
-  SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
-  return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+  SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op);
+  return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
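The three i1 equivalences used above are easy to verify directly: for boolean lanes, XOR-reduce is parity (the low bit of an add-reduce), OR-reduce is the maximum, and AND-reduce is the minimum. A small stand-alone C++ check (illustrative, not compiler code):

#include <cassert>

int main() {
  // Try every 4-lane i1 vector, encoded as the low 4 bits of V.
  for (unsigned V = 0; V < 16; ++V) {
    unsigned Xor = 0, Or = 0, And = 1, Sum = 0, Max = 0, Min = 1;
    for (unsigned Lane = 0; Lane < 4; ++Lane) {
      unsigned B = (V >> Lane) & 1;
      Xor ^= B;
      Or |= B;
      And &= B;
      Sum += B;
      Max = B > Max ? B : Max;
      Min = B < Min ? B : Min;
    }
    assert(Xor == (Sum & 1)); // vecreduce_xor == low bit of vecreduce_add
    assert(Or == Max);        // vecreduce_or  == vecreduce_umax
    assert(And == Min);       // vecreduce_and == vecreduce_umin
  }
  return 0;
}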
@@ -2304,6 +2332,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
   return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+  assert(OpNo > 1); // Because the first two arguments are guaranteed legal.
+  SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+  SDValue Operand = N->getOperand(OpNo);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+  NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+  return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
 //===----------------------------------------------------------------------===//
 //  Integer Result Expansion
 //===----------------------------------------------------------------------===//
@@ -4653,6 +4690,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::FRAMEADDR:         Res = ExpandIntOp_RETURNADDR(N); break;
 
   case ISD::ATOMIC_STORE:      Res = ExpandIntOp_ATOMIC_STORE(N); break;
+  case ISD::STACKMAP:
+    Res = ExpandIntOp_STACKMAP(N, OpNo);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -5481,3 +5521,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
 
   return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
 }
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
+  assert(OpNo > 1);
+
+  SDValue Op = N->getOperand(OpNo);
+  SDLoc DL = SDLoc(N);
+  SmallVector<SDValue> NewOps;
+
+  // Copy operands before the one being expanded.
+  for (unsigned I = 0; I < OpNo; I++)
+    NewOps.push_back(N->getOperand(I));
+
+  if (Op->getOpcode() == ISD::Constant) {
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op);
+    EVT Ty = Op.getValueType();
+    if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+      NewOps.push_back(
+          DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+      NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+    } else {
+      // FIXME: https://github.com/llvm/llvm-project/issues/55609
+      return SDValue();
+    }
+  } else {
+    // FIXME: Non-constant operands are not yet handled:
+    //  - https://github.com/llvm/llvm-project/issues/26431
+    //  - https://github.com/llvm/llvm-project/issues/55957
+    return SDValue();
+  }
+
+  // Copy remaining operands.
+  for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+    NewOps.push_back(N->getOperand(I));
+
+  SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+  for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+    ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+  return SDValue(); // Signal that we have replaced the node already.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index de320290bda9..2807b7f5ae68 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -402,6 +402,7 @@ private:
   SDValue PromoteIntOp_VECREDUCE(SDNode *N);
   SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
+  SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
 
   void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
 
@@ -493,6 +494,7 @@ private:
   SDValue ExpandIntOp_RETURNADDR(SDNode *N);
   SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
   SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
+  SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
 
   void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
                                   ISD::CondCode &CCCode, const SDLoc &dl);
@@ -741,6 +743,7 @@ private:
   SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
   SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+  SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
   // Scalarization Support: LegalizeVectorTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index fa555be00ded..143abc08eeea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5627,7 +5627,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
   SDLoc dl(N);
-  unsigned NumElts = VT.getVectorNumElements();
   SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
   assert(getTypeAction(InOp.getValueType()) ==
              TargetLowering::TypeWidenVector &&
@@ -5639,7 +5638,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   // See if a widened result type would be legal, if so widen the node.
   // FIXME: This isn't safe for StrictFP. Other optimization here is needed.
   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                InVT.getVectorNumElements());
+                                InVT.getVectorElementCount());
   if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
     SDValue Res;
     if (N->isStrictFPOpcode()) {
@@ -5665,6 +5664,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   EVT InEltVT = InVT.getVectorElementType();
 
   // Unroll the convert into some scalar code and create a nasty build vector.
+  unsigned NumElts = VT.getVectorNumElements();
   SmallVector<SDValue, 16> Ops(NumElts);
   if (N->isStrictFPOpcode()) {
     SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
@@ -6055,7 +6055,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   // The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
   if (VT.getScalarType() == MVT::i1)
     SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                           SVT.getVectorNumElements());
+                           SVT.getVectorElementCount());
 
   SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
                                   SVT, InOp0, InOp1, N->getOperand(2));
@@ -6063,7 +6063,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   // Extract the needed results from the result vector.
   EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
                                SVT.getVectorElementType(),
-                               VT.getVectorNumElements());
+                               VT.getVectorElementCount());
   SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
                            DAG.getVectorIdxConstant(0, dl));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b3b8756ae9ba..c8d0f5faf647 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -60,7 +60,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/raw_ostream.h"
@@ -3271,6 +3270,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero.setBitsFrom(1);
     break;
   case ISD::SETCC:
+  case ISD::SETCCCARRY:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS: {
     unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -3506,6 +3506,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   case ISD::USUBO:
   case ISD::SSUBO:
+  case ISD::SUBCARRY:
+  case ISD::SSUBO_CARRY:
     if (Op.getResNo() == 1) {
       // If we know the result of a setcc has the top bits zero, use this info.
       if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3520,6 +3522,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     assert(Op.getResNo() == 0 &&
            "We only compute knownbits for the difference here.");
 
+    // TODO: Compute influence of the carry operand.
+    if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY)
+      break;
+
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
@@ -3529,6 +3535,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::UADDO:
   case ISD::SADDO:
   case ISD::ADDCARRY:
+  case ISD::SADDO_CARRY:
     if (Op.getResNo() == 1) {
       // If we know the result of a setcc has the top bits zero, use this info.
       if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
@@ -3548,7 +3555,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     if (Opcode == ISD::ADDE)
       // Can't track carry from glue, set carry to unknown.
       Carry.resetAll();
-    else if (Opcode == ISD::ADDCARRY)
+    else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY)
       // TODO: Compute known bits for the carry operand. Not sure if it is worth
       // the trouble (how often will we find a known carry bit). And I haven't
       // tested this very much yet, but something like this might work:
@@ -3862,6 +3869,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
     if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
       return true;
 
+  // vscale(power-of-two) is a power-of-two for some targets
+  if (Val.getOpcode() == ISD::VSCALE &&
+      getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
+      isKnownToBeAPowerOfTwo(Val.getOperand(0)))
+    return true;
+
   // More could be done here, though the above checks are enough
   // to handle some common cases.
@@ -4108,8 +4121,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return std::min(Tmp, Tmp2);
   case ISD::SADDO:
   case ISD::UADDO:
+  case ISD::SADDO_CARRY:
+  case ISD::ADDCARRY:
   case ISD::SSUBO:
   case ISD::USUBO:
+  case ISD::SSUBO_CARRY:
+  case ISD::SUBCARRY:
   case ISD::SMULO:
   case ISD::UMULO:
     if (Op.getResNo() != 1)
@@ -4123,6 +4140,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
       return VTBits;
     break;
   case ISD::SETCC:
+  case ISD::SETCCCARRY:
  case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS: {
     unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
@@ -7505,6 +7523,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
           Opcode == ISD::ATOMIC_LOAD_UMAX ||
           Opcode == ISD::ATOMIC_LOAD_FADD ||
           Opcode == ISD::ATOMIC_LOAD_FSUB ||
+          Opcode == ISD::ATOMIC_LOAD_FMAX ||
+          Opcode == ISD::ATOMIC_LOAD_FMIN ||
           Opcode == ISD::ATOMIC_SWAP ||
           Opcode == ISD::ATOMIC_STORE) &&
          "Invalid Atomic Op");
@@ -10739,19 +10759,19 @@ namespace {
 
 } // end anonymous namespace
 
-static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
-static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true>> VTMutex;
-
 /// getValueTypeList - Return a pointer to the specified value type.
 ///
 const EVT *SDNode::getValueTypeList(EVT VT) {
+  static std::set<EVT, EVT::compareRawBits> EVTs;
+  static EVTArray SimpleVTArray;
+  static sys::SmartMutex<true> VTMutex;
+
   if (VT.isExtended()) {
-    sys::SmartScopedLock<true> Lock(*VTMutex);
-    return &(*EVTs->insert(VT).first);
+    sys::SmartScopedLock<true> Lock(VTMutex);
+    return &(*EVTs.insert(VT).first);
   }
   assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!");
-  return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+  return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy];
 }
 
 /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
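The getValueTypeList change works because C++11 guarantees thread-safe, lazy initialization of function-local statics (the Meyers-singleton pattern), which covers what ManagedStatic provided here. A minimal illustration of the pattern (a sketch with invented names, not LLVM code):

#include <mutex>
#include <set>

// Function-local statics are constructed on first call, and C++11
// guarantees that construction is thread-safe. Nothing is constructed
// if the function is never called, matching ManagedStatic's laziness.
const int *internValue(int Value) {
  static std::set<int> Interned; // built once, on first use
  static std::mutex Lock;        // guards mutation from multiple threads

  std::lock_guard<std::mutex> Guard(Lock);
  return &*Interned.insert(Value).first; // std::set nodes are stable
}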
TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), CallConv.value(), RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -1309,7 +1309,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false)) { LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " - << DDI.getDI() << "\nBy stripping back to:\n " << V); + << *DDI.getDI() << "\nBy stripping back to:\n " << *V); return; } } @@ -1321,7 +1321,7 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); - LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI() + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI() << "\n"); LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) << "\n"); @@ -3747,13 +3747,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const User &I) { - ArrayRef<unsigned> Indices; - if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) - Indices = IV->getIndices(); - else - Indices = cast<ConstantExpr>(&I)->getIndices(); - +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + ArrayRef<unsigned> Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -4616,6 +4611,8 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; + case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; + case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); @@ -8410,52 +8407,6 @@ public: return false; } - - /// getCallOperandValEVT - Return the EVT of the Value* that this operand - /// corresponds to. If there is no Value* for this operand, it returns - /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, - llvm::Type *ParamElemType) const { - if (!CallOperandVal) return MVT::Other; - - if (isa<BasicBlock>(CallOperandVal)) - return TLI.getProgramPointerTy(DL); - - llvm::Type *OpTy = CallOperandVal->getType(); - - // FIXME: code duplicated from TargetLowering::ParseConstraints(). - // If this is an indirect operand, the operand is a pointer to the - // accessed type. - if (isIndirect) { - OpTy = ParamElemType; - assert(OpTy && "Indirect operand must have elementtype attribute"); - } - - // Look for vector wrapped in a struct. e.g. { <16 x i8> }. - if (StructType *STy = dyn_cast<StructType>(OpTy)) - if (STy->getNumElements() == 1) - OpTy = STy->getElementType(0); - - // If OpTy is not a single value, it may be a struct/union that we - // can tile with integers. 
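The helper being removed here duplicated logic that now lives only in TargetLowering::ParseConstraints (see the hunk near the end of this diff). Its "tile with integers" step, restated as a standalone sketch for reference; the wrapper name is invented, the body mirrors the removed code:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // A sized aggregate whose bit size matches a common integer width is
    // retyped as iN so an inline-asm constraint can treat it like a scalar.
    static Type *tileWithInteger(LLVMContext &Ctx, const DataLayout &DL,
                                 Type *OpTy) {
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        case 1: case 8: case 16: case 32: case 64: case 128:
          return IntegerType::get(Ctx, BitSize);
        default:
          break;
        }
      }
      return OpTy;
    }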
- if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL.getTypeSizeInBits(OpTy); - switch (BitSize) { - default: break; - case 1: - case 8: - case 16: - case 32: - case 64: - case 128: - OpTy = IntegerType::get(Context, BitSize); - break; - } - } - - return TLI.getAsmOperandValueType(DL, OpTy, true); - } }; @@ -8722,37 +8673,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, bool HasSideEffect = IA->hasSideEffects(); ExtraFlags ExtraInfo(Call); - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. - unsigned ResNo = 0; // ResNo - The result number of the next output. for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - // Compute the value type for each operand. - if (OpInfo.hasArg()) { - OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); + if (OpInfo.CallOperandVal) OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - Type *ParamElemTy = Call.getParamElementType(ArgNo); - EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout(), ParamElemTy); - OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; - ArgNo++; - } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { - // The return value of the call is this value. As such, there is no - // corresponding argument. - assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(Call.getType())) { - OpInfo.ConstraintVT = TLI.getSimpleValueType( - DAG.getDataLayout(), STy->getElementType(ResNo)); - } else { - assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = TLI.getAsmOperandValueType( - DAG.getDataLayout(), Call.getType()).getSimpleVT(); - } - ++ResNo; - } else { - OpInfo.ConstraintVT = MVT::Other; - } if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); @@ -8865,7 +8791,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const char *RegName = TRI.getName(RegError.getValue()); + const char *RegName = TRI.getName(RegError.value()); emitInlineAsmError(Call, "register '" + Twine(RegName) + "' allocated for constraint '" + Twine(OpInfo.ConstraintCode) + @@ -9385,9 +9311,9 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, } } -/// Lower llvm.experimental.stackmap directly to its target opcode. +/// Lower llvm.experimental.stackmap. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { - // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, // [live variables...]) assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); @@ -9412,29 +9338,45 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); - // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); - SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, - MVT::i32)); - - // Push live variables for the stack map. 
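In the replacement lowering that follows, the intrinsic's <id> and <numShadowBytes> arguments are re-emitted as TargetConstants so that no later phase tries to legalize them. That move, isolated into a hypothetical helper (DAG and DL stand for the surrounding SelectionDAG and debug location; the operands are expected to be constants, as the casts in the patch assume):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Re-wrap a constant SDValue as an immediate the matcher will not touch.
    static SDValue asTargetImmediate(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue V) {
      uint64_t Imm = cast<ConstantSDNode>(V)->getZExtValue();
      return DAG.getTargetConstant(Imm, DL, V.getValueType());
    }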
- addStackMapLiveVars(CI, 2, DL, Ops, *this); - - // We are not pushing any register mask info here on the operands list, - // because the stackmap doesn't clobber anything. - - // Push the chain and the glue flag. + // Add the STACKMAP operands, starting with DAG house-keeping. Ops.push_back(Chain); Ops.push_back(InFlag); + // Add the <id>, <numShadowBytes> operands. + // + // These do not require legalisation, and can be emitted directly to target + // constant nodes. + SDValue ID = getValue(CI.getArgOperand(0)); + assert(ID.getValueType() == MVT::i64); + SDValue IDConst = DAG.getTargetConstant( + cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType()); + Ops.push_back(IDConst); + + SDValue Shad = getValue(CI.getArgOperand(1)); + assert(Shad.getValueType() == MVT::i32); + SDValue ShadConst = DAG.getTargetConstant( + cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType()); + Ops.push_back(ShadConst); + + // Add the live variables. + for (unsigned I = 2; I < CI.arg_size(); I++) { + SDValue Op = getValue(CI.getArgOperand(I)); + + // Things on the stack are pointer-typed, meaning that they are already + // legal and can be emitted directly to target nodes. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Ops.push_back(DAG.getTargetFrameIndex( + FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout()))); + } else { + // Otherwise emit a target independent node to be legalised. + Ops.push_back(getValue(CI.getArgOperand(I))); + } + } + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); - Chain = SDValue(SM, 0); + Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 72cca3d9b001..4a3ab00614b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -529,7 +529,7 @@ private: void visitShuffleVector(const User &I); void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const User &I); + void visitInsertValue(const InsertValueInst &I); void visitLandingPad(const LandingPadInst &LP); void visitGetElementPtr(const User &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bbfc6e5ef64f..9df0b64c26c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -486,6 +486,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + case ISD::STACKMAP: + return "stackmap"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) 
\ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2b63359c2b1b..7f453f081982 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -51,6 +50,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -64,7 +64,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" @@ -345,47 +344,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that -/// may trap on it. In this case we have to split the edge so that the path -/// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. If available, we pass domtree -/// and loop info to be updated when we split critical edges. This is because -/// SelectionDAGISel preserves these analyses. -/// This is required for correctness, so it must be done at -O0. -/// -static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, - LoopInfo *LI) { - // Loop for blocks with phi nodes. - for (BasicBlock &BB : Fn) { - PHINode *PN = dyn_cast<PHINode>(BB.begin()); - if (!PN) continue; - - ReprocessBlock: - // For each block with a PHI node, check to see if any of the input values - // are potentially trapping constant expressions. Constant expressions are - // the only potentially trapping value that can occur as the argument to a - // PHI. - for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i)); - if (!C || !C->canTrap()) continue; - - // The only case we have to worry about is when the edge is critical. - // Since this block has a PHI Node, we assume it has multiple input - // edges: check to see if the pred has multiple successors. - BasicBlock *Pred = PN->getIncomingBlock(i); - if (Pred->getTerminator()->getNumSuccessors() == 1) - continue; - - // Okay, we have to split this edge. - SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); - goto ReprocessBlock; - } - } -} - static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F, MachineModuleInfo &MMI) { // Only needed for MSVC @@ -445,10 +403,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) @@ -456,8 +410,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this, LibInfo, getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); FuncInfo->set(Fn, *MF, CurDAG); @@ -2241,6 +2193,52 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_STACKMAP(SDNode *N) { + std::vector<SDValue> Ops; + auto *It = N->op_begin(); + SDLoc DL(N); + + // Stash the chain and glue operands so we can move them to the end. + SDValue Chain = *It++; + SDValue InFlag = *It++; + + // <id> operand. + SDValue ID = *It++; + assert(ID.getValueType() == MVT::i64); + Ops.push_back(ID); + + // <numShadowBytes> operand. + SDValue Shad = *It++; + assert(Shad.getValueType() == MVT::i32); + Ops.push_back(Shad); + + // Live variable operands. + for (; It != N->op_end(); It++) { + SDNode *OpNode = It->getNode(); + SDValue O; + + // FrameIndex nodes should have been directly emitted to TargetFrameIndex + // nodes at DAG-construction time. + assert(OpNode->getOpcode() != ISD::FrameIndex); + + if (OpNode->getOpcode() == ISD::Constant) { + Ops.push_back( + CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + O = CurDAG->getTargetConstant( + cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType()); + } else { + O = *It; + } + Ops.push_back(O); + } + + Ops.push_back(Chain); + Ops.push_back(InFlag); + + SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue); + CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops); +} + /// GetVBR - decode a vbr encoding whose top bit is set. 
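Select_STACKMAP above encodes an immediate live operand as a (StackMaps::ConstantOp, value) pair, which is how the stackmap emitter later tells immediates apart from registers and stack references. The encoding step on its own, with an invented helper name:

    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/CodeGen/StackMaps.h"
    #include <vector>
    using namespace llvm;

    // Marker first, then the payload: exactly two operands per immediate.
    static void pushLiveConstant(SelectionDAG &DAG, const SDLoc &DL,
                                 std::vector<SDValue> &Ops,
                                 const ConstantSDNode *C, EVT VT) {
      Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
      Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), DL, VT));
    }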
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2795,6 +2793,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ARITH_FENCE: Select_ARITH_FENCE(NodeToMatch); return; + case ISD::STACKMAP: + Select_STACKMAP(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 19a52fde44c1..3061158eea30 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -531,14 +531,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, for (const Value *V : SI.Bases) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt) { - assert(Opt.getValue() && + assert(Opt.value() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : SI.Ptrs) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt) { - assert(Opt.getValue() && + assert(Opt.value() && "non gc managed derived pointer found in statepoint"); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a6b471ea22b7..66389a57f780 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1362,6 +1362,29 @@ bool TargetLowering::SimplifyDemandedBits( } } + // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) + // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). + if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && + (Op0.getOperand(0).isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) && + Op0->hasOneUse()) { + unsigned NumSubElts = + Op0.getOperand(1).getValueType().getVectorNumElements(); + unsigned SubIdx = Op0.getConstantOperandVal(2); + APInt DemandedSub = + APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts); + KnownBits KnownSubMask = + TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1); + if (DemandedBits.isSubsetOf(KnownSubMask.One)) { + SDValue NewAnd = + TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1); + SDValue NewInsert = + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd, + Op0.getOperand(1), Op0.getOperand(2)); + return TLO.CombineTo(Op, NewInsert); + } + } + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1)) return true; @@ -1371,20 +1394,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
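The new AND(INSERT_SUBVECTOR(C,X,I),M) fold needs to know which lanes of the mask M overlap the inserted subvector before it can prove AND(X,M) == X. A worked example of the lane mask it builds with APInt::getBitsSet, which sets the half-open range [loBit, hiBit):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Lanes [SubIdx, SubIdx + NumSubElts) are occupied by the subvector.
    static APInt demandedSubvectorLanes(unsigned NumElts, unsigned SubIdx,
                                        unsigned NumSubElts) {
      return APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
    }
    // e.g. demandedSubvectorLanes(8, 2, 4) yields 0b00111100: an 8-element
    // vector with a 4-element subvector inserted at index 2.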
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -1402,6 +1411,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known &= Known2; break; } @@ -1418,6 +1441,19 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) + return TLO.CombineTo(Op, Op0); + if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) + return TLO.CombineTo(Op, Op1); + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) + return true; + // If the operation can be done in a smaller type, do so. + if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + // Attempt to avoid multi-use ops if we don't need anything from them. if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( @@ -1432,19 +1468,6 @@ bool TargetLowering::SimplifyDemandedBits( } } - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) - return TLO.CombineTo(Op, Op0); - if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) - return TLO.CombineTo(Op, Op1); - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) - return true; - // If the operation can be done in a smaller type, do so. - if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; - Known |= Known2; break; } @@ -1461,20 +1484,6 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); - // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { - SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( - Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( - Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); - if (DemandedOp0 || DemandedOp1) { - Op0 = DemandedOp0 ? DemandedOp0 : Op0; - Op1 = DemandedOp1 ? DemandedOp1 : Op1; - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); - return TLO.CombineTo(Op, NewOp); - } - } - // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. 
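This 'or' hunk, like the 'and' and 'xor' ones around it, moves the cheap known-bits shortcuts ahead of the SimplifyMultipleUseDemandedBits bypass, so the more expensive rewrite only runs when no direct fold fires first. The shortcut itself, restated as a standalone predicate (in the surrounding function Known holds Op1's bits and Known2 holds Op0's):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // OR can drop Op1 when every demanded bit is either already one in Op0
    // (the result is 1 regardless) or known zero in Op1 (no contribution).
    static bool orMakesOp1Redundant(const APInt &DemandedBits,
                                    const KnownBits &KnownOp0,
                                    const KnownBits &KnownOp1) {
      return DemandedBits.isSubsetOf(KnownOp0.One | KnownOp1.Zero);
    }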
if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1519,6 +1528,20 @@ bool TargetLowering::SimplifyDemandedBits( if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + Known ^= Known2; break; } @@ -1972,9 +1995,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umin(Known0, Known1); if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) - return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1); if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) - return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1); break; } case ISD::UMAX: { @@ -1985,9 +2008,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umax(Known0, Known1); if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) - return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1); if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) - return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1); + return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1); break; } case ISD::BITREVERSE: { @@ -2486,9 +2509,7 @@ bool TargetLowering::SimplifyDemandedBits( // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = - TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); - return TLO.CombineTo(Op, NewOp); + Op->setFlags(Flags); } return true; } @@ -3031,15 +3052,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::VSELECT: { + SDValue Sel = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + // Try to transform the select condition based on the current demanded // elements. - // TODO: If a condition element is undef, we can choose from one arm of the - // select (and if one arm is undef, then we can propagate that to the - // result). - // TODO - add support for constant vselect masks (see IR version of this). - APInt UnusedUndef, UnusedZero; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef, - UnusedZero, TLO, Depth + 1)) + APInt UndefSel, UndefZero; + if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO, + Depth + 1)) return true; // See if we can simplify either vselect operand. 
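The UMIN/UMAX folds touched above lean on KnownBits' optional comparisons: when the unsigned ordering of the two operands is decidable from their known bits alone, the min/max collapses to one operand. A standalone sketch against this tree's Optional-returning KnownBits::ule/ult (helper name invented):

    #include "llvm/ADT/Optional.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Returns 0 or 1 for the operand umin always equals, or None when the
    // known bits leave the ordering undecided.
    static Optional<unsigned> uminOperand(const KnownBits &K0,
                                          const KnownBits &K1) {
      if (Optional<bool> IsULE = KnownBits::ule(K0, K1))
        return *IsULE ? 0u : 1u;
      if (Optional<bool> IsULT = KnownBits::ult(K0, K1))
        return *IsULT ? 0u : 1u;
      return None;
    }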
@@ -3047,15 +3068,24 @@ bool TargetLowering::SimplifyDemandedVectorElts(
 APInt DemandedRHS(DemandedElts);
 APInt UndefLHS, ZeroLHS;
 APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
 return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
 return true;
 KnownUndef = UndefLHS & UndefRHS;
 KnownZero = ZeroLHS & ZeroRHS;
+
+ // If we know that the selected element is always zero, we don't need the
+ // select value element.
+ APInt DemandedSel = DemandedElts & ~KnownZero;
+ if (DemandedSel != DemandedElts)
+ if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
+ Depth + 1))
+ return true;
+
 break;
 }
 case ISD::VECTOR_SHUFFLE: {
@@ -5239,17 +5269,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
 case 32:
 case 64:
 case 128:
- OpInfo.ConstraintVT =
- MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ OpTy = IntegerType::get(OpTy->getContext(), BitSize);
 break;
 }
- } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
- OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
- } else {
- OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
 }
+ EVT VT = getAsmOperandValueType(DL, OpTy, true);
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
 ArgNo++;
 }
 }
@@ -7833,7 +7859,7 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
 // return popcount(~x);
 //
 // Ref: "Hacker's Delight" by Henry Warren
- for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
+ for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
 Op = DAG.getNode(ISD::OR, dl, VT, Op,
 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f3d68bd9c92d..2badbe34ae6a 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -449,9 +449,6 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
 Name == ".llvmbc" || Name == ".llvmcmd")
 return SectionKind::getMetadata();
- if (Name == ".llvm.offloading")
- return SectionKind::getExclude();
-
 if (Name.empty() || Name[0] != '.')
 return K;
 // Default implementation based on some magic section names.
@@ -501,6 +498,9 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
 if (hasPrefix(Name, ".preinit_array"))
 return ELF::SHT_PREINIT_ARRAY;
+ if (hasPrefix(Name, ".llvm.offloading"))
+ return ELF::SHT_LLVM_OFFLOADING;
+
 if (K.isBSS() || K.isThreadBSS())
 return ELF::SHT_NOBITS;
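expandCTLZ's loop bound changes from (1U << i) <= (NumBitsPerElt / 2) to (1U << i) < NumBitsPerElt; for the power-of-two widths of legal types the two produce the same shift sequence, and the new form also behaves for other widths. The "Hacker's Delight" identity the loop implements, shown on a plain uint32_t (assumes a GCC/Clang-style __builtin_popcount):

    #include <cstdint>

    // ctlz(x) == popcount(~smear(x)): OR the leading one into every lower
    // position, then count the zeros above it by popcounting the complement.
    static unsigned ctlz32(uint32_t X) {
      for (unsigned Shift = 1; Shift < 32; Shift <<= 1)
        X |= X >> Shift;               // shifts by 1, 2, 4, 8, 16
      return __builtin_popcount(~X);   // ctlz32(0) == 32
    }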