author    | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000
commit    | fe4fed2e4d17945c38474cf0746792d04bf84b7d (patch)
tree      | f82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm
parent    | bbd32193a0463b1c7383443a45b774a2fe4d3430 (diff)
parent    | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
Merge llvm, clang, lld, lldb, compiler-rt and libc++ trunk r321545,
update build glue and version numbers, add new intrinsics headers, and
update OptionalObsoleteFiles.inc.
Notes:
svn path=/projects/clang600-import/; revision=327330
Diffstat (limited to 'contrib/llvm')
122 files changed, 3776 insertions, 924 deletions
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
index 3932a2ec2498..4f896bddff87 100644
--- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -197,6 +197,9 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                        FastMathFlags FMF, const SimplifyQuery &Q);
 
+/// Given a callsite, fold the result or return null.
+Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q);
+
 /// Given a function and iterators over arguments, fold the result or return
 /// null.
 Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 391a333594e9..c2974525a6ff 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -407,12 +407,6 @@ public:
   void getNonLocalPointerDependency(Instruction *QueryInst,
                                     SmallVectorImpl<NonLocalDepResult> &Result);
 
-  /// Perform a dependency query specifically for QueryInst's access to Loc.
-  /// The other comments for getNonLocalPointerDependency apply here as well.
-  void getNonLocalPointerDependencyFrom(Instruction *QueryInst,
-                                        const MemoryLocation &Loc, bool isLoad,
-                                        SmallVectorImpl<NonLocalDepResult> &Result);
-
   /// Removes an instruction from the dependence analysis, updating the
   /// dependence of instructions that previously depended on it.
   void removeInstruction(Instruction *InstToRemove);
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cecd8958e9d9..c20f20cfbe4d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -646,9 +646,6 @@ public:
   /// \brief Additional properties of an operand's values.
   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
 
-  /// \return True if target can execute instructions out of order.
-  bool isOutOfOrder() const;
-
   /// \return The number of scalar or vector registers that the target has.
   /// If 'Vectors' is true, it returns the number of vector registers. If it is
   /// set to false, it returns the number of scalar registers.
@@ -1021,7 +1018,6 @@ public:
                             Type *Ty) = 0;
   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                             Type *Ty) = 0;
-  virtual bool isOutOfOrder() const = 0;
   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
@@ -1299,9 +1295,6 @@ public:
                     Type *Ty) override {
     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
   }
-  bool isOutOfOrder() const override {
-    return Impl.isOutOfOrder();
-  }
   unsigned getNumberOfRegisters(bool Vector) override {
     return Impl.getNumberOfRegisters(Vector);
   }
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 3625675d53de..4c37402278ef 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -337,8 +337,6 @@ public:
     return TTI::TCC_Free;
   }
 
-  bool isOutOfOrder() const { return false; }
-
   unsigned getNumberOfRegisters(bool Vector) { return 8; }
 
   unsigned getRegisterBitWidth(bool Vector) const { return 32; }
diff --git a/contrib/llvm/include/llvm/BinaryFormat/COFF.h b/contrib/llvm/include/llvm/BinaryFormat/COFF.h
index e6bb50cadb12..a55c544dfe90 100644
--- a/contrib/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/contrib/llvm/include/llvm/BinaryFormat/COFF.h
@@ -95,7 +95,7 @@ enum MachineTypes : unsigned {
   MT_Invalid = 0xffff,
 
   IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
-  IMAGE_FILE_MACHINE_AM33 = 0x13,
+  IMAGE_FILE_MACHINE_AM33 = 0x1D3,
   IMAGE_FILE_MACHINE_AMD64 = 0x8664,
   IMAGE_FILE_MACHINE_ARM = 0x1C0,
   IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f1f9275b0786..526ddb1b9706 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -402,10 +402,6 @@ public:
     return BaseT::getInstructionLatency(I);
   }
 
-  bool isOutOfOrder() const {
-    return getST()->getSchedModel().isOutOfOrder();
-  }
-
   /// @}
 
   /// \name Vector TTI Implementations
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d7999cd33231..cc08fe683272 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -288,7 +288,8 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
    llvm_i32_ty, // ordering
    llvm_i32_ty, // scope
    llvm_i1_ty], // isVolatile
-  [IntrArgMemOnly, NoCapture<0>]
+  [IntrArgMemOnly, NoCapture<0>], "",
+  [SDNPMemOperand]
 >;
 
 def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
index 9eccafc65f3a..641631cc4ec9 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
@@ -226,8 +226,8 @@ public:
 
   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
                                           uint32_t Discriminator,
-                                          const std::string &FName,
-                                          uint64_t Num, uint64_t Weight = 1) {
+                                          StringRef FName, uint64_t Num,
+                                          uint64_t Weight = 1) {
     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
         FName, Num, Weight);
   }
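The SampleProf.h hunk above swaps a const std::string& parameter for a StringRef. A minimal standalone sketch of why that matters, using std::string_view as a stand-in for LLVM's StringRef (the function names here are invented for illustration):

```cpp
#include <cassert>
#include <string>
#include <string_view>

// Old shape: a caller holding a literal or a StringRef must materialize
// a temporary std::string (usually an allocation) to bind the reference.
static size_t countOld(const std::string &FName) { return FName.size(); }

// New shape: a view binds to any contiguous characters without copying.
static size_t countNew(std::string_view FName) { return FName.size(); }

int main() {
  assert(countOld("_Z4mainv") == 8); // constructs a temporary std::string
  assert(countNew("_Z4mainv") == 8); // pointer + length, no allocation
  return 0;
}
```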
diff --git a/contrib/llvm/include/llvm/Support/KnownBits.h b/contrib/llvm/include/llvm/Support/KnownBits.h
index 7a4de3e5ff12..97e73b13fca3 100644
--- a/contrib/llvm/include/llvm/Support/KnownBits.h
+++ b/contrib/llvm/include/llvm/Support/KnownBits.h
@@ -100,13 +100,11 @@ public:
 
   /// Make this value negative.
   void makeNegative() {
-    assert(!isNonNegative() && "Can't make a non-negative value negative");
     One.setSignBit();
   }
 
   /// Make this value negative.
   void makeNonNegative() {
-    assert(!isNegative() && "Can't make a negative value non-negative");
     Zero.setSignBit();
   }
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index 7276f2524fed..de7d21f9f133 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
 void DemandedBits::print(raw_ostream &OS) {
   performAnalysis();
   for (auto &KV : AliveBits) {
-    OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
-       << *KV.first << "\n";
+    OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
+       << " for " << *KV.first << '\n';
   }
 }
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 3ce1281743c3..93fb1143e505 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
-    if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
-      return Elt;
+    if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
+      if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+        return Elt;
 
   // An undef extract index can be arbitrarily chosen to be an out-of-range
   // index value, which would result in the instruction being undef.
@@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
       return *ArgBegin;
     return nullptr;
   }
+  case Intrinsic::bswap: {
+    Value *IIOperand = *ArgBegin;
+    Value *X = nullptr;
+    // bswap(bswap(x)) -> x
+    if (match(IIOperand, m_BSwap(m_Value(X))))
+      return X;
+    return nullptr;
+  }
+  case Intrinsic::bitreverse: {
+    Value *IIOperand = *ArgBegin;
+    Value *X = nullptr;
+    // bitreverse(bitreverse(x)) -> x
+    if (match(IIOperand, m_BitReverse(m_Value(X))))
+      return X;
+    return nullptr;
+  }
   default:
     return nullptr;
   }
@@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
         return SimplifyRelativeLoad(C0, C1, Q.DL);
       return nullptr;
     }
+    case Intrinsic::powi:
+      if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
+        // powi(x, 0) -> 1.0
+        if (Power->isZero())
+          return ConstantFP::get(LHS->getType(), 1.0);
+        // powi(x, 1) -> x
+        if (Power->isOne())
+          return LHS;
+      }
+      return nullptr;
    default:
      return nullptr;
    }
@@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
   return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
 }
 
+Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
+  CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
+  return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+                        Q, RecursionLimit);
+}
+
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.
@@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
-    Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                          Q);
+    Result = SimplifyCall(CS, Q);
     break;
   }
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
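The involution folds and powi folds added to SimplifyIntrinsic above are easy to sanity-check outside of LLVM. A minimal sketch in plain C++ using compiler builtins that mirror the intrinsics (__builtin_bitreverse32 is Clang-specific); the asserted identities are exactly what the simplifier now returns without materializing the calls:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678u;

  // bswap(bswap(x)) -> x: byte swapping is an involution.
  assert(__builtin_bswap32(__builtin_bswap32(X)) == X);

  // bitreverse(bitreverse(x)) -> x (builtin requires Clang).
  assert(__builtin_bitreverse32(__builtin_bitreverse32(X)) == X);

  double D = 3.5;
  // powi(x, 0) -> 1.0 and powi(x, 1) -> x.
  assert(__builtin_powi(D, 0) == 1.0);
  assert(__builtin_powi(D, 1) == D);
  return 0;
}
```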
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index 7b792ed0a2e2..0e3f498cb14c 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) {
         // Check that noalias arguments don't alias other arguments. This is
         // not fully precise because we don't know the sizes of the dereferenced
         // memory regions.
-        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
-          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
+          AttributeList PAL = CS.getAttributes();
+          unsigned ArgNo = 0;
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+            // Skip ByVal arguments since they will be memcpy'd to the callee's
+            // stack so we're not really passing the pointer anyway.
+            if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+              continue;
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
               AliasResult Result = AA->alias(*AI, *BI);
               Assert(Result != MustAlias && Result != PartialAlias,
                      "Unusual: noalias argument aliases another argument", &I);
             }
+          }
+        }
 
         // Check that an sret argument points to valid memory.
         if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index bb7bf967994c..bf83f52ccf2e 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
   bool isLoad = isa<LoadInst>(QueryInst);
-  return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
-}
-
-void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
-    Instruction *QueryInst,
-    const MemoryLocation &Loc,
-    bool isLoad,
-    SmallVectorImpl<NonLocalDepResult> &Result) {
   BasicBlock *FromBB = QueryInst->getParent();
   assert(FromBB);
 
@@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
   if (!Pair.second) {
-    if (CacheInfo->Size != Loc.Size) {
-      // The query's Size differs from the cached one. Throw out the
-      // cached data and proceed with the query at the new size.
+    if (CacheInfo->Size < Loc.Size) {
+      // The query's Size is greater than the cached one. Throw out the
+      // cached data and proceed with the query at the greater size.
       CacheInfo->Pair = BBSkipFirstBlockPair();
       CacheInfo->Size = Loc.Size;
       for (auto &Entry : CacheInfo->NonLocalDeps)
         if (Instruction *Inst = Entry.getResult().getInst())
           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
       CacheInfo->NonLocalDeps.clear();
+    } else if (CacheInfo->Size > Loc.Size) {
+      // This query's Size is less than the cached one. Conservatively restart
+      // the query using the greater size.
+      return getNonLocalPointerDepFromBB(
+          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+          StartBB, Result, Visited, SkipFirstBlock);
     }
 
     // If the query's AATags are inconsistent with the cached one,
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 10badd89a4a8..efa5bd564ad0 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
       // Inliner doesn't handle variadic functions.
       // FIXME: refactor this to use the same code that inliner is using.
-      F.isVarArg();
+      F.isVarArg() ||
+      // Don't try to import functions with noinline attribute.
+      F.getAttributes().hasFnAttribute(Attribute::NoInline);
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
                                     /* Live = */ false, F.isDSOLocal());
   FunctionSummary::FFlags FunFlags{
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 2a8088dc4452..f34549ae52b4 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getAddExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
@@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getMulExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // If the input value is a chrec scev, truncate the chrec's operands.
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 86f714b930d0..3ceda677ba61 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
       // generated code.
       if (isa<DbgInfoIntrinsic>(IP))
         ScanLimit++;
+
+      // Conservatively, do not use any instruction which has any of wrap/exact
+      // flags installed.
+      // TODO: Instead of simply disable poison instructions we can be clever
+      // here and match SCEV to this instruction.
+      auto canGeneratePoison = [](Instruction *I) {
+        if (isa<OverflowingBinaryOperator>(I) &&
+            (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
+          return true;
+        if (isa<PossiblyExactOperator>(I) && I->isExact())
+          return true;
+        return false;
+      };
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
-          IP->getOperand(1) == RHS)
+          IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
         return &*IP;
       if (IP == BlockBegin) break;
     }
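The canGeneratePoison guard above exists because nsw/nuw/exact flags only hold for the instruction's original operands; reusing such an instruction for a SCEV with different semantics could smuggle poison into unrelated code. The plain-C++ analogue of an `add nsw` is signed addition, where overflow is undefined behavior. A small sketch using the GCC/Clang __builtin_add_overflow checker (values are illustrative):

```cpp
#include <cassert>
#include <climits>

int main() {
  int A = INT_MAX, B = 1, R;
  // Computing A + B directly here would be signed-overflow UB, the
  // analogue of an 'add nsw' yielding poison in LLVM IR. The builtin
  // reports the overflow instead of executing it.
  bool Overflowed = __builtin_add_overflow(A, B, &R);
  assert(Overflowed);
  return 0;
}
```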
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index c9e9c6d1a419..b744cae51ed7 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   return Cost;
 }
 
-bool TargetTransformInfo::isOutOfOrder() const {
-  return TTIImpl->isOutOfOrder();
-}
-
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return TTIImpl->getNumberOfRegisters(Vector);
 }
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 2730daefa625..cd4cee631568 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
   LHS = CmpLHS;
   RHS = CmpRHS;
 
-  // If the predicate is an "or-equal"  (FP) predicate, then signed zeroes may
-  // return inconsistent results between implementations.
-  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
-  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
-  // Therefore we behave conservatively and only proceed if at least one of the
-  // operands is known to not be zero, or if we don't care about signed zeroes.
+  // Signed zero may return inconsistent results between implementations.
+  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore, we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero or if we don't care about signed zero.
   switch (Pred) {
   default: break;
+  // FIXME: Include OGT/OLT/UGT/ULT.
   case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
   case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
     if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
@@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
 
   // Deal with type mismatches.
   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   cast<CastInst>(TrueVal)->getOperand(0), C,
                                   LHS, RHS);
-    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+    }
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   C, cast<CastInst>(FalseVal)->getOperand(0),
                                   LHS, RHS);
+    }
   }
   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                               LHS, RHS);
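The signed-zero hazard described in the rewritten comment is observable from plain C++. In the demonstration below, std::fmin follows IEEE 754-2008 minNum and may legitimately return either zero for equal-valued operands, while the compare-and-select form is fully determined:

```cpp
#include <cmath>
#include <cstdio>

int main() {
  double A = 0.0, B = -0.0;

  // The select form: (0.0 <= -0.0) is true, so this yields +0.0.
  double Sel = (A <= B) ? A : B;

  // minNum: with equal-valued zeros, IEEE 754-2008 5.3.1 lets the
  // implementation return either +0.0 or -0.0.
  double Min = std::fmin(A, B);

  std::printf("select: %s0.0, fmin: %s0.0\n",
              std::signbit(Sel) ? "-" : "+",
              std::signbit(Min) ? "-" : "+");
  return 0;
}
```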
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 17f907eb07e8..3218dce8f575 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   EVT VT = Sel.getValueType();
   SDLoc DL(Sel);
   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
-  assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
-          isConstantFPBuildVectorOrConstantFP(NewCT)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCT.isUndef() &&
+      !isConstantOrConstantVector(NewCT, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCT))
+    return SDValue();
 
   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
-  assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
-          isConstantFPBuildVectorOrConstantFP(NewCF)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCF.isUndef() &&
+      !isConstantOrConstantVector(NewCF, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCF))
+    return SDValue();
 
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
@@ -3577,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 
   // TODO: What is the 'or' equivalent of this fold?
   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
-  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+      IsInteger && CC0 == ISD::SETNE &&
       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
     SDValue One = DAG.getConstant(1, DL, OpVT);
@@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
       VT.getSizeInBits() <= 64) {
     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-      APInt ADDC = ADDI->getAPIntValue();
-      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
         // immediate for an add, but it is legal if its top c2 bits are set,
         // transform the ADD so the immediate doesn't need to be materialized
         // in a register.
-        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+        APInt ADDC = ADDI->getAPIntValue();
+        APInt SRLC = SRLI->getAPIntValue();
+        if (ADDC.getMinSignedBits() <= 64 &&
+            SRLC.ult(VT.getSizeInBits()) &&
+            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
-                                             SRLI->getZExtValue());
+                                             SRLC.getZExtValue());
           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
             ADDC |= Mask;
             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
@@ -3987,6 +3993,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // reassociate and
   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
     return RAND;
+
+  // Try to convert a constant mask AND into a shuffle clear mask.
+  if (VT.isVector())
+    if (SDValue Shuffle = XformToShuffleWithZero(N))
+      return Shuffle;
+
   // fold (and (or x, C), D) -> D if (C & D) == D
   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -16480,6 +16492,8 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
   EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = peekThroughBitcast(N->getOperand(1));
@@ -16490,9 +16504,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   if (LegalOperations)
     return SDValue();
 
-  if (N->getOpcode() != ISD::AND)
-    return SDValue();
-
   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
     return SDValue();
 
@@ -16581,10 +16592,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
     return Fold;
 
-  // Try to convert a constant mask AND into a shuffle clear mask.
-  if (SDValue Shuffle = XformToShuffleWithZero(N))
-    return Shuffle;
-
   // Type legalization might introduce new shuffles in the DAG.
   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
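XformToShuffleWithZero, now reached from visitAND, rewrites an AND with a constant all-ones/all-zeros lane mask as a shuffle that pulls each lane either from V or from a zero vector. A scalar model of that equivalence in plain C++, purely illustrative:

```cpp
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint32_t, 4> V = {0x11111111, 0x22222222, 0x33333333, 0x44444444};
  std::array<uint32_t, 4> Mask = {0xffffffff, 0, 0xffffffff, 0};

  // Concatenate V with a zero vector; shuffle index i picks V's lane i,
  // index 4 picks zero -- mirroring vector_shuffle V, Zero, <0, 4, 2, 4>.
  std::array<uint32_t, 8> Cat = {V[0], V[1], V[2], V[3], 0, 0, 0, 0};
  std::array<int, 4> Idx = {0, 4, 2, 4};

  for (int i = 0; i < 4; ++i)
    assert((V[i] & Mask[i]) == Cat[Idx[i]]);
  return 0;
}
```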
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 74970ab5792c..7643790df350 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -49,6 +49,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "legalizevectorops"
+
 namespace {
 
 class VectorLegalizer {
@@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   if (Op.getOpcode() == ISD::LOAD) {
     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     ISD::LoadExtType ExtType = LD->getExtensionType();
-    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+      DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
                                    LD->getMemoryVT())) {
       default: llvm_unreachable("This action is not supported yet!");
@@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
         Changed = true;
         return LegalizeOp(ExpandLoad(Op));
       }
+    }
   } else if (Op.getOpcode() == ISD::STORE) {
     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     EVT StVT = ST->getMemoryVT();
     MVT ValVT = ST->getValue().getSimpleValueType();
-    if (StVT.isVector() && ST->isTruncatingStore())
+    if (StVT.isVector() && ST->isTruncatingStore()) {
+      DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+            Node->dump(&DAG));
       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
       default: llvm_unreachable("This action is not supported yet!");
       case TargetLowering::Legal:
@@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
         Changed = true;
         return LegalizeOp(ExpandStore(Op));
       }
+    }
   } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
     HasVectorValue = true;
 
@@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     break;
   }
 
+  DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
   switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
   default: llvm_unreachable("This action is not supported yet!");
   case TargetLowering::Promote:
@@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     Changed = true;
     break;
   case TargetLowering::Legal:
+    DEBUG(dbgs() << "Legal node: nothing to do\n");
     break;
   case TargetLowering::Custom: {
+    DEBUG(dbgs() << "Trying custom legalization\n");
    if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+      DEBUG(dbgs() << "Successfully custom legalized node\n");
       Result = Tmp1;
       break;
     }
+    DEBUG(dbgs() << "Could not custom legalize node\n");
     LLVM_FALLTHROUGH;
   }
   case TargetLowering::Expand:
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a04c770c51c4..4c8b63d2f239 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
@@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
@@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
 void SelectionDAG::salvageDebugInfo(SDNode &N) {
   if (!N.getHasDebugValue())
     return;
+
+  SmallVector<SDDbgValue *, 2> ClonedDVs;
   for (auto DV : GetDbgValues(&N)) {
     if (DV->isInvalidated())
       continue;
@@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
         SDDbgValue *Clone =
             getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
                         DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+        ClonedDVs.push_back(Clone);
         DV->setIsInvalidated();
-        AddDbgValue(Clone, N0.getNode(), false);
         DEBUG(dbgs() << "SALVAGE: Rewriting";
               N0.getNode()->dumprFull(this);
               dbgs() << " into " << *DIExpr << '\n');
       }
     }
   }
+
+  for (SDDbgValue *Dbg : ClonedDVs)
+    AddDbgValue(Dbg, Dbg->getSDNode(), false);
 }
 
 namespace {
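The salvageDebugInfo change is the usual fix for mutating a collection while iterating it: clones are parked in ClonedDVs and only added once the GetDbgValues loop finishes. The same discipline in a self-contained sketch (the names are invented for the example):

```cpp
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Values = {1, 2, 3};
  std::vector<int> Cloned; // deferred insertions, like ClonedDVs

  // Collect the new elements first; pushing into Values here could
  // invalidate the iterators driving the loop.
  for (int V : Values)
    Cloned.push_back(V * 10);

  // Apply the insertions once iteration is finished.
  Values.insert(Values.end(), Cloned.begin(), Cloned.end());

  assert(Values.size() == 6);
  return 0;
}
```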
diff --git a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
index 68e0ce39a54e..04deb434cec2 100644
--- a/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -237,6 +237,59 @@ class InstructionVerifier;
 /// Builds BasicBlockState for each BB of the function.
 /// It can traverse function for verification and provides all required
 /// information.
+///
+/// GC pointer may be in one of three states: relocated, unrelocated and
+/// poisoned.
+/// Relocated pointer may be used without any restrictions.
+/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
+/// or returned. Unrelocated pointer may be safely compared against another
+/// unrelocated pointer or against a pointer exclusively derived from null.
+/// Poisoned pointers are produced when we somehow derive pointer from relocated
+/// and unrelocated pointers (e.g. phi, select). This pointers may be safely
+/// used in a very limited number of situations. Currently the only way to use
+/// it is comparison against constant exclusively derived from null. All
+/// limitations arise due to their undefined state: this pointers should be
+/// treated as relocated and unrelocated simultaneously.
+/// Rules of deriving:
+/// R + U = P - that's where the poisoned pointers come from
+/// P + X = P
+/// U + U = U
+/// R + R = R
+/// X + C = X
+/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
+/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
+/// nothing (in case when "+" is unary operation).
+/// Deriving of pointers by itself is always safe.
+/// NOTE: when we are making decision on the status of instruction's result:
+/// a) for phi we need to check status of each input *at the end of
+///    corresponding predecessor BB*.
+/// b) for other instructions we need to check status of each input *at the
+///    current point*.
+///
+/// FIXME: This works fairly well except one case
+///     bb1:
+///     p = *some GC-ptr def*
+///     p1 = gep p, offset
+///         /     |
+///        /      |
+///    bb2:       |
+///    safepoint  |
+///        \      |
+///         \     |
+///      bb3:
+///      p2 = phi [p, bb2] [p1, bb1]
+///      p3 = phi [p, bb2] [p, bb1]
+///      here p and p1 is unrelocated
+///      p2 and p3 is poisoned (though they shouldn't be)
+///
+/// This leads to some weird results:
+///      cmp eq p, p2 - illegal instruction (false-positive)
+///      cmp eq p1, p2 - illegal instruction (false-positive)
+///      cmp eq p, p3 - illegal instruction (false-positive)
+///      cmp eq p, p1 - ok
+/// To fix this we need to introduce conception of generations and be able to
+/// check if two values belong to one generation or not. This way p2 will be
+/// considered to be unrelocated and no false alarm will happen.
 class GCPtrTracker {
   const Function &F;
   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
@@ -244,6 +297,9 @@ class GCPtrTracker {
   // This set contains defs of unrelocated pointers that are proved to be legal
   // and don't need verification.
   DenseSet<const Instruction *> ValidUnrelocatedDefs;
+  // This set contains poisoned defs. They can be safely ignored during
+  // verification too.
+  DenseSet<const Value *> PoisonedDefs;
 
 public:
   GCPtrTracker(const Function &F, const DominatorTree &DT);
@@ -251,6 +307,8 @@ public:
   BasicBlockState *getBasicBlockState(const BasicBlock *BB);
   const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
 
+  bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
+
   /// Traverse each BB of the function and call
   /// InstructionVerifier::verifyInstruction for each possibly invalid
   /// instruction.
@@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState(
 }
 
 bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
-  return ValidUnrelocatedDefs.count(I);
+  // Poisoned defs are skipped since they are always safe by itself by
+  // definition (for details see comment to this class).
+  return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
 }
 
 void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
@@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
          "Passed Contribution should be from the passed BasicBlockState!");
   AvailableValueSet AvailableSet = BBS->AvailableIn;
   bool ContributionChanged = false;
+  // For explanation why instructions are processed this way see
+  // "Rules of deriving" in the comment to this class.
   for (const Instruction &I : *BB) {
-    bool ProducesUnrelocatedPointer = false;
-    if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
-        containsGCPtrType(I.getType())) {
-      // GEP/bitcast of unrelocated pointer is legal by itself but this
-      // def shouldn't appear in any AvailableSet.
+    bool ValidUnrelocatedPointerDef = false;
+    bool PoisonedPointerDef = false;
+    // TODO: `select` instructions should be handled here too.
+    if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+      if (containsGCPtrType(PN->getType())) {
+        // If both is true, output is poisoned.
+        bool HasRelocatedInputs = false;
+        bool HasUnrelocatedInputs = false;
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+          const BasicBlock *InBB = PN->getIncomingBlock(i);
+          const Value *InValue = PN->getIncomingValue(i);
+
+          if (isNotExclusivelyConstantDerived(InValue)) {
+            if (isValuePoisoned(InValue)) {
+              // If any of inputs is poisoned, output is always poisoned too.
+              HasRelocatedInputs = true;
+              HasUnrelocatedInputs = true;
+              break;
+            }
+            if (BlockMap[InBB]->AvailableOut.count(InValue))
+              HasRelocatedInputs = true;
+            else
+              HasUnrelocatedInputs = true;
+          }
+        }
+        if (HasUnrelocatedInputs) {
+          if (HasRelocatedInputs)
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
+        }
+      }
+    } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
+               containsGCPtrType(I.getType())) {
+      // GEP/bitcast of unrelocated pointer is legal by itself but this def
+      // shouldn't appear in any AvailableSet.
       for (const Value *V : I.operands())
         if (containsGCPtrType(V->getType()) &&
             isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
-          ProducesUnrelocatedPointer = true;
+          if (isValuePoisoned(V))
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
           break;
         }
     }
-    if (!ProducesUnrelocatedPointer) {
-      bool Cleared = false;
-      transferInstruction(I, Cleared, AvailableSet);
-      (void)Cleared;
-    } else {
-      // Remove def of unrelocated pointer from Contribution of this BB
-      // and trigger update of all its successors.
+    assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
+           "Value cannot be both unrelocated and poisoned!");
+    if (ValidUnrelocatedPointerDef) {
+      // Remove def of unrelocated pointer from Contribution of this BB and
+      // trigger update of all its successors.
       Contribution.erase(&I);
+      PoisonedDefs.erase(&I);
       ValidUnrelocatedDefs.insert(&I);
-      DEBUG(dbgs() << "Removing " << I << " from Contribution of "
+      DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of "
                    << BB->getName() << "\n");
       ContributionChanged = true;
+    } else if (PoisonedPointerDef) {
+      // Mark pointer as poisoned, remove its def from Contribution and trigger
+      // update of all successors.
+      Contribution.erase(&I);
+      PoisonedDefs.insert(&I);
+      DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
+                   << BB->getName() << "\n");
+      ContributionChanged = true;
+    } else {
+      bool Cleared = false;
+      transferInstruction(I, Cleared, AvailableSet);
+      (void)Cleared;
     }
   }
   return ContributionChanged;
@@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction(
 
   // Returns true if LHS and RHS are unrelocated pointers and they are
   // valid unrelocated uses.
-  auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
-                                 &RHS] () {
+  auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
+                                 &LHS, &RHS] () {
     // A cmp instruction has valid unrelocated pointer operands only if
     // both operands are unrelocated pointers.
     // In the comparison between two pointers, if one is an unrelocated
@@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction(
         (baseTyLHS == BaseType::NonConstant &&
          baseTyRHS == BaseType::ExclusivelySomeConstant))
       return false;
+
+    // If one of pointers is poisoned and other is not exclusively derived
+    // from null it is an invalid expression: it produces poisoned result
+    // and unless we want to track all defs (not only gc pointers) the only
+    // option is to prohibit such instructions.
+    if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
+        (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
+      return false;
+
     // All other cases are valid cases enumerated below:
-    // 1. Comparison between an exlusively derived null pointer and a
+    // 1. Comparison between an exclusively derived null pointer and a
     //    constant base pointer.
-    // 2. Comparison between an exlusively derived null pointer and a
+    // 2. Comparison between an exclusively derived null pointer and a
     //    non-constant unrelocated base pointer.
     // 3. Comparison between 2 unrelocated pointers.
+    // 4. Comparison between a pointer exclusively derived from null and a
+    //    non-constant poisoned pointer.
     return true;
   };
   if (!hasValidUnrelocatedUse()) {
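The deriving rules in the comment above (R + U = P, P + X = P, U + U = U, R + R = R, X + C = X) amount to a small join table. A hypothetical standalone model of just that table, not the verifier's actual data structures:

```cpp
#include <cassert>

// Pointer states from the comment: Relocated, Unrelocated, Poisoned, plus
// Constant, which never changes the state of the other operand.
enum class PtrState { Constant, Relocated, Unrelocated, Poisoned };

// Join of two operand states for a derived pointer (phi, select, gep, ...).
static PtrState derive(PtrState A, PtrState B) {
  if (A == PtrState::Constant) return B;           // X + C = X
  if (B == PtrState::Constant) return A;
  if (A == PtrState::Poisoned || B == PtrState::Poisoned)
    return PtrState::Poisoned;                     // P + X = P
  if (A != B) return PtrState::Poisoned;           // R + U = P
  return A;                                        // R + R = R, U + U = U
}

int main() {
  assert(derive(PtrState::Relocated, PtrState::Unrelocated) == PtrState::Poisoned);
  assert(derive(PtrState::Unrelocated, PtrState::Constant) == PtrState::Unrelocated);
  assert(derive(PtrState::Relocated, PtrState::Relocated) == PtrState::Relocated);
  return 0;
}
```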
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index 51b4f225939f..626d2f5dc813 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
   // from the ObjC data structures generated by the front end.
 
   // special case if this data blob is an ObjC class definition
-  std::string Section = v->getSection();
-  if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCClass(gv);
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
+    StringRef Section = GV->getSection();
+    if (Section.startswith("__OBJC,__class,")) {
+      addObjCClass(GV);
     }
-  }
 
-  // special case if this data blob is an ObjC category definition
-  else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCCategory(gv);
+    // special case if this data blob is an ObjC category definition
+    else if (Section.startswith("__OBJC,__category,")) {
+      addObjCCategory(GV);
     }
-  }
 
-  // special case if this data blob is the list of referenced classes
-  else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCClassRef(gv);
+    // special case if this data blob is the list of referenced classes
+    else if (Section.startswith("__OBJC,__cls_refs,")) {
+      addObjCClassRef(GV);
     }
   }
 }
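The LTOModule cleanup replaces Section.compare(0, N, Prefix) == 0 tests, where N must be kept in sync with the literal by hand, with StringRef::startswith. The same idea in standard C++ (starts_with requires C++20; std::string_view stands in for StringRef):

```cpp
#include <cassert>
#include <string_view>

int main() {
  std::string_view Section = "__OBJC,__class,MyClass";

  // Old style: the magic length 15 must match the literal exactly.
  assert(Section.substr(0, 15) == "__OBJC,__class,");

  // New style: one token, no separate length to keep in sync.
  assert(Section.starts_with("__OBJC,__class,"));
  return 0;
}
```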
diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index c8b3892375f6..abcd8905ad35 100644
--- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
   if (TempDir.empty())
     return;
   // User asked to save temps, let dump the bitcode file after import.
-  std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
+  std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
   std::error_code EC;
   raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
   if (EC)
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 195ddc78d454..5bbf49290f17 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1086,7 +1086,7 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
   return false;
 }
 
-Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
+static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
   switch (Type) {
   case MachO::PLATFORM_MACOS: return Triple::MacOSX;
   case MachO::PLATFORM_IOS: return Triple::IOS;
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 38720c23ff26..3e2150a451e0 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -423,13 +423,17 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
   if (L.isNot(AsmToken::Comma))
     return TokError("expected group name");
   Lex();
-  if (getParser().parseIdentifier(GroupName))
-    return true;
+  if (L.is(AsmToken::Integer)) {
+    GroupName = getTok().getString();
+    Lex();
+  } else if (getParser().parseIdentifier(GroupName)) {
+    return TokError("invalid group name");
+  }
   if (L.is(AsmToken::Comma)) {
     Lex();
     StringRef Linkage;
     if (getParser().parseIdentifier(Linkage))
-      return true;
+      return TokError("invalid linkage");
     if (Linkage != "comdat")
       return TokError("Linkage must be 'comdat'");
   }
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index ccc8cc56eb0a..8dbd58632f0e 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const {
   }
 
   std::vector<InstantiationGroup> Result;
-  for (const auto &InstantiationSet : InstantiationSetCollector) {
+  for (auto &InstantiationSet : InstantiationSetCollector) {
     InstantiationGroup IG{InstantiationSet.first.first,
                           InstantiationSet.first.second,
                           std::move(InstantiationSet.second)};
diff --git a/contrib/llvm/lib/Support/ARMAttributeParser.cpp b/contrib/llvm/lib/Support/ARMAttributeParser.cpp
index 3d800eb7a96c..e39bddc4e8f2 100644
--- a/contrib/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/contrib/llvm/lib/Support/ARMAttributeParser.cpp
@@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
     ParseIndexList(Data, Offset, Indicies);
     break;
   default:
-    errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
+    errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n';
     return;
   }
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index b547a0932709..4caf4a4fdce0 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
          O->getNumOccurrencesFlag() == cl::OneOrMore;
 }
 
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
 
 static bool isQuote(char C) { return C == '\"' || C == '\''; }
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
       break;
     }
 
+    char C = Src[I];
+
     // Backslash escapes the next character.
-    if (I + 1 < E && Src[I] == '\\') {
+    if (I + 1 < E && C == '\\') {
       ++I; // Skip the escape.
       Token.push_back(Src[I]);
       continue;
     }
 
     // Consume a quoted string.
-    if (isQuote(Src[I])) {
-      char Quote = Src[I++];
-      while (I != E && Src[I] != Quote) {
+    if (isQuote(C)) {
+      ++I;
+      while (I != E && Src[I] != C) {
         // Backslash escapes the next character.
         if (Src[I] == '\\' && I + 1 != E)
           ++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // End the token if this is whitespace.
-    if (isWhitespace(Src[I])) {
+    if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
       Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // This is a normal character.  Append it.
-    Token.push_back(Src[I]);
+    Token.push_back(C);
  }
 
   // Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
   // end of the source string.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
   for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    char C = Src[I];
+
     // INIT state indicates that the current input index is at the start of
     // the string or between tokens.
     if (State == INIT) {
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         State = UNQUOTED;
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       State = UNQUOTED;
       continue;
     }
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
     // quotes.
     if (State == UNQUOTED) {
       // Whitespace means the end of the token.
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
         Token.clear();
         State = INIT;
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       continue;
     }
 
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = UNQUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
     }
   }
   // Append the last token after hitting EOF with no whitespace.
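The isWhitespace rewrite above also quietly fixes a strchr pitfall: strchr treats the terminating NUL as part of the searched set, so the old form classified '\0' as whitespace. A standalone check of both predicates:

```cpp
#include <cassert>
#include <cstring>

// Old predicate: any hit in the set counts, but strchr also matches the
// implicit '\0' terminator of the set string.
static bool isWhitespaceOld(char C) {
  return std::strchr(" \t\n\r\f\v", C) != nullptr;
}

// New predicate: explicit comparisons, no surprise match for '\0'.
static bool isWhitespaceNew(char C) {
  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
}

int main() {
  assert(isWhitespaceOld('\0'));  // the bug: NUL reported as whitespace
  assert(!isWhitespaceNew('\0'));
  assert(isWhitespaceOld(' ') && isWhitespaceNew(' '));
  return 0;
}
```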
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index fc9d0cc08885..be35f894cccd 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
     return 1;
 
   if (ErrorsPrinted > 0)
-    return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
+    return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
 
   // Declare success.
   Out.keep();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index abbba7d1d5a9..40836b00b9e6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
     }
     break;
   case AArch64::FSUBv2f32:
-    if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
-      Found = true;
-    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
-      Found = true;
-    }
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
@@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i64_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+                           AArch64::FMULv2i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f64)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+                                  AArch64::FMULv2f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i64_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
@@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv4i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+                           AArch64::FMULv2i64_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv4f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+                                  AArch64::FMULv2f64)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv4i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
@@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
       Found = true;
     }
+    if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                           AArch64::FMULv4i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+      Found = true;
+    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                                  AArch64::FMULv4f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+      Found = true;
+    }
     break;
   }
   return Found;
@@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
   It = MBB.insert(It, LDRXpost);
 
   return It;
-}
\ No newline at end of file
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 8b6c571dee02..740861851185 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -27,6 +27,8 @@
 
 using namespace llvm;
 
+namespace {
+
 // --------------------------------------------------------------------
 // Implementation of permutation networks.
@@ -147,6 +149,7 @@ private:
   void build();
   bool color();
 };
+} // namespace
 
 std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
   uint8_t Color = None;
@@ -300,6 +303,7 @@ void Coloring::dump() const {
   dbgs() << " }\n}\n";
 }
 
+namespace {
 // Base class of for reordering networks. They don't strictly need to be
 // permutations, as outputs with repeated occurrences of an input element
 // are allowed.
@@ -408,7 +412,7 @@ struct BenesNetwork : public PermNetwork {
 private:
   bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
 };
-
+} // namespace
 
 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
                                 unsigned Step) {
@@ -602,6 +606,7 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
 // Support for building selection results (output instructions that are
 // parts of the final selection).
 
+namespace {
 struct OpRef {
   OpRef(SDValue V) : OpV(V) {}
   bool isValue() const { return OpV.getNode() != nullptr; }
@@ -689,6 +694,7 @@ struct ResultStack {
 
   void print(raw_ostream &OS, const SelectionDAG &G) const;
 };
+} // namespace
 
 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
   if (isValue()) {
@@ -740,6 +746,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
   }
 }
 
+namespace {
 struct ShuffleMask {
   ShuffleMask(ArrayRef<int> M) : Mask(M) {
     for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
@@ -763,6 +770,7 @@ struct ShuffleMask {
     return ShuffleMask(Mask.take_back(H));
   }
 };
+} // namespace
 
 // --------------------------------------------------------------------
 // The HvxSelector class.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 2aa395642c40..753cfff4cdae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
     llvm_unreachable("unsupported fp type");
 
   APInt API = APF.bitcastToAPInt();
-  std::string hexstr(utohexstr(API.getZExtValue()));
-  O << lead;
-  if (hexstr.length() < numHex)
-    O << std::string(numHex - hexstr.length(), '0');
-  O << utohexstr(API.getZExtValue());
+  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
 }
 
 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 86a28f7d0700..a754a6a36dab 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/Support/Format.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "nvptx-mcexpr"
@@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
   }
 
   APInt API = APF.bitcastToAPInt();
-  std::string HexStr(utohexstr(API.getZExtValue()));
-  if (HexStr.length() < NumHex)
-    OS << std::string(NumHex - HexStr.length(), '0');
-  OS << utohexstr(API.getZExtValue());
+  OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true);
 }
 
 const NVPTXGenericMCSymbolRefExpr*
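Both NVPTX hunks replace a hand-rolled zero-pad of utohexstr output with format_hex_no_prefix, i.e. fixed-width uppercase hex. An equivalent effect in plain C++; the "0f" lead-in and the bit pattern are illustrative of how NVPTX prints float constants:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Bits = 0x3f80; // e.g. part of a float constant's bit pattern

  // What the removed code did by hand: print hex, then left-pad with '0'
  // up to a fixed digit count. printf's zero-pad flag does the same.
  std::printf("0f%08llX\n", (unsigned long long)Bits); // prints 0f00003F80
  return 0;
}
```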
FeatureGFNI, + FeatureCLWB ]>; class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 5ac5d0348f8a..9edd799779c7 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - - setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { @@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, VT, Legal); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); - - setOperationAction(ISD::MUL, VT, Legal); } } @@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. -SDValue -X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && @@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); // We have to manually lower both halves so getNode doesn't try to // reassemble the build_vector. - Lower = LowerBUILD_VECTORvXi1(Lower, DAG); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); @@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) - return LowerBUILD_VECTORvXi1(Op, DAG); + return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; @@ -15543,7 +15539,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { return DAG.getNode(X86ISD::CVTSI2P, dl, VT, @@ -15551,9 +15546,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, DAG.getUNDEF(SrcVT))); } if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT)) - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. 
+ assert(VT == MVT::v2f64 && "Unexpected type"); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(), + DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src)); + } + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src)); @@ -15653,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } /// 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // This algorithm is not obvious. Here it is what we're trying to output: /* movq %rax, %xmm0 @@ -15674,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*,2> CV1; @@ -15721,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, } /// 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, @@ -15755,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getSimpleValueType(); - - if (DestVT.bitsLT(MVT::f64)) - return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0, dl)); - if (DestVT.bitsGT(MVT::f64)) - return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - - // Handle final rounding. - return Sub; + return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); } static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, @@ -15896,16 +15888,22 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } -SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, - SelectionDAG &DAG) const { +static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDValue N0 = Op.getOperand(0); MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. 
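The v2i1 cases above and below cannot be converted directly, so the mask is widened to v4i1 via CONCAT_VECTORS, extended to v4i32, and handed to a four-lane convert that only reads the low two lanes. A rough standalone model of that idea using SSE2 intrinsics (illustrative only, not the lowering itself):

    // Sketch only: widen a 2-lane mask to 4 x i32, then let a 4-lane
    // int->double convert read just the low two lanes, as X86ISD::CVTSI2P
    // does for v4i32 -> v2f64.
    #include <immintrin.h>

    __m128d maskToF64(bool M0, bool M1) {
      // Sign-extend each i1 to i32 (true -> -1, false -> 0); the two upper
      // lanes stand in for the undef lanes added by CONCAT_VECTORS.
      __m128i Wide = _mm_set_epi32(0, 0, M1 ? -1 : 0, M0 ? -1 : 0);
      return _mm_cvtepi32_pd(Wide); // converts only the low two i32 lanes
    }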
+ assert(Op.getValueType() == MVT::v2f64 && "Unexpected type"); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64, + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0)); + } + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0)); @@ -15930,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getSimpleValueType().isVector()) - return lowerUINT_TO_FP_vec(Op, DAG); + return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); @@ -15943,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i64(Op, DAG); + return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i32(Op, DAG); + return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); @@ -16283,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); - SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal); + SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); } // Extract back to 128/256-bit if we widened. @@ -18428,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VTElt, NumElts); - V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V); + V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V); } // Extract back to 128/256-bit if we widened. @@ -18681,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load); @@ -18700,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, if (NumElts <= 8) { // A subset, assume that we have only AVX-512F - unsigned NumBitsToLoad = 8; - MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad); - SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(), + SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad); - SDValue BitVec = DAG.getBitcast(MaskVT, Load); + SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load); if (NumElts == 8) return DAG.getNode(ExtOpcode, dl, VT, BitVec); - // we should take care to v4i1 and v2i1 + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. 
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec); @@ -18730,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, Ld->getBasePtr(), Ld->getMemOperand()); - SDValue BasePtrHi = - DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(2, dl, BasePtr.getValueType())); + SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl); - SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), - BasePtrHi, - Ld->getMemOperand()); + SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi, + Ld->getPointerInfo().getWithOffset(2), + MinAlign(Ld->getAlignment(), 2U), + Ld->getMemOperand()->getFlags()); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadLo.getValue(1), LoadHi.getValue(1)); @@ -22086,7 +22094,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && - "Should not custom lower when pmuldq is available!"); + "Should not custom lower when pmulld is available!"); + + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, dl, VT, + DAG.getBitcast(MVT::v8i16, A), + DAG.getBitcast(MVT::v8i16, B)); // Extract the odd parts. static const int UnpackMask[] = { 1, -1, 3, -1 }; @@ -22138,6 +22153,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask); bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask); + // If DQI is supported we can use MULLQ, but MULUDQ is still better if + // the high bits are known to be zero. + if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero)) + return Op; + // Bit cast to 32-bit vectors for MULUDQ. SDValue Alo = DAG.getBitcast(MulVT, A); SDValue Blo = DAG.getBitcast(MulVT, B); @@ -31001,8 +31021,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } - // The replacement was made in place; don't return anything. - return SDValue(); + // The replacement was made in place; return N so it won't be revisited. + return SDValue(N, 0); } /// If a vector select has an operand that is -1 or 0, try to simplify the @@ -32256,6 +32276,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) && + DAG.MaskedValueIsZero(N1, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0), + DAG.getBitcast(MVT::v8i16, N1)); + unsigned RegSize = 128; MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); @@ -33047,10 +33074,8 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // The right side has to be a 'trunc' or a constant vector.
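Both VPMADDWD folds above rest on the same arithmetic fact: when the upper 17 bits of two 32-bit elements are zero, each value fits in 15 bits, so the high 16-bit halves are zero, the low halves are non-negative as signed 16-bit values, and vpmaddwd's pairwise multiply-add collapses to an exact 32-bit multiply. A scalar model of one lane (a sketch, not the backend code):

    // Sketch only: one 32-bit lane of vpmaddwd, which multiplies the signed
    // 16-bit halves of each operand and adds the two products.
    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    uint32_t pmaddwdLane(uint32_t A, uint32_t B) {
      int16_t ALo = int16_t(A), AHi = int16_t(A >> 16);
      int16_t BLo = int16_t(B), BHi = int16_t(B >> 16);
      return uint32_t(int32_t(ALo) * BLo + int32_t(AHi) * BHi);
    }

    int main() {
      // Upper 17 bits zero means A, B < 2^15: the high halves are 0 and the
      // low halves are non-negative, so the result equals the plain product.
      for (uint32_t A : {0u, 1u, 12345u, 0x7FFFu})
        for (uint32_t B : {0u, 2u, 31000u, 0x7FFFu})
          assert(pmaddwdLane(A, B) == A * B);
    }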
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getValueType() == VT; - ConstantSDNode *RHSConstSplat = nullptr; - if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1)) - RHSConstSplat = RHSBV->getConstantSplatNode(); - if (!RHSTrunc && !RHSConstSplat) + if (!RHSTrunc && + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -33060,13 +33085,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); - if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(), - SDValue(RHSConstSplat, 0)); - N1 = DAG.getSplatBuildVector(VT, DL, N1); - } else if (RHSTrunc) { + if (RHSTrunc) N1 = N1->getOperand(0); - } + else + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1); // Generate the wide operation. SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); @@ -34039,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - std::min(16U, Alignment), Ld->getMemOperand()->getFlags()); + DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); - SDValue NewVec = DAG.getUNDEF(RegVT); - NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl); - NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl); + SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); } @@ -34453,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), - std::min(16U, Alignment), St->getMemOperand()->getFlags()); + DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -34876,7 +34898,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. 
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && - !TLI.isOperationLegal(Opcode, SrcVT)) + !Subtarget.hasDQI()) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; case ISD::ADD: { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 8464081b1b08..7708f577ba70 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -1167,7 +1167,6 @@ namespace llvm { bool isReplace) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; @@ -1183,9 +1182,6 @@ namespace llvm { SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index 2a2286e42405..dcd84930741b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, HasBWI, 1>; @@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins, } } -defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P, avx512vl_i32_info, avx512vl_i64_info, X86pmuldq, HasAVX512, 1>,T8PD; defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, @@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>; } -// Use 512bit version to implement 128/256 bit in case NoVLX. 
-multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info, - X86VectorVTInfo _> { - - def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))), - (X86Info.VT (EXTRACT_SUBREG - (_.VT (!cast<Instruction>(NAME#"Zrr") - (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))), - X86Info.SubRegIdx))>; -} - multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in @@ -8724,11 +8713,6 @@ let Predicates = [prd] in defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; } -let Predicates = [prd, NoVLX] in { - defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>; - defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>; - } - } defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td index 7e89a4111d86..619b399ef8d8 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td @@ -141,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // These instructions cannot address 80-bit memory. multiclass FPBinary<SDNode OpNode, Format fp, string asmstring, bit Forward = 1> { +let mayLoad = 1, hasSideEffects = 1 in { // ST(0) = ST(0) + [mem] def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, @@ -177,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst), (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))), (set RFP80:$dst, (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>; -let mayLoad = 1 in def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), !strconcat("f", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), !strconcat("f", asmstring, "{l}\t$src")>; // ST(0) = ST(0) + [memint] @@ -226,12 +225,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), (OpNode RFP80:$src1, (X86fild addr:$src2, i32))), (set RFP80:$dst, (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>; -let mayLoad = 1 in def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), !strconcat("fi", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), !strconcat("fi", asmstring, "{l}\t$src")>; +} // mayLoad = 1, hasSideEffects = 1 } let Defs = [FPSW] in { diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index fdf3e73e4fcd..27c67500b26f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -832,9 +832,11 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; -def HasVNNI : Predicate<"Subtarget->hasVNNI()">; +def HasVNNI : Predicate<"Subtarget->hasVNNI()">, + AssemblerPredicate<"FeatureVNNI", "AVX-512 VNNI ISA">; -def HasBITALG : Predicate<"Subtarget->hasBITALG()">; +def HasBITALG : Predicate<"Subtarget->hasBITALG()">, + AssemblerPredicate<"FeatureBITALG", "AVX-512 BITALG ISA">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; def HasVAES : Predicate<"Subtarget->hasVAES()">; @@ 
-866,7 +868,8 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">; def HasVBMI : Predicate<"Subtarget->hasVBMI()">, AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; -def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">; +def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">, + AssemblerPredicate<"FeatureVBMI2", "AVX-512 VBMI2 ISA">; def HasIFMA : Predicate<"Subtarget->hasIFMA()">, AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">; def HasRTM : Predicate<"Subtarget->hasRTM()">; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index b48fa1841979..cb84f9aecf79 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst @@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + 
(bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { @@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp index 0472a85f50da..6d6dedc60736 100644 --- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp @@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { + // Don't insert state stores or exception handler thunks for + // available_externally functions. The handler needs to reference the LSDA, + // which will not be emitted in this case. + if (F.hasAvailableExternallyLinkage()) + return false; + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7086c2eb52c4..a69c009e1a54 100644 --- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -181,8 +181,9 @@ public: StringRef Name, bool IsThinLTOPreLink, std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) - : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), - Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {} + : GetAC(std::move(GetAssumptionCache)), + GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) { // Populate the symbol map. 
for (const auto &N_F : M.getValueSymbolTable()) { - std::string OrigName = N_F.getKey(); + StringRef OrigName = N_F.getKey(); Function *F = dyn_cast<Function>(N_F.getValue()); if (F == nullptr) continue; SymbolMap[OrigName] = F; auto pos = OrigName.find('.'); - if (pos != std::string::npos) { - std::string NewName = OrigName.substr(0, pos); + if (pos != StringRef::npos) { + StringRef NewName = OrigName.substr(0, pos); auto r = SymbolMap.insert(std::make_pair(NewName, F)); // Failing to insert means there is already an entry in SymbolMap, // thus there are multiple functions that are mapped to the same diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 945133074059..caffc03339c4 100644 --- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) { if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { Metadata *&GlobalMD = LocalToGlobal[MD]; if (!GlobalMD) { - std::string NewName = - (to_string(LocalToGlobal.size()) + ModuleId).str(); + std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str(); GlobalMD = MDString::get(M.getContext(), NewName); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a088d447337f..40e52ee755e5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { - auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), - Args.end(), SQ.getWithInstruction(&CI))) + if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, N); return nullptr; - case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; - // TODO should this be in InstSimplify? - // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); - // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - @@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - - case Intrinsic::bitreverse: { - Value *IIOperand = II->getArgOperand(0); - Value *X = nullptr; - - // TODO should this be in InstSimplify?
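The SampleProfile hunk above keys the symbol map by StringRef instead of std::string, so reading a name out of the symbol table and slicing off the '.suffix' copies no string data. A standalone sketch of the same pattern, using std::string_view as a stand-in for llvm::StringRef (names here are illustrative):

    // Sketch only: slicing a view allocates nothing, unlike std::string.
    #include <cassert>
    #include <string_view>

    std::string_view baseName(std::string_view OrigName) {
      auto Pos = OrigName.find('.'); // e.g. "foo.llvm.1234" -> "foo"
      return Pos == std::string_view::npos ? OrigName : OrigName.substr(0, Pos);
    }

    int main() {
      assert(baseName("foo.llvm.1234") == "foo");
      assert(baseName("bar") == "bar");
    }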
- // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return replaceInstUsesWith(CI, X); - break; - } - case Intrinsic::masked_load: if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); @@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return replaceInstUsesWith(CI, II->getArgOperand(0)); + // 0 and 1 are handled in instsimplify + // powi(x, -1) -> 1/x if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); + // powi(x, 2) -> x*x + if (Power->equalsInt(2)) + return BinaryOperator::CreateFMul(II->getArgOperand(0), + II->getArgOperand(0)); } break; @@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // The compare intrinsic uses the above assumptions and therefore // doesn't require additional flags. if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) && - match(Arg1, m_Zero()) && + match(Arg1, m_Zero()) && isa<Instruction>(Arg0) && cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) { if (Arg0IsZero) std::swap(A, B); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87666360c1a0..541dde6c47d2 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (simplifyDivRemOfSelectWithZeroOp(I)) - return &I; - return nullptr; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 65a96b965227..aeac8910af6b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // InstSimplify handles cases where the index is invalid. - assert(IndexVal < VectorWidth); + // InstSimplify should handle cases where the index is invalid. + if (!IdxC->getValue().ule(VectorWidth)) + return nullptr; + + unsigned IndexVal = IdxC->getZExtValue(); // This instruction only demands the single element from the input vector. 
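The powi hunk above drops the 0 and 1 cases (now folded earlier, in InstSimplify) and adds the x*x expansion for an exponent of 2, alongside the existing reciprocal fold for -1. The net effect on small constant powers, as a scalar sketch assuming a compiler that provides __builtin_powi (GCC and Clang do):

    // Sketch only: combined effect of the powi folds for small constants.
    double powiFold(double X, int Power) {
      switch (Power) {
      case 0:  return 1.0;     // powi(x, 0)  -> 1.0  (InstSimplify)
      case 1:  return X;       // powi(x, 1)  -> x    (InstSimplify)
      case -1: return 1.0 / X; // powi(x, -1) -> 1/x  (InstCombine)
      case 2:  return X * X;   // powi(x, 2)  -> x*x  (InstCombine)
      default: return __builtin_powi(X, Power); // left untouched
      }
    }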
// If the input vector has a single use, simplify it based on this use diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8328d4031941..8e39f24d819c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(Ty); - const std::string &Name = Arg.hasName() ? Arg.getName().str() : - "Arg" + llvm::to_string(Arg.getArgNo()); - AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AllocaInst *AI = IRB.CreateAlloca( + Ty, nullptr, + (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + + ".byval"); AI->setAlignment(Align); Arg.replaceAllUsesWith(AI); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp index 814a62cd7d65..bf92e43c4715 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( DenseMap<uint32_t, unsigned> VNums; for (auto *I : Insts) { uint32_t N = VN.lookupOrAdd(I); - DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n"); if (N == ~0U) return None; VNums[N]++; diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6af3fef963dc..9c870b42a747 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, Alignment = DL.getABITypeAlignment(EltType); } - // Remember the debug location. - DebugLoc Loc; - if (!Range.TheStores.empty()) - Loc = Range.TheStores[0]->getDebugLoc(); + AMemSet = + Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) - dbgs() << *SI << '\n'); + dbgs() << *SI << '\n'; + dbgs() << "With: " << *AMemSet << '\n'); + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); // Zap all the stores. for (Instruction *SI : Range.TheStores) { MD->removeInstruction(SI); SI->eraseFromParent(); } - - // Create the memset after removing the stores, so that if there any cached - // non-local dependencies on the removed instructions in - // MemoryDependenceAnalysis, the cache entries are updated to "dirty" - // entries pointing below the memset, so subsequent queries include the - // memset. - AMemSet = - Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); - if (!Range.TheStores.empty()) - AMemSet->setDebugLoc(Loc); - - DEBUG(dbgs() << "With: " << *AMemSet << '\n'); - ++NumMemSetInfer; } @@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. 
- MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep); - MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false, - M->getIterator(), M->getParent()); - - if (SourceDep.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SourceDep = NonLocalDepResults[0].getResult(); - assert((!SourceDep.getInst() || - LookupDomTree().dominates(SourceDep.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; @@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( SrcLoc, true, M->getIterator(), M->getParent()); - if (SrcDepInfo.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SrcDepInfo = NonLocalDepResults[0].getResult(); - assert((!SrcDepInfo.getInst() || - LookupDomTree().dominates(SrcDepInfo.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - if (SrcDepInfo.isClobber()) { if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) return processMemCpyMemCpyDependence(M, MDep); diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3b45cfa482e6..c44edbed8ed9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, StatepointLiveSetTy Updated; findLiveSetAtInst(Inst, RevisedLivenessData, Updated); -#ifndef NDEBUG - DenseSet<Value *> Bases; - for (auto KVPair : Info.PointerToBase) - Bases.insert(KVPair.second); -#endif - // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. for (auto V : Updated) if (Info.PointerToBase.insert({V, V}).second) { - assert(Bases.count(V) && "Can't find base for unexpected live value!"); + assert(isKnownBaseResult(V) && + "Can't find base for unexpected live value!"); continue; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index efff06f79cb7..e00541d3c812 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. 
- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); + EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index c3fa05a11a24..fe106e33bca1 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, /// If we are able to find such sequence, we return the instructions /// we found, namely %casted_phi and the instructions on its use-def chain up /// to the phi (not including the phi). -bool getCastsForInductionPHI( - PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, - const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { +static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, + const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, + SmallVectorImpl<Instruction *> &CastInsts) { assert(CastInsts.empty() && "CastInsts is expected to be empty."); auto *PN = cast<PHINode>(PhiScev->getValue()); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b3c80424c8b9..e7358dbcb624 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth( cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); -static cl::opt<unsigned> DependenceChainLatency( - "dependence-chain-latency", cl::Hidden, cl::init(8), - cl::desc("Limit the maximum latency of dependence chain containing cmp " - "for if conversion")); - -static cl::opt<unsigned> SmallBBSize( - "small-bb-size", cl::Hidden, cl::init(40), - cl::desc("Check dependence chain latency only in basic block smaller than " - "this number")); - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } -/// Estimate the code size of the specified BB. -static unsigned CountBBCodeSize(BasicBlock *BB, - const TargetTransformInfo &TTI) { - unsigned Size = 0; - for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II) - Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize); - return Size; -} - -/// Find out the latency of the longest dependence chain in the BB if -/// LongestChain is true, or the dependence chain containing the compare -/// instruction feeding the block's conditional branch. 
-static unsigned FindDependenceChainLatency(BasicBlock *BB, - DenseMap<Instruction *, unsigned> &Instructions, - const TargetTransformInfo &TTI, - bool LongestChain) { - unsigned MaxLatency = 0; - - BasicBlock::iterator II; - for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) { - unsigned Latency = 0; - for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) { - Instruction *Op = dyn_cast<Instruction>(II->getOperand(O)); - if (Op && Instructions.count(Op)) { - auto OpLatency = Instructions[Op]; - if (OpLatency > Latency) - Latency = OpLatency; - } - } - Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency); - Instructions[&(*II)] = Latency; - - if (Latency > MaxLatency) - MaxLatency = Latency; - } - - if (LongestChain) - return MaxLatency; - - // The length of the dependence chain containing the compare instruction is - // wanted, so the terminator must be a BranchInst. - assert(isa<BranchInst>(II)); - BranchInst* Br = cast<BranchInst>(II); - Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition()); - if (Cmp && Instructions.count(Cmp)) - return Instructions[Cmp]; - else - return 0; -} - -/// Instructions in BB2 may depend on instructions in BB1, and instructions -/// in BB1 may have users in BB2. If the last (in terms of latency) such kind -/// of instruction in BB1 is I, then the instructions after I can be executed -/// in parallel with instructions in BB2. -/// This function returns the latency of I. -static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - DenseMap<Instruction *, unsigned> &BB1Instructions) { - unsigned LastLatency = 0; - SmallVector<Instruction *, 16> Worklist; - BasicBlock::iterator II; - for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) { - if (PHINode *PN = dyn_cast<PHINode>(II)) { - // Look for users in BB2. - bool InBBUser = false; - for (User *U : PN->users()) { - if (cast<Instruction>(U)->getParent() == BB2) { - InBBUser = true; - break; - } - } - // No such user, we don't care about this instruction and its operands. - if (!InBBUser) - break; - } - Worklist.push_back(&(*II)); - } - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) { - if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) { - if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2) - Worklist.push_back(Op); - else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) { - if (BB1Instructions[Op] > LastLatency) - LastLatency = BB1Instructions[Op]; - } - } - } - } - - return LastLatency; -} - -/// If after if conversion, most of the instructions in this new BB construct a -/// long and slow dependence chain, it may be slower than cmp/branch, even -/// if the branch has a high miss rate, because the control dependence is -/// transformed into data dependence, and control dependence can be speculated, -/// and thus, the second part can execute in parallel with the first part on -/// modern OOO processor. -/// -/// To check this condition, this function finds the length of the dependence -/// chain in BB1 (only the part that can be executed in parallel with code after -/// branch in BB2) containing cmp, and if the length is longer than a threshold, -/// don't perform if conversion. -/// -/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion. -/// SpeculationSize contains the code size of IfBlock1 and IfBlock2. 
-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - unsigned SpeculationSize, - const TargetTransformInfo &TTI) { - // Accumulated latency of each instruction in their BBs. - DenseMap<Instruction *, unsigned> BB1Instructions; - DenseMap<Instruction *, unsigned> BB2Instructions; - - if (!TTI.isOutOfOrder()) - return false; - - unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI) - + SpeculationSize; - - // We check small BB only since it is more difficult to find unrelated - // instructions to fill functional units in a small BB. - if (NewBBSize > SmallBBSize) - return false; - - auto BB1Chain = - FindDependenceChainLatency(BB1, BB1Instructions, TTI, false); - auto BB2Chain = - FindDependenceChainLatency(BB2, BB2Instructions, TTI, true); - - // If there are many unrelated instructions in the new BB, there will be - // other instructions for the processor to issue regardless of the length - // of this new dependence chain. - // Modern processors can issue 3 or more instructions in each cycle. But in - // real world applications, an IPC of 2 is already very good for non-loop - // code with small basic blocks. Higher IPC is usually found in programs with - // small kernel. So IPC of 2 is more reasonable for most applications. - if ((BB1Chain + BB2Chain) * 2 <= NewBBSize) - return false; - - // We only care about part of the dependence chain in BB1 that can be - // executed in parallel with BB2, so adjust the latency. - BB1Chain -= - LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions); - - // Correctly predicted branch instruction can skip the dependence chain in - // BB1, but misprediction has a penalty, so only when the dependence chain is - // longer than DependenceChainLatency, then branch is better than select. - // Besides misprediction penalty, the threshold value DependenceChainLatency - // also depends on branch misprediction rate, taken branch latency and cmov - // latency. - if (BB1Chain >= DependenceChainLatency) - return true; - - return false; -} - /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { @@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; - // Don't do if conversion for long dependence chain. - if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr, - CountBBCodeSize(ThenBB, TTI), TTI)) - return false; - // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); @@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } } - if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2, - AggressiveInsts.size(), TTI)) - return false; - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); diff --git a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h index 6bd07af1affa..b27dbfacf6a6 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Stmt.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Stmt.h @@ -592,15 +592,21 @@ public: }; /// CompoundStmt - This represents a group of statements like { stmt stmt }. 
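The Clang hunks below convert CompoundStmt (and later AttributedStmt) to llvm::TrailingObjects, so the statement array is tail-allocated in the same block as the node rather than hanging off a separate Body pointer. A hand-rolled miniature of what that template automates (a sketch that ignores the alignment bookkeeping TrailingObjects handles for real):

    // Sketch only: header plus trailing payload in a single allocation.
    #include <cstddef>
    #include <new>

    class Node final {
      size_t NumKids; // size_t keeps 'this + 1' pointer-aligned here
      explicit Node(size_t N) : NumKids(N) {}

    public:
      static Node *create(size_t N) {
        // One allocation: sizeof(Node) bytes of header, then N pointers.
        void *Mem = ::operator new(sizeof(Node) + N * sizeof(Node *));
        return new (Mem) Node(N);
      }
      Node **kids() { return reinterpret_cast<Node **>(this + 1); }
      size_t size() const { return NumKids; }
    };

Making the constructor private and exposing a static Create factory, as the patch does, keeps callers from allocating a node without its trailing array.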
-class CompoundStmt : public Stmt { +class CompoundStmt final : public Stmt, + private llvm::TrailingObjects<CompoundStmt, Stmt *> { friend class ASTStmtReader; + friend TrailingObjects; - Stmt** Body = nullptr; SourceLocation LBraceLoc, RBraceLoc; + CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB, SourceLocation RB); + explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {} + + void setStmts(ArrayRef<Stmt *> Stmts); + public: - CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts, - SourceLocation LB, SourceLocation RB); + static CompoundStmt *Create(const ASTContext &C, ArrayRef<Stmt *> Stmts, + SourceLocation LB, SourceLocation RB); // \brief Build an empty compound statement with a location. explicit CompoundStmt(SourceLocation Loc) @@ -609,11 +615,7 @@ public: } // \brief Build an empty compound statement. - explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) { - CompoundStmtBits.NumStmts = 0; - } - - void setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts); + static CompoundStmt *CreateEmpty(const ASTContext &C, unsigned NumStmts); bool body_empty() const { return CompoundStmtBits.NumStmts == 0; } unsigned size() const { return CompoundStmtBits.NumStmts; } @@ -622,14 +624,16 @@ public: using body_range = llvm::iterator_range<body_iterator>; body_range body() { return body_range(body_begin(), body_end()); } - body_iterator body_begin() { return Body; } - body_iterator body_end() { return Body + size(); } - Stmt *body_front() { return !body_empty() ? Body[0] : nullptr; } - Stmt *body_back() { return !body_empty() ? Body[size()-1] : nullptr; } + body_iterator body_begin() { return getTrailingObjects<Stmt *>(); } + body_iterator body_end() { return body_begin() + size(); } + Stmt *body_front() { return !body_empty() ? body_begin()[0] : nullptr; } + Stmt *body_back() { + return !body_empty() ? body_begin()[size() - 1] : nullptr; + } void setLastStmt(Stmt *S) { assert(!body_empty() && "setLastStmt"); - Body[size()-1] = S; + body_begin()[size() - 1] = S; } using const_body_iterator = Stmt* const *; @@ -639,15 +643,17 @@ public: return body_const_range(body_begin(), body_end()); } - const_body_iterator body_begin() const { return Body; } - const_body_iterator body_end() const { return Body + size(); } + const_body_iterator body_begin() const { + return getTrailingObjects<Stmt *>(); + } + const_body_iterator body_end() const { return body_begin() + size(); } const Stmt *body_front() const { - return !body_empty() ? Body[0] : nullptr; + return !body_empty() ? body_begin()[0] : nullptr; } const Stmt *body_back() const { - return !body_empty() ? Body[size() - 1] : nullptr; + return !body_empty() ? body_begin()[size() - 1] : nullptr; } using reverse_body_iterator = std::reverse_iterator<body_iterator>; @@ -682,13 +688,10 @@ public: } // Iterators - child_range children() { - return child_range(Body, Body + CompoundStmtBits.NumStmts); - } + child_range children() { return child_range(body_begin(), body_end()); } const_child_range children() const { - return const_child_range(child_iterator(Body), - child_iterator(Body + CompoundStmtBits.NumStmts)); + return const_child_range(body_begin(), body_end()); } }; @@ -875,8 +878,11 @@ public: /// /// Represents an attribute applied to a statement. For example: /// [[omp::for(...)]] for (...) { ... 
} -class AttributedStmt : public Stmt { +class AttributedStmt final + : public Stmt, + private llvm::TrailingObjects<AttributedStmt, const Attr *> { friend class ASTStmtReader; + friend TrailingObjects; Stmt *SubStmt; SourceLocation AttrLoc; @@ -894,11 +900,9 @@ class AttributedStmt : public Stmt { } const Attr *const *getAttrArrayPtr() const { - return reinterpret_cast<const Attr *const *>(this + 1); - } - const Attr **getAttrArrayPtr() { - return reinterpret_cast<const Attr **>(this + 1); + return getTrailingObjects<const Attr *>(); } + const Attr **getAttrArrayPtr() { return getTrailingObjects<const Attr *>(); } public: static AttributedStmt *Create(const ASTContext &C, SourceLocation Loc, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def index d0be48467f1f..465551be7742 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def @@ -429,9 +429,34 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "", "aes") TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes") TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes") +// VAES +TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") + +// GFNI +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gfni") + // CLMUL TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul") +// VPCLMULQDQ +TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq") + // AVX TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx") @@ -954,6 +979,31 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", 
"avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") + TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl") @@ -1067,6 +1117,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg") + TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", 
"avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") @@ -1107,6 +1168,12 @@ TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi","","avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "", "avx512vl") @@ -1115,6 +1182,12 @@ TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "", "avx512 TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl") @@ -1147,10 +1220,22 @@ TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2d*V2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4d*V4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLi*V2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLi*V4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "", "avx512vl,avx512vbmi2") 
+TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4f*V4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4i*V4iUc", "", "avx512vl") @@ -1223,6 +1308,65 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "", "avx512bw") @@ -1677,14 +1821,20 @@ TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi","","avx51 TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c","","avx512bw") TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f") 
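A reading aid for the type strings in these TARGET_BUILTIN entries, following the signature key documented in clang's Builtins.def: 'V16i' is a vector of 16 ints, 'c'/'s'/'LLi' are char/short/long long, a 'U' prefix means unsigned, 'I' marks an argument that must be an integer constant expression, 'C' is const and '*' a pointer. So "V16iV16iV16iV16iUs" reads: return a vector of 16 ints, taking three such vectors plus an unsigned short mask. The sketch below also models one lane of the new VBMI2 VPSHLDD funnel shift; the semantics are assumed from the instruction's published definition, shown for intuition only, not taken from this patch:

#include <cstdint>

// One lane of VPSHLDD ("V4iV4iV4iIiV4iUc" above): concatenate a:b into a
// 64-bit value, shift left by imm (mod 32), keep the high 32 bits.
uint32_t vpshldd_element(uint32_t a, uint32_t b, unsigned imm) {
  imm &= 31;                                 // shift count is taken mod 32
  if (imm == 0)
    return a;                                // high half of a:b, unshifted
  uint64_t concat = ((uint64_t)a << 32) | b; // a occupies the high half
  return (uint32_t)(concat >> (32 - imm));   // high 32 bits after << imm
}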
TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f") @@ -1692,6 +1842,8 @@ TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc","","avx512vl") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td index a8d6955da3c0..7936cdd96f80 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -587,6 +587,7 @@ def ext_using_attribute_ns : ExtWarn< def err_using_attribute_ns_conflict : Error< "attribute with scope specifier cannot follow default scope specifier">; def err_attributes_not_allowed : Error<"an attribute list cannot appear here">; +def err_attributes_misplaced : Error<"misplaced attributes; expected attributes here">; def err_l_square_l_square_not_attribute : Error< "C++11 only allows consecutive left square brackets when " "introducing an attribute">; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index d36e1a63220e..09efd7b0af63 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>; def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>; def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>; def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>; +def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>; +def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>; def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>; def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>; def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>; @@ -2481,8 +2483,12 @@ def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>; def mno_avx512pf : Flag<["-"], 
"mno-avx512pf">, Group<m_x86_Features_Group>; def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>; def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>; +def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>; +def mno_avx512vbmi2 : Flag<["-"], "mno-avx512vbmi2">, Group<m_x86_Features_Group>; def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>; def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>; +def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>; +def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>; def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>; def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>; def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>; @@ -2511,6 +2517,8 @@ def mfsgsbase : Flag<["-"], "mfsgsbase">, Group<m_x86_Features_Group>; def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group<m_x86_Features_Group>; def mfxsr : Flag<["-"], "mfxsr">, Group<m_x86_Features_Group>; def mno_fxsr : Flag<["-"], "mno-fxsr">, Group<m_x86_Features_Group>; +def mgfni : Flag<["-"], "mgfni">, Group<m_x86_Features_Group>; +def mno_gfni : Flag<["-"], "mno-gfni">, Group<m_x86_Features_Group>; def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>; def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>; def mlzcnt : Flag<["-"], "mlzcnt">, Group<m_x86_Features_Group>; @@ -2543,6 +2551,10 @@ def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>; def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>; def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>; def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>; +def mvaes : Flag<["-"], "mvaes">, Group<m_x86_Features_Group>; +def mno_vaes : Flag<["-"], "mno-vaes">, Group<m_x86_Features_Group>; +def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group<m_x86_Features_Group>; +def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group<m_x86_Features_Group>; def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>; def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>; def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>; diff --git a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h index 8f5b20c2bd71..4a25c70956a3 100644 --- a/contrib/llvm/tools/clang/include/clang/Parse/Parser.h +++ b/contrib/llvm/tools/clang/include/clang/Parse/Parser.h @@ -2200,13 +2200,16 @@ private: void stripTypeAttributesOffDeclSpec(ParsedAttributesWithRange &Attrs, DeclSpec &DS, Sema::TagUseKind TUK); - - void ProhibitAttributes(ParsedAttributesWithRange &attrs) { + + // FixItLoc = possible correct location for the attributes + void ProhibitAttributes(ParsedAttributesWithRange &attrs, + SourceLocation FixItLoc = SourceLocation()) { if (!attrs.Range.isValid()) return; - DiagnoseProhibitedAttributes(attrs); + DiagnoseProhibitedAttributes(attrs, FixItLoc); attrs.clear(); } - void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs); + void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, + SourceLocation FixItLoc); // Forbid C++11 and C2x attributes that appear on certain syntactic locations // which standard permits but we don't supported yet, for example, attributes diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp index 
dd96148edb27..3dc961d4f12b 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp @@ -2148,7 +2148,7 @@ static bool unionHasUniqueObjectRepresentations(const ASTContext &Context, return true; } -bool isStructEmpty(QualType Ty) { +static bool isStructEmpty(QualType Ty) { const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl(); if (!RD->field_empty()) diff --git a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp index 0e627f9737ce..0d1d9807549f 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp @@ -134,12 +134,17 @@ namespace clang { bool ImportTemplateArguments(const TemplateArgument *FromArgs, unsigned NumFromArgs, SmallVectorImpl<TemplateArgument> &ToArgs); + template <typename InContainerTy> + bool ImportTemplateArgumentListInfo(const InContainerTy &Container, + TemplateArgumentListInfo &ToTAInfo); bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord, bool Complain = true); bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar, bool Complain = true); bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord); bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC); + bool IsStructuralMatch(FunctionTemplateDecl *From, + FunctionTemplateDecl *To); bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To); bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To); Decl *VisitDecl(Decl *D); @@ -195,6 +200,7 @@ namespace clang { ClassTemplateSpecializationDecl *D); Decl *VisitVarTemplateDecl(VarTemplateDecl *D); Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D); + Decl *VisitFunctionTemplateDecl(FunctionTemplateDecl *D); // Importing statements DeclGroupRef ImportDeclGroup(DeclGroupRef DG); @@ -280,6 +286,7 @@ namespace clang { Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E); Expr *VisitCXXConstructExpr(CXXConstructExpr *E); Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E); + Expr *VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E); Expr *VisitExprWithCleanups(ExprWithCleanups *EWC); Expr *VisitCXXThisExpr(CXXThisExpr *E); Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E); @@ -1247,6 +1254,18 @@ bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs, return false; } +template <typename InContainerTy> +bool ASTNodeImporter::ImportTemplateArgumentListInfo( + const InContainerTy &Container, TemplateArgumentListInfo &ToTAInfo) { + for (const auto &FromLoc : Container) { + if (auto ToLoc = ImportTemplateArgumentLoc(FromLoc)) + ToTAInfo.addArgument(*ToLoc); + else + return true; + } + return false; +} + bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord, bool Complain) { // Eliminate a potential failure point where we attempt to re-import @@ -1280,6 +1299,14 @@ bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) { return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum); } +bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From, + FunctionTemplateDecl *To) { + StructuralEquivalenceContext Ctx( + Importer.getFromContext(), Importer.getToContext(), + Importer.getNonEquivalentDecls(), false, false); + return Ctx.IsStructurallyEquivalent(From, To); +} + bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC) { @@ -4197,6 +4224,64 @@ Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl( return D2; } 
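The ASTImporter additions in this file (ImportTemplateArgumentListInfo, the FunctionTemplateDecl structural match, and VisitFunctionTemplateDecl just below) let function templates cross ASTContexts instead of silently importing as null. A hedged usage sketch against the clang 6 API, where the contexts, file managers, and source declaration are assumed to be supplied by the embedding tool:

#include "clang/AST/ASTImporter.h"
#include "llvm/Support/Casting.h"

clang::FunctionTemplateDecl *
importTemplate(clang::ASTContext &ToCtx, clang::FileManager &ToFileMgr,
               clang::ASTContext &FromCtx, clang::FileManager &FromFileMgr,
               clang::FunctionTemplateDecl *FromFT) {
  clang::ASTImporter Importer(ToCtx, ToFileMgr, FromCtx, FromFileMgr,
                              /*MinimalImport=*/false);
  // Import() returns a Decl * in this release (later releases moved to
  // llvm::Expected). With this patch, a structurally equivalent template
  // already present in ToCtx is reused rather than duplicated.
  return llvm::cast_or_null<clang::FunctionTemplateDecl>(
      Importer.Import(FromFT));
}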
+Decl *ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { + DeclContext *DC, *LexicalDC; + DeclarationName Name; + SourceLocation Loc; + NamedDecl *ToD; + + if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc)) + return nullptr; + + if (ToD) + return ToD; + + // Try to find a function in our own ("to") context with the same name, same + // type, and in the same context as the function we're importing. + if (!LexicalDC->isFunctionOrMethod()) { + unsigned IDNS = Decl::IDNS_Ordinary; + SmallVector<NamedDecl *, 2> FoundDecls; + DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls); + for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) { + if (!FoundDecls[I]->isInIdentifierNamespace(IDNS)) + continue; + + if (FunctionTemplateDecl *FoundFunction = + dyn_cast<FunctionTemplateDecl>(FoundDecls[I])) { + if (FoundFunction->hasExternalFormalLinkage() && + D->hasExternalFormalLinkage()) { + if (IsStructuralMatch(D, FoundFunction)) { + Importer.Imported(D, FoundFunction); + // FIXME: Actually try to merge the body and other attributes. + return FoundFunction; + } + } + } + } + } + + TemplateParameterList *Params = + ImportTemplateParameterList(D->getTemplateParameters()); + if (!Params) + return nullptr; + + FunctionDecl *TemplatedFD = + cast_or_null<FunctionDecl>(Importer.Import(D->getTemplatedDecl())); + if (!TemplatedFD) + return nullptr; + + FunctionTemplateDecl *ToFunc = FunctionTemplateDecl::Create( + Importer.getToContext(), DC, Loc, Name, Params, TemplatedFD); + + TemplatedFD->setDescribedFunctionTemplate(ToFunc); + ToFunc->setAccess(D->getAccess()); + ToFunc->setLexicalDeclContext(LexicalDC); + Importer.Imported(D, ToFunc); + + LexicalDC->addDeclInternal(ToFunc); + return ToFunc; +} + //---------------------------------------------------------------------------- // Import Statements //---------------------------------------------------------------------------- @@ -4321,9 +4406,8 @@ Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) { SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc()); SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc()); - return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(), - ToStmts, - ToLBraceLoc, ToRBraceLoc); + return CompoundStmt::Create(Importer.getToContext(), ToStmts, ToLBraceLoc, + ToRBraceLoc); } Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) { @@ -5759,6 +5843,47 @@ Expr *ASTNodeImporter::VisitCXXPseudoDestructorExpr( Importer.Import(E->getTildeLoc()), Storage); } +Expr *ASTNodeImporter::VisitCXXDependentScopeMemberExpr( + CXXDependentScopeMemberExpr *E) { + Expr *Base = nullptr; + if (!E->isImplicitAccess()) { + Base = Importer.Import(E->getBase()); + if (!Base) + return nullptr; + } + + QualType BaseType = Importer.Import(E->getBaseType()); + if (BaseType.isNull()) + return nullptr; + + TemplateArgumentListInfo ToTAInfo(Importer.Import(E->getLAngleLoc()), + Importer.Import(E->getRAngleLoc())); + TemplateArgumentListInfo *ResInfo = nullptr; + if (E->hasExplicitTemplateArgs()) { + if (ImportTemplateArgumentListInfo(E->template_arguments(), ToTAInfo)) + return nullptr; + ResInfo = &ToTAInfo; + } + + DeclarationName Name = Importer.Import(E->getMember()); + if (!E->getMember().isEmpty() && Name.isEmpty()) + return nullptr; + + DeclarationNameInfo MemberNameInfo(Name, Importer.Import(E->getMemberLoc())); + // Import additional name location/type info. 
+ ImportDeclarationNameLoc(E->getMemberNameInfo(), MemberNameInfo); + auto ToFQ = Importer.Import(E->getFirstQualifierFoundInScope()); + if (!ToFQ && E->getFirstQualifierFoundInScope()) + return nullptr; + + return CXXDependentScopeMemberExpr::Create( + Importer.getToContext(), Base, BaseType, E->isArrow(), + Importer.Import(E->getOperatorLoc()), + Importer.Import(E->getQualifierLoc()), + Importer.Import(E->getTemplateKeywordLoc()), + cast_or_null<NamedDecl>(ToFQ), MemberNameInfo, ResInfo); +} + Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) { QualType T = Importer.Import(E->getType()); if (T.isNull()) diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp index 629037b1755c..4c1d591b41e9 100644 --- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp @@ -1550,7 +1550,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS, // the enum-specifier. Each scoped enumerator is declared in the // scope of the enumeration. // For the case of unscoped enumerator, do not include in the qualified - // name any information about its enum enclosing scope, as is visibility + // name any information about its enum enclosing scope, as its visibility // is global. if (ED->isScoped()) OS << *ED; diff --git a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp index 8d240c1336ab..982fd458493f 100644 --- a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp +++ b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp @@ -299,31 +299,34 @@ SourceLocation Stmt::getLocEnd() const { llvm_unreachable("unknown statement kind"); } -CompoundStmt::CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts, - SourceLocation LB, SourceLocation RB) - : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { +CompoundStmt::CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB, + SourceLocation RB) + : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { CompoundStmtBits.NumStmts = Stmts.size(); + setStmts(Stmts); +} + +void CompoundStmt::setStmts(ArrayRef<Stmt *> Stmts) { assert(CompoundStmtBits.NumStmts == Stmts.size() && "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); - if (Stmts.empty()) { - Body = nullptr; - return; - } - - Body = new (C) Stmt*[Stmts.size()]; - std::copy(Stmts.begin(), Stmts.end(), Body); + std::copy(Stmts.begin(), Stmts.end(), body_begin()); } -void CompoundStmt::setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts) { - if (Body) - C.Deallocate(Body); - CompoundStmtBits.NumStmts = Stmts.size(); - assert(CompoundStmtBits.NumStmts == Stmts.size() && - "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); +CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef<Stmt *> Stmts, + SourceLocation LB, SourceLocation RB) { + void *Mem = + C.Allocate(totalSizeToAlloc<Stmt *>(Stmts.size()), alignof(CompoundStmt)); + return new (Mem) CompoundStmt(Stmts, LB, RB); +} - Body = new (C) Stmt*[Stmts.size()]; - std::copy(Stmts.begin(), Stmts.end(), Body); +CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C, + unsigned NumStmts) { + void *Mem = + C.Allocate(totalSizeToAlloc<Stmt *>(NumStmts), alignof(CompoundStmt)); + CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell()); + New->CompoundStmtBits.NumStmts = NumStmts; + return New; } const char *LabelStmt::getName() const { @@ -334,7 +337,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc, ArrayRef<const Attr*> Attrs, Stmt *SubStmt) { assert(!Attrs.empty() && "Attrs should not be empty"); - void 
*Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * Attrs.size(), + void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(Attrs.size()), alignof(AttributedStmt)); return new (Mem) AttributedStmt(Loc, Attrs, SubStmt); } @@ -342,7 +345,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc, AttributedStmt *AttributedStmt::CreateEmpty(const ASTContext &C, unsigned NumAttrs) { assert(NumAttrs > 0 && "NumAttrs should be greater than zero"); - void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * NumAttrs, + void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(NumAttrs), alignof(AttributedStmt)); return new (Mem) AttributedStmt(EmptyShell(), NumAttrs); } diff --git a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp index e5d3c5ce5bc2..89ca8484819d 100644 --- a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp +++ b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp @@ -133,7 +133,7 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS, } CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) { - return new (C) CompoundStmt(C, Stmts, SourceLocation(), SourceLocation()); + return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation()); } DeclRefExpr *ASTMaker::makeDeclRefExpr( diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp index 6080cefac744..4d3cd121f705 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp @@ -159,7 +159,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_FP_FAST", "1"); Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); + Twine(Opts.WCharSize ? Opts.WCharSize : 4)); Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/ARM.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/ARM.cpp index fe261b774855..6fb0ab41ff5b 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/ARM.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/ARM.cpp @@ -582,7 +582,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // ACLE 6.4.4 LDREX/STREX if (LDREX) - Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + llvm::utohexstr(LDREX)); + Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX)); // ACLE 6.4.5 CLZ if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") || @@ -591,7 +591,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // ACLE 6.5.1 Hardware Floating Point if (HW_FP) - Builder.defineMacro("__ARM_FP", "0x" + llvm::utohexstr(HW_FP)); + Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP)); // ACLE predefines. Builder.defineMacro("__ARM_ACLE", "200"); @@ -672,11 +672,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // current AArch32 NEON implementations do not support double-precision // floating-point even when it is present in VFP. Builder.defineMacro("__ARM_NEON_FP", - "0x" + llvm::utohexstr(HW_FP & ~HW_FP_DP)); + "0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP)); } Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); + Twine(Opts.WCharSize ? Opts.WCharSize : 4)); Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? 
"1" : "4"); diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp index bdf5cdb9407b..3efba26a8373 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp @@ -132,7 +132,14 @@ bool X86TargetInfo::initFeatureMap( break; case CK_Icelake: - // TODO: Add icelake features here. + setFeatureEnabledImpl(Features, "vaes", true); + setFeatureEnabledImpl(Features, "gfni", true); + setFeatureEnabledImpl(Features, "vpclmulqdq", true); + setFeatureEnabledImpl(Features, "avx512bitalg", true); + setFeatureEnabledImpl(Features, "avx512vnni", true); + setFeatureEnabledImpl(Features, "avx512vbmi2", true); + setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); + setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); @@ -145,8 +152,10 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); - setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "clwb", true); + if (Kind == CK_SkylakeServer) { + setFeatureEnabledImpl(Features, "pku", true); + setFeatureEnabledImpl(Features, "clwb", true); + } LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); @@ -443,7 +452,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features, LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = - false; + Features["gfni"] = false; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; @@ -460,7 +469,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features, LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = - Features["xsaveopt"] = false; + Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false; setXOPLevel(Features, FMA4, false); LLVM_FALLTHROUGH; case AVX2: @@ -470,7 +479,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features, Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = - Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; + Features["avx512ifma"] = Features["avx512vpopcntdq"] = + Features["avx512bitalg"] = Features["avx512vnni"] = + Features["avx512vbmi2"] = false; break; } } @@ -572,9 +583,26 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features, } else if (Name == "aes") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + else + Features["vaes"] = false; + } else if (Name == "vaes") { + if (Enabled) { + setSSELevel(Features, AVX, Enabled); + Features["aes"] = true; + } } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + else + Features["vpclmulqdq"] = false; + } else if (Name == "vpclmulqdq") { + if (Enabled) { + setSSELevel(Features, AVX, Enabled); + Features["pclmul"] = true; + } + } else if (Name == "gfni") { + if (Enabled) + setSSELevel(Features, SSE2, Enabled); } else if (Name == "avx") { setSSELevel(Features, AVX, Enabled); } else if (Name == "avx2") { @@ -584,15 +612,17 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features, } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == 
"avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || - Name == "avx512vpopcntdq") { + Name == "avx512vpopcntdq" || Name == "avx512bitalg" || + Name == "avx512vnni" || Name == "avx512vbmi2") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); - // Enable BWI instruction if VBMI is being enabled. - if (Name == "avx512vbmi" && Enabled) + // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled. + if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; - // Also disable VBMI if BWI is being disabled. + // Also disable VBMI/VBMI2/BITALG if BWI is being disabled. if (Name == "avx512bw" && !Enabled) - Features["avx512vbmi"] = false; + Features["avx512vbmi"] = Features["avx512vbmi2"] = + Features["avx512bitalg"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); @@ -636,8 +666,12 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, if (Feature == "+aes") { HasAES = true; + } else if (Feature == "+vaes") { + HasVAES = true; } else if (Feature == "+pclmul") { HasPCLMUL = true; + } else if (Feature == "+vpclmulqdq") { + HasVPCLMULQDQ = true; } else if (Feature == "+lzcnt") { HasLZCNT = true; } else if (Feature == "+rdrnd") { @@ -666,22 +700,30 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasFMA = true; } else if (Feature == "+f16c") { HasF16C = true; + } else if (Feature == "+gfni") { + HasGFNI = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { HasAVX512VPOPCNTDQ = true; + } else if (Feature == "+avx512vnni") { + HasAVX512VNNI = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; + } else if (Feature == "+avx512bitalg") { + HasAVX512BITALG = true; } else if (Feature == "+avx512bw") { HasAVX512BW = true; } else if (Feature == "+avx512vl") { HasAVX512VL = true; } else if (Feature == "+avx512vbmi") { HasAVX512VBMI = true; + } else if (Feature == "+avx512vbmi2") { + HasAVX512VBMI2 = true; } else if (Feature == "+avx512ifma") { HasAVX512IFMA = true; } else if (Feature == "+sha") { @@ -934,9 +976,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasAES) Builder.defineMacro("__AES__"); + if (HasVAES) + Builder.defineMacro("__VAES__"); + if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); + if (HasVPCLMULQDQ) + Builder.defineMacro("__VPCLMULQDQ__"); + if (HasLZCNT) Builder.defineMacro("__LZCNT__"); @@ -996,22 +1044,31 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasF16C) Builder.defineMacro("__F16C__"); + if (HasGFNI) + Builder.defineMacro("__GFNI__"); + if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); + if (HasAVX512VNNI) + Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); + if (HasAVX512BITALG) + Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) Builder.defineMacro("__AVX512VL__"); if (HasAVX512VBMI) Builder.defineMacro("__AVX512VBMI__"); + if (HasAVX512VBMI2) + Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); @@ -1141,12 +1198,15 @@ bool 
X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) + .Case("avx512vnni", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) + .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) + .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("bmi", true) .Case("bmi2", true) @@ -1159,6 +1219,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) + .Case("gfni", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) @@ -1185,6 +1246,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) + .Case("vaes", true) + .Case("vpclmulqdq", true) .Case("x87", true) .Case("xop", true) .Case("xsave", true) @@ -1203,12 +1266,15 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) + .Case("avx512vnni", HasAVX512VNNI) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) + .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) + .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) @@ -1221,6 +1287,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) + .Case("gfni", HasGFNI) .Case("ibt", HasIBT) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) @@ -1249,6 +1316,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) + .Case("vaes", HasVAES) + .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h index b1811593545e..cbd6a2d24fb5 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h @@ -48,7 +48,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP; bool HasAES = false; + bool HasVAES = false; bool HasPCLMUL = false; + bool HasVPCLMULQDQ = false; + bool HasGFNI = false; bool HasLZCNT = false; bool HasRDRND = false; bool HasFSGSBASE = false; @@ -65,12 +68,15 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasF16C = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; + bool HasAVX512VNNI = false; bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; + bool HasAVX512BITALG = false; bool HasAVX512BW = false; bool HasAVX512VL = false; bool HasAVX512VBMI = false; + bool HasAVX512VBMI2 = false; bool HasAVX512IFMA = false; bool HasSHA = false; bool HasMPX = false; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 609987c4fa4c..ba54f8342f1b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -8143,12 +8143,18 @@ Value 
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: case X86::BI__builtin_ia32_vpopcntq_128: + case X86::BI__builtin_ia32_vpopcntw_128: + case X86::BI__builtin_ia32_vpopcntb_256: case X86::BI__builtin_ia32_vpopcntd_256: case X86::BI__builtin_ia32_vpopcntq_256: + case X86::BI__builtin_ia32_vpopcntw_256: + case X86::BI__builtin_ia32_vpopcntb_512: case X86::BI__builtin_ia32_vpopcntd_512: - case X86::BI__builtin_ia32_vpopcntq_512: { + case X86::BI__builtin_ia32_vpopcntq_512: + case X86::BI__builtin_ia32_vpopcntw_512: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); return Builder.CreateCall(F, Ops); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index 90eeddf5cc0b..c7dc8337e19e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -570,7 +570,7 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) { return TCK == TCK_DowncastPointer || TCK == TCK_Upcast || - TCK == TCK_UpcastToVirtualBase; + TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation; } bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { @@ -578,7 +578,7 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { return (RD && RD->hasDefinition() && RD->isDynamicClass()) && (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || - TCK == TCK_UpcastToVirtualBase); + TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation); } bool CodeGenFunction::sanitizePerformTypeCheck() const { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp index 0749b0ac46a7..c32f1e5415da 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp @@ -2056,6 +2056,15 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, // Get the vtable pointer. Address ThisPtr = CGF.EmitLValue(E).getAddress(); + QualType SrcRecordTy = E->getType(); + + // C++ [class.cdtor]p4: + // If the operand of typeid refers to the object under construction or + // destruction and the static type of the operand is neither the constructor + // or destructor’s class nor one of its bases, the behavior is undefined. + CGF.EmitTypeCheck(CodeGenFunction::TCK_DynamicOperation, E->getExprLoc(), + ThisPtr.getPointer(), SrcRecordTy); + // C++ [expr.typeid]p2: // If the glvalue expression is obtained by applying the unary * operator to // a pointer and the pointer is a null pointer value, the typeid expression @@ -2064,7 +2073,6 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, // However, this paragraph's intent is not clear. We choose a very generous // interpretation which implores us to consider comma operators, conditional // operators, parentheses and other such constructs. 
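The two EmitTypeCheck(TCK_DynamicOperation, ...) calls added in this file wire typeid and dynamic_cast into the -fsanitize=vptr machinery. The undefined behavior they catch is the one in the standard's own [class.cdtor] example, abridged here:

struct V { virtual void f(); };
struct A : virtual V {};
struct B : virtual V { B(V*, A*); };
struct D : A, B { D() : B((A*)this, this) {} };

B::B(V* v, A* a) {
  typeid(*v);          // OK: V is a base of B
  typeid(*a);          // undefined: A is neither B nor one of B's bases
  dynamic_cast<B*>(v); // OK
  dynamic_cast<B*>(a); // undefined behavior, now flagged by the vptr check
}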
- QualType SrcRecordTy = E->getType(); if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked( isGLValueFromPointerDeref(E), SrcRecordTy)) { llvm::BasicBlock *BadTypeidBlock = @@ -2127,10 +2135,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, CGM.EmitExplicitCastExprType(DCE, this); QualType DestTy = DCE->getTypeAsWritten(); - if (DCE->isAlwaysNull()) - if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) - return T; - QualType SrcTy = DCE->getSubExpr()->getType(); // C++ [expr.dynamic.cast]p7: @@ -2151,6 +2155,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType(); } + // C++ [class.cdtor]p5: + // If the operand of the dynamic_cast refers to the object under + // construction or destruction and the static type of the operand is not a + // pointer to or object of the constructor or destructor’s own class or one + // of its bases, the dynamic_cast results in undefined behavior. + EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(), + SrcRecordTy); + + if (DCE->isAlwaysNull()) + if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) + return T; + assert(SrcRecordTy->isRecordType() && "source type must be a record type!"); // C++ [expr.dynamic.cast]p4: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5db29eb6004d..fa38ee80bf41 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4175,14 +4175,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); LValue SrcBase; - if (!Data.FirstprivateVars.empty()) { + bool IsTargetTask = + isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + // For target-based directives skip 3 firstprivate arrays BasePointersArray, + // PointersArray and SizesArray. The original variables for these arrays are + // not captured and we get their addresses explicitly. + if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || + (IsTargetTask && Data.FirstprivateVars.size() > 3)) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast<CapturedStmt>(*D.getAssociatedStmt())); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? 
OMPD_taskloop + : OMPD_task; + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; @@ -4192,14 +4201,27 @@ static void emitPrivatesInit(CodeGenFunction &CGF, LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), - LValueBaseInfo(AlignmentSource::Decl), - SharedRefLValue.getTBAAInfo()); + // Check if the variable is the target-based BasePointersArray, + // PointersArray or SizesArray. + LValue SharedRefLValue; QualType Type = OriginalVD->getType(); + if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) && + isa<CapturedDecl>(OriginalVD->getDeclContext()) && + cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() == + 0 && + isa<TranslationUnitDecl>( + cast<CapturedDecl>(OriginalVD->getDeclContext()) + ->getDeclContext())) { + SharedRefLValue = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); + } else { + auto *SharedField = CapturesInfo.lookup(OriginalVD); + SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); + } if (Type->isArrayType()) { // Initialize firstprivate array. if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { @@ -4400,8 +4422,10 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } KmpTaskTQTy = SavedKmpTaskloopTQTy; } else { - assert(D.getDirectiveKind() == OMPD_task && - "Expected taskloop or task directive"); + assert((D.getDirectiveKind() == OMPD_task || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || + isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && + "Expected taskloop, task or target directive"); if (SavedKmpTaskTQTy.isNull()) { SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); @@ -7417,8 +7441,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&BeginThenGen = [this, &D, Device, &Info, + &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. 
MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; @@ -7454,8 +7478,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), OffloadingArgs); // If device pointer privatization is required, emit the body of the region @@ -7465,7 +7488,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, + PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; @@ -7490,8 +7514,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), OffloadingArgs); }; @@ -7543,25 +7566,11 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( isa<OMPTargetUpdateDirective>(D)) && "Expecting either target enter, exit data, or update directives."); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Generate the code for the opening of the data environment. - auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - - // Fill up the arrays and create the arguments. - TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, &D, Device, &InputInfo, + &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { // Emit device ID if any. llvm::Value *DeviceID = nullptr; if (Device) { @@ -7572,13 +7581,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Constant *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); - llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; - auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. 
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); @@ -7600,18 +7612,47 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( : OMPRTL__tgt_target_data_update; break; } - CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); + CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); }; - // In the event we get an if clause, we don't have to take any action on the - // else side. - auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( + CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; - if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - RegionCodeGenTy ThenGenRCG(ThenGen); - ThenGenRCG(CGF); + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (D.hasClausesOfKind<OMPDependClause>()) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, OMPD_target_update, ThenGen); + }; + + if (IfCond) + emitOMPIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); + else { + RegionCodeGenTy ThenRCG(TargetThenGen); + ThenRCG(CGF); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index b5fc8d308067..7b2993cfd38d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -33,10 +33,11 @@ enum OpenMPRTLFunctionNVPTX { /// \brief Call to void __kmpc_spmd_kernel_deinit(); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, void ***args, kmp_int32 nArgs); + /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void - /// ***args); + /// ***args, int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_parallel, /// \brief Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -521,7 +522,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Set up shared arguments Address SharedArgs = CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args"); - llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()}; + // TODO: Optimize runtime initialization and pass in correct value. 
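+ // Until then the worker conservatively reports to __kmpc_kernel_parallel
+ // that the full OpenMP runtime has been initialized.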
+ llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -637,18 +640,21 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, void ***args, kmp_int32 nArgs); + /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty, + CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, void ***args); + /// Build bool __kmpc_kernel_parallel(void **outlined_function, void + /// ***args, int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0)}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -949,8 +955,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_args"); llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); + // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {ID, SharedArgsPtr, - Bld.getInt32(CapturedVars.size())}; + Bld.getInt32(CapturedVars.size()), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), @@ -970,9 +978,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( Idx++; } } else { - llvm::Value *Args[] = {ID, - llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), - /*nArgs=*/Bld.getInt32(0)}; + // TODO: Optimize runtime initialization and pass in correct value. 
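+ // A parallel region with no captured variables still has to be announced
+ // to the workers: nArgs is zero and the runtime flag is again a
+ // conservative 1.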
+ llvm::Value *Args[] = { + ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), + /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index f04d28ed0d4a..f9861735832b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2907,6 +2907,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, TaskGen(*this, OutlinedFn, Data); } +static ImplicitParamDecl * +createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, + QualType Ty, CapturedDecl *CD) { + auto *OrigVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *OrigRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), Ty, VK_LValue); + auto *PrivateVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *PrivateRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, + /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, + VK_LValue); + QualType ElemType = C.getBaseElementType(Ty); + auto *InitVD = + ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, + ElemType, ImplicitParamDecl::Other); + auto *InitRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), ElemType, VK_LValue); + PrivateVD->setInitStyle(VarDecl::CInit); + PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, + InitRef, /*BasePath=*/nullptr, + VK_RValue)); + Data.FirstprivateVars.emplace_back(OrigRef); + Data.FirstprivateCopies.emplace_back(PrivateRef); + Data.FirstprivateInits.emplace_back(InitRef); + return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( + const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo) { + // Emit outlined function for task construct. + auto CS = S.getCapturedStmt(OMPD_task); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto *I = CS->getCapturedDecl()->param_begin(); + auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + OMPTaskDataTy Data; + // The task is not final. + Data.Final.setInt(/*IntVal=*/false); + // Get list of firstprivate variables. 
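+ // Explicit firstprivates from the clause list are collected first; the
+ // offloading arrays are appended further below as implicit firstprivates.
+ // This path is reached when a standalone target data directive carries a
+ // depend clause, e.g. (illustrative):
+ //   #pragma omp target enter data map(to: a[0:n]) depend(in: x) nowait
+ // in which case the data motion is wrapped in an implicit task.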
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (auto *IInit : C->private_copies()) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + ++IRef; + ++IElemInitRef; + } + } + OMPPrivateScope TargetScope(*this); + VarDecl *BPVD = nullptr; + VarDecl *PVD = nullptr; + VarDecl *SVD = nullptr; + if (InputInfo.NumberOfTargetItems > 0) { + auto *CD = CapturedDecl::Create( + getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); + llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); + QualType BaseAndPointersType = getContext().getConstantArrayType( + getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + BPVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + PVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + QualType SizesType = getContext().getConstantArrayType( + getContext().getSizeType(), ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); + TargetScope.addPrivate( + BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); + TargetScope.addPrivate(PVD, + [&InputInfo]() { return InputInfo.PointersArray; }); + TargetScope.addPrivate(SVD, + [&InputInfo]() { return InputInfo.SizesArray; }); + } + (void)TargetScope.Privatize(); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, + &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.FirstprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; + llvm::SmallVector<llvm::Value *, 16> CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : Data.FirstprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + // Privatize all private variables except for in_reduction items. 
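+ // After Privatize() the task body sees its own copies; InputInfo is then
+ // redirected to those copies by the constant GEPs below.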
+ (void)Scope.Privatize(); + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + + Action.Enter(CGF); + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, + /*EmitPreInitStmt=*/false); + BodyGen(CGF); + }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, + Data.NumberOfParts); + llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); + IntegerLiteral IfCond(getContext(), TrueOrFalse, + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, &IfCond, Data); +} + void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -4252,14 +4397,8 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } void CodeGenFunction::EmitOMPTargetExitDataDirective( @@ -4279,14 +4418,8 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } static void emitTargetParallelRegion(CodeGenFunction &CGF, @@ -4585,12 +4718,6 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index cd62d00dfb53..dd4c2e43ef64 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -2371,7 +2371,10 @@ public: /// object within its lifetime. TCK_UpcastToVirtualBase, /// Checking the value assigned to a _Nonnull pointer. Must not be null. - TCK_NonnullAssign + TCK_NonnullAssign, + /// Checking the operand of a dynamic_cast or a typeid expression. 
Must be + /// null or an object within its lifetime. + TCK_DynamicOperation }; /// Determine whether the pointer type check \p TCK permits null pointers. @@ -2820,6 +2823,20 @@ public: void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data); + struct OMPTargetDataInfo { + Address BasePointersArray = Address::invalid(); + Address PointersArray = Address::invalid(); + Address SizesArray = Address::invalid(); + unsigned NumberOfTargetItems = 0; + explicit OMPTargetDataInfo() = default; + OMPTargetDataInfo(Address BasePointersArray, Address PointersArray, + Address SizesArray, unsigned NumberOfTargetItems) + : BasePointersArray(BasePointersArray), PointersArray(PointersArray), + SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {} + }; + void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); diff --git a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp index 6ba8892f3501..3c985a1f71d7 100644 --- a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp @@ -794,7 +794,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (MsanTrackOrigins) CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" + - llvm::utostr(MsanTrackOrigins))); + Twine(MsanTrackOrigins))); if (MsanUseAfterDtor) CmdArgs.push_back("-fsanitize-memory-use-after-dtor"); @@ -829,7 +829,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (AsanFieldPadding) CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" + - llvm::utostr(AsanFieldPadding))); + Twine(AsanFieldPadding))); if (AsanUseAfterScope) CmdArgs.push_back("-fsanitize-address-use-after-scope"); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp index 7b3f4bc9d872..8b895c4514c4 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -1738,10 +1738,9 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back("-Wreturn-type"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - std::string Opt = std::string("-hexagon-small-data-threshold=") + N; CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString(Opt)); + CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" + + Twine(G.getValue()))); } if (!Args.hasArg(options::OPT_fno_short_enums)) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp index 60f96d03c9c8..f26880123d8c 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -419,8 +419,8 @@ void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back("-plugin-opt=thinlto"); if (unsigned Parallelism = getLTOParallelism(Args, D)) - CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") + - llvm::to_string(Parallelism))); + CmdArgs.push_back( + 
Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism))); // If an explicit debugger tuning argument appeared, pass it along. if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp index 289f4ed92f6c..2250e82d9dbf 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp @@ -545,8 +545,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (unsigned Parallelism = getLTOParallelism(Args, getToolChain().getDriver())) { CmdArgs.push_back("-mllvm"); - CmdArgs.push_back( - Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism))); + CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism))); } if (getToolChain().ShouldLinkCXXStdlib(Args)) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp index f21af5b4dcf5..2debf0e2de54 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -138,16 +138,15 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const Driver &D = HTC.getDriver(); ArgStringList CmdArgs; - std::string MArchString = "-march=hexagon"; - CmdArgs.push_back(Args.MakeArgString(MArchString)); + CmdArgs.push_back("-march=hexagon"); RenderExtraToolArgs(JA, CmdArgs); - std::string AsName = "hexagon-llvm-mc"; - std::string MCpuString = "-mcpu=hexagon" + - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); + const char *AsName = "hexagon-llvm-mc"; CmdArgs.push_back("-filetype=obj"); - CmdArgs.push_back(Args.MakeArgString(MCpuString)); + CmdArgs.push_back(Args.MakeArgString( + "-mcpu=hexagon" + + toolchains::HexagonToolChain::GetTargetCPUVersion(Args))); if (Output.isFilename()) { CmdArgs.push_back("-o"); @@ -158,8 +157,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); + CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue()))); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); @@ -192,7 +190,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, II.getInputArg().render(Args, CmdArgs); } - auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); + auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName)); C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); } @@ -243,10 +241,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back(Opt.c_str()); CmdArgs.push_back("-march=hexagon"); - std::string CpuVer = - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); - std::string MCpuString = "-mcpu=hexagon" + CpuVer; - CmdArgs.push_back(Args.MakeArgString(MCpuString)); + StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); + CmdArgs.push_back(Args.MakeArgString("-mcpu=hexagon" + CpuVer)); if (IsShared) { CmdArgs.push_back("-shared"); @@ -261,8 +257,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - 
CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); + CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue()))); UseG0 = G.getValue() == 0; } @@ -291,7 +286,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, //---------------------------------------------------------------------------- // Start Files //---------------------------------------------------------------------------- - const std::string MCpuSuffix = "/" + CpuVer; + const std::string MCpuSuffix = "/" + CpuVer.str(); const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; const std::string RootDir = HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; @@ -351,7 +346,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back("--start-group"); if (!IsShared) { - for (const std::string &Lib : OsLibs) + for (StringRef Lib : OsLibs) CmdArgs.push_back(Args.MakeArgString("-l" + Lib)); CmdArgs.push_back("-lc"); } diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512bitalgintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512bitalgintrin.h new file mode 100644 index 000000000000..2dd1471d2f7e --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512bitalgintrin.h @@ -0,0 +1,97 @@ +/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512BITALGINTRIN_H +#define __AVX512BITALGINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi16(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, + (__v32hi) _mm512_popcnt_epi16(__B), + (__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) +{ + return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), + __U, + __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi8(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, + (__v64qi) _mm512_popcnt_epi8(__B), + (__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) +{ + return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), + __U, + __B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, + (__v64qi) __B, + __U); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) +{ + return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vbmi2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vbmi2intrin.h new file mode 100644 index 000000000000..43e97b40a098 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vbmi2intrin.h @@ -0,0 +1,391 @@ +/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VBMI2INTRIN_H +#define __AVX512VBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) +{ + __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) +{ + __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ + (__v8di)(B), \ + (int)(I), \ + (__v8di)(S), \ + (__mmask8)(U)); }) + +#define _mm512_maskz_shldi_epi64(U, A, B, I) \ + _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi64(A, B, I) \ + 
_mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ + (__v16si)(B), \ + (int)(I), \ + (__v16si)(S), \ + (__mmask16)(U)); }) + +#define _mm512_maskz_shldi_epi32(U, A, B, I) \ + _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi32(A, B, I) \ + _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ + (__v32hi)(B), \ + (int)(I), \ + (__v32hi)(S), \ + (__mmask32)(U)); }) + +#define _mm512_maskz_shldi_epi16(U, A, B, I) \ + _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi16(A, B, I) \ + _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ + (__v8di)(B), \ + (int)(I), \ + (__v8di)(S), \ + (__mmask8)(U)); }) + +#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ + _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi64(A, B, I) \ + _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ + (__v16si)(B), \ + (int)(I), \ + (__v16si)(S), \ + (__mmask16)(U)); }) + +#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ + _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi32(A, B, I) \ + _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ + (__v32hi)(B), \ + (int)(I), \ + (__v32hi)(S), \ + (__mmask32)(U)); }) + +#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ + _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi16(A, B, I) \ + _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + 
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif + diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h new file mode 100644 index 000000000000..76eb87721b8b --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h @@ -0,0 +1,157 @@ +/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLBITALGINTRIN_H +#define __AVX512VLBITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi16(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, + (__v16hi) _mm256_popcnt_epi16(__B), + (__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, + (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi8(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, + (__v32qi) _mm256_popcnt_epi8(__B), + (__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, + (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) __A); +} + +static __inline__ 
__m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, + (__v32qi) __B, + __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +{ + return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + __A, + __B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, + (__v16qi) __B, + __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +{ + return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h new file mode 100644 index 000000000000..d1ec4976f274 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -0,0 +1,748 @@ +/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVBMI2INTRIN_H +#define __AVX512VLVBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm128_setzero_hi(void) { + return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) +{ + __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) +{ + __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setzero_hi(void) { + return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi16(__m256i __S, 
__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) +{ + __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) +{ + __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ + (__v4di)(B), \ + (int)(I), \ + (__v4di)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi64(U, A, B, I) \ + _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi64(A, B, I) \ + _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ 
+ (__v2di)(B), \ + (int)(I), \ + (__v2di)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi64(U, A, B, I) \ + _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi64(A, B, I) \ + _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ + (__v8si)(B), \ + (int)(I), \ + (__v8si)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi32(U, A, B, I) \ + _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi32(A, B, I) \ + _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ + (__v4si)(B), \ + (int)(I), \ + (__v4si)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi32(U, A, B, I) \ + _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi32(A, B, I) \ + _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ + (__v16hi)(B), \ + (int)(I), \ + (__v16hi)(S), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_shldi_epi16(U, A, B, I) \ + _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi16(A, B, I) \ + _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ + (__v8hi)(B), \ + (int)(I), \ + (__v8hi)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi16(U, A, B, I) \ + _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi16(A, B, I) \ + _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ + (__v4di)(B), \ + (int)(I), \ + (__v4di)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ + _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi64(A, B, I) \ + _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ + (__v2di)(B), \ + (int)(I), \ + (__v2di)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi64(U, A, B, I) \ + _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi64(A, B, I) \ + _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ + (__v8si)(B), \ + (int)(I), \ + (__v8si)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ + _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi32(A, B, I) \ + _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ + (__v4si)(B), \ + (int)(I), \ + (__v4si)(S), \ + (__mmask8)(U)); }) + 
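+/* Illustrative note (assumes an AVX512VL+VBMI2 target; a and b are arbitrary
+ * __m128i values): per 32-bit lane, the shrdi forms concatenate B:A and keep
+ * the low half after shifting right by the immediate, e.g.
+ *   __m128i r = _mm128_shrdi_epi32(a, b, 8);
+ * The maskz_ and unmasked variants below reuse the masked builtin with a
+ * zeroed or undefined pass-through source. */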
+#define _mm128_maskz_shrdi_epi32(U, A, B, I) \ + _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi32(A, B, I) \ + _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ + (__v16hi)(B), \ + (int)(I), \ + (__v16hi)(S), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ + _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi16(A, B, I) \ + _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ + (__v8hi)(B), \ + (int)(I), \ + (__v8hi)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi16(U, A, B, I) \ + _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi16(A, B, I) \ + _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi32(__mmask8 
__U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi32(__mmask8 __U, 
__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h new file mode 100644 index 000000000000..745ae8b7ad3d --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h @@ -0,0 +1,254 @@ +/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLVNNIINTRIN_H +#define __AVX512VLVNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) 
__builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vnniintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vnniintrin.h new file mode 100644 index 000000000000..0c6badd231aa --- /dev/null +++ 
b/contrib/llvm/tools/clang/lib/Headers/avx512vnniintrin.h @@ -0,0 +1,146 @@ +/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VNNIINTRIN_H +#define __AVX512VNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, 
__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/gfniintrin.h b/contrib/llvm/tools/clang/lib/Headers/gfniintrin.h new file mode 100644 index 000000000000..20fadccfaaed --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/gfniintrin.h @@ -0,0 +1,202 @@ +/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead." 
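The avx512vnniintrin.h header just added exposes the AVX512VNNI multiply-accumulate instructions (VPDPBUSD/VPDPBUSDS for bytes, VPDPWSSD/VPDPWSSDS for words). Each 32-bit lane of the first operand accumulates a small dot product: four unsigned-byte-by-signed-byte products for the dpbusd forms, two signed-word products for the dpwssd forms, with the "s" variants saturating instead of wrapping. A usage sketch, for illustration only (not code from this merge; assumes VNNI-capable hardware and a compiler accepting -mavx512vnni):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m512i acc = _mm512_setzero_si512();
      __m512i a = _mm512_set1_epi8(3);   /* interpreted as unsigned bytes */
      __m512i b = _mm512_set1_epi8(-2);  /* interpreted as signed bytes */

      /* Per 32-bit lane: acc += 4 * (3 * -2) = -24. */
      acc = _mm512_dpbusd_epi32(acc, a, b);

      int out[16];
      _mm512_storeu_si512(out, acc);
      printf("%d\n", out[0]); /* prints -24 */
      return 0;
    }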
+#endif + +#ifndef __GFNIINTRIN_H +#define __GFNIINTRIN_H + + +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, B, I); }) + +#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, 
B, I); }) + +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) + +/* Default attributes for VLX forms. */ +#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, + (__v16qi) __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128(__U, + (__v16qi) _mm_gf2p8mul_epi8(__A, __B), + (__v16qi) __S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) +{ + return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), + __U, __A, __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, + (__v32qi) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256(__U, + (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), + (__v32qi) __S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) +{ + return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), + __U, __A, __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, + (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512(__U, + (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), + (__v64qi) __S); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) +{ + return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), + __U, __A, __B); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F +#undef __DEFAULT_FN_ATTRS_VL + +#endif // __GFNIINTRIN_H + diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h index 64ad6e658422..d3421dc86c99 100644 --- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h @@ -118,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a) } #endif /* __AVX2__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) +#include <vpclmulqdqintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) #include <bmiintrin.h> #endif @@ -146,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a) #include <avx512bwintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) +#include <avx512bitalgintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) #include <avx512cdintrin.h> #endif @@ -159,11 +167,25 @@ _mm256_cvtph_ps(__m128i __a) #include <avx512vpopcntdqvlintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || 
defined(__AVX512VNNI__) +#include <avx512vnniintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VNNI__)) +#include <avx512vlvnniintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include <avx512dqintrin.h> #endif #if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512BITALG__)) +#include <avx512vlbitalgintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include <avx512vlbwintrin.h> #endif @@ -200,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a) #include <avx512vbmivlintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) +#include <avx512vbmi2intrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) +#include <avx512vlvbmi2intrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) #include <avx512pfintrin.h> #endif @@ -208,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a) #include <pkuintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) +#include <vaesintrin.h> +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) +#include <gfniintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) diff --git a/contrib/llvm/tools/clang/lib/Headers/vaesintrin.h b/contrib/llvm/tools/clang/lib/Headers/vaesintrin.h new file mode 100644 index 000000000000..efbb8a565292 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/vaesintrin.h @@ -0,0 +1,98 @@ +/*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __VAESINTRIN_H +#define __VAESINTRIN_H + +/* Default attributes for YMM forms. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) + +/* Default attributes for ZMM forms. 
*/ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenc_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenc_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdec_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdec_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, + (__v8di) __B); +} + + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/vpclmulqdqintrin.h b/contrib/llvm/tools/clang/lib/Headers/vpclmulqdqintrin.h new file mode 100644 index 000000000000..21cda2221007 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/vpclmulqdqintrin.h @@ -0,0 +1,42 @@ +/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead." 
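The VAES intrinsics defined above lift the 128-bit AES round primitives to wider vectors: each 128-bit lane of the state register goes through one round (AESENC/AESDEC and their "last" variants) against the matching lane of the round-key vector, so several independent blocks can be processed per instruction. A sketch of the 256-bit form, for illustration only (not code from this merge; assumes a CPU and compiler supporting -mvaes):

    #include <immintrin.h>

    int main(void) {
      /* Two independent 128-bit AES states packed in one YMM register. */
      __m256i state = _mm256_set1_epi64x(0x0123456789ABCDEFLL);
      __m256i rkey  = _mm256_setzero_si256();

      /* One AESENC round applied to each 128-bit lane; a real cipher
         would continue with further rounds and successive round keys. */
      state = _mm256_aesenc_epi128(state, rkey);
      (void)state;
      return 0;
    }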
+#endif + +#ifndef __VPCLMULQDQINTRIN_H +#define __VPCLMULQDQINTRIN_H + +#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), \ + (char)(I)); }) + +#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (char)(I)); }) + +#endif // __VPCLMULQDQINTRIN_H + diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp index 9fe4309ca124..2a999399fb50 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp @@ -1548,15 +1548,21 @@ void Parser::DiagnoseMisplacedCXX11Attribute(ParsedAttributesWithRange &Attrs, SourceLocation Loc = Tok.getLocation(); ParseCXX11Attributes(Attrs); CharSourceRange AttrRange(SourceRange(Loc, Attrs.Range.getEnd()), true); - + // FIXME: use err_attributes_misplaced Diag(Loc, diag::err_attributes_not_allowed) << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange) << FixItHint::CreateRemoval(AttrRange); } -void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs) { - Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) - << attrs.Range; +void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, + const SourceLocation CorrectLocation) { + if (CorrectLocation.isValid()) { + CharSourceRange AttrRange(attrs.Range, true); + Diag(CorrectLocation, diag::err_attributes_misplaced) + << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange) + << FixItHint::CreateRemoval(AttrRange); + } else + Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) << attrs.Range; } void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs, diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp index 72d653797c60..8aa50a2c7f2a 100644 --- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp @@ -930,7 +930,31 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs, // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" // declaration-specifiers init-declarator-list[opt] ';' if (Tok.is(tok::semi)) { - ProhibitAttributes(attrs); + auto LengthOfTSTToken = [](DeclSpec::TST TKind) { + assert(DeclSpec::isDeclRep(TKind)); + switch(TKind) { + case DeclSpec::TST_class: + return 5; + case DeclSpec::TST_struct: + return 6; + case DeclSpec::TST_union: + return 5; + case DeclSpec::TST_enum: + return 4; + case DeclSpec::TST_interface: + return 9; + default: + llvm_unreachable("we only expect to get the length of the class/struct/union/enum"); + } + + }; + // Suggest correct location to fix '[[attrib]] struct' to 'struct [[attrib]]' + SourceLocation CorrectLocationForAttributes = + DeclSpec::isDeclRep(DS.getTypeSpecType()) + ? 
DS.getTypeSpecTypeLoc().getLocWithOffset( + LengthOfTSTToken(DS.getTypeSpecType())) + : SourceLocation(); + ProhibitAttributes(attrs, CorrectLocationForAttributes); ConsumeToken(); RecordDecl *AnonRecord = nullptr; Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp index f2fb95c39163..aa26b37f444d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp @@ -12265,11 +12265,10 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( // Construct the body of the conversion function { return __invoke; }. Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(), VK_LValue, Conv->getLocation()).get(); - assert(FunctionRef && "Can't refer to __invoke function?"); - Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); - Conv->setBody(new (Context) CompoundStmt(Context, Return, - Conv->getLocation(), - Conv->getLocation())); + assert(FunctionRef && "Can't refer to __invoke function?"); + Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); + Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(), + Conv->getLocation())); Conv->markUsed(Context); Conv->setReferenced(); @@ -12330,9 +12329,8 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( // Set the body of the conversion function. Stmt *ReturnS = Return.get(); - Conv->setBody(new (Context) CompoundStmt(Context, ReturnS, - Conv->getLocation(), - Conv->getLocation())); + Conv->setBody(CompoundStmt::Create(Context, ReturnS, Conv->getLocation(), + Conv->getLocation())); Conv->markUsed(Context); // We're done; notify the mutation listener, if any. diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp index 9c842ded1e10..cff9fbbf491b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -6265,9 +6265,8 @@ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) { // a StmtExpr; currently this is only used for asm statements. // This is hacky, either create a new CXXStmtWithTemporaries statement or // a new AsmStmtWithTemporaries. - CompoundStmt *CompStmt = new (Context) CompoundStmt(Context, SubStmt, - SourceLocation(), - SourceLocation()); + CompoundStmt *CompStmt = CompoundStmt::Create( + Context, SubStmt, SourceLocation(), SourceLocation()); Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(), SourceLocation()); return MaybeCreateExprWithCleanups(E); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp index d3f91a4e273d..a9db973851df 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp @@ -1589,7 +1589,7 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) { return false; // Find the extra places where we need to look. - llvm::DenseSet<Module*> &LookupModules = SemaRef.getLookupModules(); + const auto &LookupModules = SemaRef.getLookupModules(); if (LookupModules.empty()) return false; @@ -1604,7 +1604,8 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) { // Check whether DeclModule is transitively exported to an import of // the lookup set. 
return std::any_of(LookupModules.begin(), LookupModules.end(), - [&](Module *M) { return M->isModuleVisible(DeclModule); }); + [&](const Module *M) { + return M->isModuleVisible(DeclModule); }); } bool Sema::isVisibleSlow(const NamedDecl *D) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index 0880b2d79060..24b58e8fd12b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -1297,7 +1297,8 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) { Level, /*NotLastprivate=*/true) && // If the variable is artificial and must be captured by value - try to // capture by value. - !(isa<OMPCapturedExprDecl>(D) && D->hasAttr<OMPCaptureKindAttr>()); + !(isa<OMPCapturedExprDecl>(D) && !D->hasAttr<OMPCaptureNoInitAttr>() && + !cast<OMPCapturedExprDecl>(D)->getInit()->isGLValue()); } // When passing data by copy, we need to make sure it fits the uintptr size @@ -2326,7 +2327,6 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, ASTContext &C = S.getASTContext(); Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts(); QualType Ty = Init->getType(); - Attr *OMPCaptureKind = nullptr; if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) { if (S.getLangOpts().CPlusPlus) { Ty = C.getLValueReferenceType(Ty); @@ -2339,16 +2339,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, Init = Res.get(); } WithInit = true; - } else if (AsExpression) { - // This variable must be captured by value. - OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown); } auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty, CaptureExpr->getLocStart()); if (!WithInit) CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange())); - if (OMPCaptureKind) - CED->addAttr(OMPCaptureKind); S.CurContext->addHiddenDecl(CED); S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false); return CED; @@ -7628,6 +7623,11 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_teams_distribute_parallel_for_simd: CaptureRegion = OMPD_teams; break; + case OMPD_target_update: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + CaptureRegion = OMPD_task; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: @@ -7644,9 +7644,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_target_data: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: // Do not capture if-clause expressions. 
break; case OMPD_threadprivate: @@ -8007,15 +8004,17 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( break; case OMPC_device: switch (DKind) { + case OMPD_target_update: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + CaptureRegion = OMPD_task; + break; case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_data: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: case OMPD_target: case OMPD_target_simd: case OMPD_target_parallel: diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp index ff0f4d995851..4474d62949a2 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp @@ -388,7 +388,7 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R, DiagnoseEmptyLoopBody(Elts[i], Elts[i + 1]); } - return new (Context) CompoundStmt(Context, Elts, L, R); + return CompoundStmt::Create(Context, Elts, L, R); } StmtResult diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1deb8638756b..d8af8f34530b 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3932,22 +3932,22 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, TemplateArgs)) return; - if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) { - // If this is a constructor, instantiate the member initializers. - InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl), - TemplateArgs); - - // If this is an MS ABI dllexport default constructor, instantiate any - // default arguments. - if (Context.getTargetInfo().getCXXABI().isMicrosoft() && - Ctor->isDefaultConstructor()) { - InstantiateDefaultCtorDefaultArgs(*this, Ctor); - } - } - if (PatternDecl->hasSkippedBody()) { ActOnSkippedFunctionBody(Function); } else { + if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) { + // If this is a constructor, instantiate the member initializers. + InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl), + TemplateArgs); + + // If this is an MS ABI dllexport default constructor, instantiate any + // default arguments. + if (Context.getTargetInfo().getCXXABI().isMicrosoft() && + Ctor->isDefaultConstructor()) { + InstantiateDefaultCtorDefaultArgs(*this, Ctor); + } + } + // Instantiate the function body. 
StmtResult Body = SubstStmt(Pattern, TemplateArgs); diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index 8ef1491eb2da..6163b811c769 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -119,7 +119,7 @@ void ASTStmtReader::VisitCompoundStmt(CompoundStmt *S) { unsigned NumStmts = Record.readInt(); while (NumStmts--) Stmts.push_back(Record.readSubStmt()); - S->setStmts(Record.getContext(), Stmts); + S->setStmts(Stmts); S->LBraceLoc = ReadSourceLocation(); S->RBraceLoc = ReadSourceLocation(); } @@ -3081,7 +3081,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case STMT_COMPOUND: - S = new (Context) CompoundStmt(Empty); + S = CompoundStmt::CreateEmpty( + Context, /*NumStmts=*/Record[ASTStmtReader::NumStmtFields]); break; case STMT_CASE: diff --git a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp index 6982bfc43db4..8117d2f4a232 100644 --- a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp +++ b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp @@ -2106,7 +2106,7 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, OverloadInfo &OI = I.second; OS << "case NEON::BI__builtin_neon_" << I.first << ": "; - OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; + OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; if (OI.PtrArgNum >= 0) OS << "; PtrArgNum = " << OI.PtrArgNum; if (OI.HasConstPtr) @@ -2320,7 +2320,7 @@ void NeonEmitter::run(raw_ostream &OS) { Type T2 = T; T2.makeScalar(); - OS << utostr(T.getNumElements()) << "))) "; + OS << T.getNumElements() << "))) "; OS << T2.str(); OS << " " << T.str() << ";\n"; } @@ -2350,7 +2350,7 @@ void NeonEmitter::run(raw_ostream &OS) { Type VT(TS, M); OS << "typedef struct " << VT.str() << " {\n"; OS << " " << T.str() << " val"; - OS << "[" << utostr(NumMembers) << "]"; + OS << "[" << NumMembers << "]"; OS << ";\n} "; OS << VT.str() << ";\n"; OS << "\n"; diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h index 4eb8bae3c622..93bef23a97f0 100644 --- a/contrib/llvm/tools/lld/COFF/Config.h +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -174,6 +174,7 @@ struct Configuration { bool HighEntropyVA = false; bool AppContainer = false; bool MinGW = false; + bool WarnLocallyDefinedImported = true; }; extern Configuration *Config; diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp index 0e7db7b6ae34..1aaec355c7a5 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -227,7 +227,7 @@ static bool isDecorated(StringRef Sym) { void LinkerDriver::parseDirectives(StringRef S) { ArgParser Parser; // .drectve is always tokenized using Windows shell rules. - opt::InputArgList Args = Parser.parse(S); + opt::InputArgList Args = Parser.parseDirectives(S); for (auto *Arg : Args) { switch (Arg->getOption().getUnaliasedOption().getID()) { @@ -245,6 +245,13 @@ void LinkerDriver::parseDirectives(StringRef S) { Config->Entry = addUndefined(mangle(Arg->getValue())); break; case OPT_export: { + // If a common header file contains dllexported function + // declarations, many object files may end up with having the + // same /EXPORT options. In order to save cost of parsing them, + // we dedup them first. 
+ if (!DirectivesExports.insert(Arg->getValue()).second) + break; + Export E = parseExport(Arg->getValue()); if (Config->Machine == I386 && Config->MinGW) { if (!isDecorated(E.Name)) @@ -795,6 +802,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { SearchPaths.push_back(Arg->getValue()); addLibSearchPaths(); + // Handle /ignore + for (auto *Arg : Args.filtered(OPT_ignore)) { + if (StringRef(Arg->getValue()) == "4217") + Config->WarnLocallyDefinedImported = false; + // Other warning numbers are ignored. + } + // Handle /out if (auto *Arg = Args.getLastArg(OPT_out)) Config->OutputFile = Arg->getValue(); diff --git a/contrib/llvm/tools/lld/COFF/Driver.h b/contrib/llvm/tools/lld/COFF/Driver.h index 63d41cf69093..3f7fad1038f3 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.h +++ b/contrib/llvm/tools/lld/COFF/Driver.h @@ -16,6 +16,7 @@ #include "lld/Common/Reproduce.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" @@ -54,6 +55,10 @@ public: // Tokenizes a given string and then parses as command line options. llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } + // Tokenizes a given string and then parses as command line options in + // .drectve section. + llvm::opt::InputArgList parseDirectives(StringRef S); + private: // Parses command line options. llvm::opt::InputArgList parse(llvm::ArrayRef<const char *> Args); @@ -123,6 +128,8 @@ private: std::list<std::function<void()>> TaskQueue; std::vector<StringRef> FilePaths; std::vector<MemoryBufferRef> Resources; + + llvm::StringSet<> DirectivesExports; }; // Functions below this line are defined in DriverUtils.cpp. diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp index 07783b51c519..e0641e04a017 100644 --- a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp +++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp @@ -750,6 +750,22 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) { return Args; } +// Tokenizes and parses a given string as command line in .drective section. +opt::InputArgList ArgParser::parseDirectives(StringRef S) { + // Make InputArgList from string vectors. + unsigned MissingIndex; + unsigned MissingCount; + + opt::InputArgList Args = + Table.ParseArgs(tokenize(S), MissingIndex, MissingCount); + + if (MissingCount) + fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + warn("ignoring unknown argument: " + Arg->getSpelling()); + return Args; +} + // link.exe has an interesting feature. If LINK or _LINK_ environment // variables exist, their contents are handled as command line strings. // So you can pass extra arguments using them. diff --git a/contrib/llvm/tools/lld/COFF/Options.td b/contrib/llvm/tools/lld/COFF/Options.td index 0e7a79730fa2..7d4cdba14f75 100644 --- a/contrib/llvm/tools/lld/COFF/Options.td +++ b/contrib/llvm/tools/lld/COFF/Options.td @@ -29,6 +29,7 @@ def export : P<"export", "Export a function">; // No help text because /failifmismatch is not intended to be used by the user. 
def failifmismatch : P<"failifmismatch", "">; def heap : P<"heap", "Size of the heap">; +def ignore : P<"ignore", "Specify warning codes to ignore">; def implib : P<"implib", "Import library name">; def libpath : P<"libpath", "Additional library search path">; def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">; @@ -155,7 +156,6 @@ def fastfail : F<"fastfail">; def delay : QF<"delay">; def errorreport : QF<"errorreport">; def idlout : QF<"idlout">; -def ignore : QF<"ignore">; def maxilksize : QF<"maxilksize">; def natvis : QF<"natvis">; def pdbaltpath : QF<"pdbaltpath">; diff --git a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp index 95b48e6d059f..df76679535cb 100644 --- a/contrib/llvm/tools/lld/COFF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/COFF/SymbolTable.cpp @@ -117,9 +117,10 @@ void SymbolTable::reportRemainingUndefines() { for (Symbol *B : Config->GCRoot) { if (Undefs.count(B)) errorOrWarn("<root>: undefined symbol: " + B->getName()); - if (Symbol *Imp = LocalImports.lookup(B)) - warn("<root>: locally defined symbol imported: " + Imp->getName() + - " (defined in " + toString(Imp->getFile()) + ")"); + if (Config->WarnLocallyDefinedImported) + if (Symbol *Imp = LocalImports.lookup(B)) + warn("<root>: locally defined symbol imported: " + Imp->getName() + + " (defined in " + toString(Imp->getFile()) + ")"); } for (ObjFile *File : ObjFile::Instances) { @@ -128,9 +129,11 @@ void SymbolTable::reportRemainingUndefines() { continue; if (Undefs.count(Sym)) errorOrWarn(toString(File) + ": undefined symbol: " + Sym->getName()); - if (Symbol *Imp = LocalImports.lookup(Sym)) - warn(toString(File) + ": locally defined symbol imported: " + - Imp->getName() + " (defined in " + toString(Imp->getFile()) + ")"); + if (Config->WarnLocallyDefinedImported) + if (Symbol *Imp = LocalImports.lookup(Sym)) + warn(toString(File) + ": locally defined symbol imported: " + + Imp->getName() + " (defined in " + toString(Imp->getFile()) + + ")"); } } } diff --git a/contrib/llvm/tools/lld/ELF/Arch/X86.cpp b/contrib/llvm/tools/lld/ELF/Arch/X86.cpp index fc848917d4e9..10517bef14f3 100644 --- a/contrib/llvm/tools/lld/ELF/Arch/X86.cpp +++ b/contrib/llvm/tools/lld/ELF/Arch/X86.cpp @@ -192,9 +192,9 @@ void X86::writePltHeader(uint8_t *Buf) const { } const uint8_t PltData[] = { - 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOTPLT+4) - 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOTPLT+8) - 0x90, 0x90, 0x90, 0x90 // nop + 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) + 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) + 0x90, 0x90, 0x90, 0x90, // nop }; memcpy(Buf, PltData, sizeof(PltData)); uint32_t GotPlt = InX::GotPlt->getVA(); @@ -206,9 +206,9 @@ void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { - 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, // jmp *foo_in_GOT|*foo@GOT(%ebx) - 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $reloc_offset - 0xe9, 0x00, 0x00, 0x00, 0x00 // jmp .PLT0@PC + 0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx) + 0x68, 0, 0, 0, 0, // pushl $reloc_offset + 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC }; memcpy(Buf, Inst, sizeof(Inst)); @@ -318,7 +318,7 @@ void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { // subl $x@ntpoff,%eax const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax - 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax + 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax }; 
memcpy(Loc - 3, Inst, sizeof(Inst)); write32le(Loc + 5, Val); @@ -333,7 +333,7 @@ void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const { // addl x@gotntpoff(%ebx), %eax const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax - 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax + 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax }; memcpy(Loc - 3, Inst, sizeof(Inst)); write32le(Loc + 5, Val); @@ -394,7 +394,7 @@ void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const { const uint8_t Inst[] = { 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax 0x90, // nop - 0x8d, 0x74, 0x26, 0x00 // leal 0(%esi,1),%esi + 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi }; memcpy(Loc - 2, Inst, sizeof(Inst)); } diff --git a/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp b/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp index 14e354b9f4fb..c977d9247d92 100644 --- a/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp +++ b/contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp @@ -129,9 +129,9 @@ void X86_64<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &S) const { template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const { const uint8_t PltData[] = { - 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOTPLT+8(%rip) - 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOTPLT+16(%rip) - 0x0f, 0x1f, 0x40, 0x00 // nop + 0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip) + 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip) + 0x0f, 0x1f, 0x40, 0x00, // nop }; memcpy(Buf, PltData, sizeof(PltData)); uint64_t GotPlt = InX::GotPlt->getVA(); @@ -145,9 +145,9 @@ void X86_64<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { - 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *got(%rip) - 0x68, 0x00, 0x00, 0x00, 0x00, // pushq <relocation index> - 0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[0] + 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip) + 0x68, 0, 0, 0, 0, // pushq <relocation index> + 0xe9, 0, 0, 0, 0, // jmpq plt[0] }; memcpy(Buf, Inst, sizeof(Inst)); @@ -175,7 +175,7 @@ void X86_64<ELFT>::relaxTlsGdToLe(uint8_t *Loc, RelType Type, // lea x@tpoff,%rax const uint8_t Inst[] = { 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax - 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax + 0x48, 0x8d, 0x80, 0, 0, 0, 0, // lea x@tpoff,%rax }; memcpy(Loc - 4, Inst, sizeof(Inst)); @@ -198,7 +198,7 @@ void X86_64<ELFT>::relaxTlsGdToIe(uint8_t *Loc, RelType Type, // addq x@tpoff,%rax const uint8_t Inst[] = { 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax - 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax + 0x48, 0x03, 0x05, 0, 0, 0, 0, // addq x@tpoff,%rax }; memcpy(Loc - 4, Inst, sizeof(Inst)); @@ -274,9 +274,9 @@ void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type, } const uint8_t Inst[] = { - 0x66, 0x66, // .word 0x6666 - 0x66, // .byte 0x66 - 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax + 0x66, 0x66, // .word 0x6666 + 0x66, // .byte 0x66 + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax }; memcpy(Loc - 3, Inst, sizeof(Inst)); } diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp index 91873e318f54..8f50a977fd75 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -693,6 +693,8 @@ void LinkerScript::assignOffsets(OutputSection *Sec) { if (auto *Cmd = 
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
index 91873e318f54..8f50a977fd75 100644
--- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
+++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp
@@ -693,6 +693,8 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
     if (auto *Cmd = dyn_cast<ByteCommand>(Base)) {
       Cmd->Offset = Dot - Ctx->OutSec->Addr;
       Dot += Cmd->Size;
+      if (Ctx->MemRegion)
+        Ctx->MemRegionOffset[Ctx->MemRegion] += Cmd->Size;
       Ctx->OutSec->Size = Dot - Ctx->OutSec->Addr;
       continue;
     }
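The two added lines make BYTE()-family data commands count against the MEMORY region a section is assigned to, not just against Dot, so region overflow checks see them. A toy model of that bookkeeping, with hypothetical types rather than lld's LinkerScript machinery: with it, a script like "MEMORY { rom : ORIGIN = 0x1000, LENGTH = 4 }" overflows as soon as a section placed in rom emits LONG(0) twice.

#include <cstdint>
#include <map>
#include <stdexcept>
#include <string>

struct MemRegion {
  std::string Name;
  uint64_t Origin = 0;
  uint64_t Length = 0;
};

struct LayoutState {
  uint64_t Dot = 0;                                // current address
  std::map<MemRegion *, uint64_t> MemRegionOffset; // bytes used per region
};

// Account for Size bytes emitted by a data command such as BYTE or LONG.
void emitDataCommand(LayoutState &St, MemRegion *Region, uint64_t Size) {
  St.Dot += Size;
  if (!Region)
    return;
  uint64_t &Used = St.MemRegionOffset[Region]; // the fix: bump this too
  Used += Size;
  if (Used > Region->Length)
    throw std::runtime_error("memory region '" + Region->Name + "' is full");
}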
diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp
index 94ea3e1557c4..1aa0957b1d01 100644
--- a/contrib/llvm/tools/lld/ELF/Relocations.cpp
+++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp
@@ -581,44 +581,38 @@ static RelExpr getPltExpr(Symbol &Sym, RelExpr Expr, bool &IsConstant) {
   return toPlt(Expr);
 }
 
+// This modifies the expression if we can use a copy relocation or point the
+// symbol to the PLT.
 template <class ELFT>
 static RelExpr adjustExpr(Symbol &Sym, RelExpr Expr, RelType Type,
                           InputSectionBase &S, uint64_t RelOff,
                           bool &IsConstant) {
-  // We can create any dynamic relocation if a section is simply writable.
-  if (S.Flags & SHF_WRITE)
-    return Expr;
-
-  // Or, if we are allowed to create dynamic relocations against
-  // read-only sections (i.e. when "-z notext" is given),
-  // we can create a dynamic relocation as we want, too.
-  if (!Config->ZText) {
-    // We use PLT for relocations that may overflow in runtime,
-    // see comment for getPltExpr().
-    if (Sym.isFunc() && !Target->isPicRel(Type))
-      return getPltExpr(Sym, Expr, IsConstant);
-    return Expr;
-  }
-
   // If a relocation can be applied at link-time, we don't need to
   // create a dynamic relocation in the first place.
   if (IsConstant)
     return Expr;
 
-  // If we got here we know that this relocation would require the dynamic
-  // linker to write a value to read only memory.
-
-  // If the relocation is to a weak undef, give up on it and produce a
-  // non preemptible 0.
-  if (Sym.isUndefWeak()) {
+  // If the relocation is to a weak undef, and we are producing an
+  // executable, give up on it and produce a non preemptible 0.
+  if (!Config->Shared && Sym.isUndefWeak()) {
     Sym.IsPreemptible = false;
     IsConstant = true;
     return Expr;
   }
 
+  // We can create any dynamic relocation supported by the dynamic linker if a
+  // section is writable or we are passed -z notext.
+  bool CanWrite = (S.Flags & SHF_WRITE) || !Config->ZText;
+  if (CanWrite && Target->isPicRel(Type))
+    return Expr;
+
+  // If we got here we know that this relocation would require the dynamic
+  // linker to write a value to read only memory or use an unsupported
+  // relocation.
+
   // We can hack around it if we are producing an executable and
   // the referred symbol can be preempted to refer to the executable.
-  if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) {
+  if (!CanWrite && (Config->Shared || (Config->Pic && !isRelExpr(Expr)))) {
     error(
         "can't create dynamic relocation " + toString(Type) + " against " +
         (Sym.getName().empty() ? "local symbol" : "symbol: " + toString(Sym)) +
@@ -627,6 +621,11 @@ static RelExpr adjustExpr(Symbol &Sym, RelExpr Expr, RelType Type,
     return Expr;
   }
 
+  // Copy relocations are only possible if we are creating an executable and
+  // the symbol is shared.
+  if (!Sym.isShared() || Config->Shared)
+    return Expr;
+
   if (Sym.getVisibility() != STV_DEFAULT) {
     error("cannot preempt symbol: " + toString(Sym) +
           getLocation(S, Sym, RelOff));
diff --git a/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp b/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp
index 9f33c16f36b0..ef5a1cff7590 100644
--- a/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp
+++ b/contrib/llvm/tools/lld/ELF/ScriptLexer.cpp
@@ -115,11 +115,19 @@ void ScriptLexer::tokenize(MemoryBufferRef MB) {
       continue;
     }
 
+    // ">foo" is parsed to ">" and "foo", but ">>" is parsed to ">>".
+    if (S.startswith("<<") || S.startswith("<=") || S.startswith(">>") ||
+        S.startswith(">=")) {
+      Vec.push_back(S.substr(0, 2));
+      S = S.substr(2);
+      continue;
+    }
+
     // Unquoted token. This is more relaxed than tokens in C-like language,
     // so that you can write "file-name.cpp" as one bare token, for example.
     size_t Pos = S.find_first_not_of(
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
-        "0123456789_.$/\\~=+[]*?-!<>^:");
+        "0123456789_.$/\\~=+[]*?-!^:");
 
     // A character that cannot start a word (which is usually a
     // punctuation) forms a single character token.
diff --git a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp
index c1176ccfa8d5..4263944981f2 100644
--- a/contrib/llvm/tools/lld/ELF/ScriptParser.cpp
+++ b/contrib/llvm/tools/lld/ELF/ScriptParser.cpp
@@ -151,6 +151,7 @@ static ExprValue add(ExprValue A, ExprValue B) {
 }
 
 static ExprValue sub(ExprValue A, ExprValue B) {
+  // The distance between two symbols in sections is absolute.
   if (!A.isAbsolute() && !B.isAbsolute())
     return A.getValue() - B.getValue();
   return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc};
@@ -707,8 +708,6 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) {
 
   if (consume(">"))
     Cmd->MemoryRegionName = next();
-  else if (peek().startswith(">"))
-    Cmd->MemoryRegionName = next().drop_front();
 
   Cmd->Phdrs = readOutputSectionPhdrs();
diff --git a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
index d78358c714ac..eef96be8d1fe 100644
--- a/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
+++ b/contrib/llvm/tools/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
@@ -140,6 +140,10 @@ public:
   size_t FindTypes(const std::vector<lldb_private::CompilerContext> &context,
                    bool append, lldb_private::TypeMap &types) override;
 
+  void FindTypesByRegex(const lldb_private::RegularExpression &regex,
+                        uint32_t max_matches,
+                        lldb_private::TypeMap &types);
+
   lldb_private::TypeList *GetTypeList() override;
 
   size_t GetTypes(lldb_private::SymbolContextScope *sc_scope,
@@ -172,10 +176,6 @@ private:
                 const llvm::pdb::PDBSymbolCompiland &cu,
                 llvm::DenseMap<uint32_t, uint32_t> &index_map) const;
 
-  void FindTypesByRegex(const lldb_private::RegularExpression &regex,
-                        uint32_t max_matches,
-                        lldb_private::TypeMap &types);
-
   void FindTypesByName(const std::string &name, uint32_t max_matches,
                        lldb_private::TypeMap &types);
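Stepping back to the adjustExpr rewrite in Relocations.cpp above: the reorder hangs everything off one derived flag, CanWrite. The sketch below distills that control flow with plain booleans in place of lld's Symbol/Config/Target types; it is an illustration of the decision order, not the actual implementation.

// Rough distillation of the reordered adjustExpr decision (hypothetical
// types; the Error case's message corresponds to "can't create dynamic
// relocation" above).
enum class Action {
  KeepAsIs,      // resolved at link time, or a dynamic relocation is fine
  UndefWeakZero, // weak undefined in an executable: resolve to 0
  Error,         // would need the loader to patch read-only memory
  CopyOrPlt      // executable referencing a shared symbol: copy reloc / PLT
};

struct RelocDecision {
  bool Shared, Pic, ZText; // link mode and -z text

  Action classify(bool IsConstant, bool UndefWeak, bool Writable,
                  bool PicRelSupported, bool IsRelExpr, bool SymShared) const {
    if (IsConstant)
      return Action::KeepAsIs;      // applied fully at link time
    if (!Shared && UndefWeak)
      return Action::UndefWeakZero;
    bool CanWrite = Writable || !ZText;
    if (CanWrite && PicRelSupported)
      return Action::KeepAsIs;      // emit a dynamic relocation
    if (!CanWrite && (Shared || (Pic && !IsRelExpr)))
      return Action::Error;
    if (!SymShared || Shared)
      return Action::KeepAsIs;      // no copy relocation possible
    return Action::CopyOrPlt;       // preempt the symbol into the executable
  }
};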
" functions have mismatched data"); + warning(Twine(Mismatched) + " functions have mismatched data"); if (ViewOpts.Debug) { for (const auto &HashMismatch : Coverage->getHashMismatches()) errs() << "hash-mismatch: " << "No profile record found for '" << HashMismatch.first << "'" - << " with hash = 0x" << utohexstr(HashMismatch.second) << "\n"; + << " with hash = 0x" << Twine::utohexstr(HashMismatch.second) + << '\n'; for (const auto &CounterMismatch : Coverage->getCounterMismatches()) errs() << "counter-mismatch: " diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp index 79204c6e9533..3a9112423cff 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1643,7 +1643,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { outs() << " <" << TargetName; uint64_t Disp = Target - TargetAddress; if (Disp) - outs() << "+0x" << utohexstr(Disp); + outs() << "+0x" << Twine::utohexstr(Disp); outs() << '>'; } } diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp index 8ac9f1a51cc5..0e76e75c085d 100644 --- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -81,6 +81,9 @@ public: void printSymbols() override; void printDynamicSymbols() override; void printUnwindInfo() override; + + void printNeededLibraries() override; + void printCOFFImports() override; void printCOFFExports() override; void printCOFFDirectives() override; @@ -764,7 +767,7 @@ void COFFDumper::printRVATable(uint64_t TableVA, uint64_t Count, for (uintptr_t I = TableStart; I < TableEnd; I += EntrySize) { uint32_t RVA = *reinterpret_cast<const ulittle32_t *>(I); raw_ostream &OS = W.startLine(); - OS << "0x" << utohexstr(Obj->getImageBase() + RVA); + OS << "0x" << W.hex(Obj->getImageBase() + RVA); if (PrintExtra) PrintExtra(OS, reinterpret_cast<const uint8_t *>(I)); OS << '\n'; @@ -1522,6 +1525,25 @@ void COFFDumper::printUnwindInfo() { } } +void COFFDumper::printNeededLibraries() { + ListScope D(W, "NeededLibraries"); + + using LibsTy = std::vector<StringRef>; + LibsTy Libs; + + for (const ImportDirectoryEntryRef &DirRef : Obj->import_directories()) { + StringRef Name; + if (!DirRef.getName(Name)) + Libs.push_back(Name); + } + + std::stable_sort(Libs.begin(), Libs.end()); + + for (const auto &L : Libs) { + outs() << " " << L << "\n"; + } +} + void COFFDumper::printImportedSymbols( iterator_range<imported_symbol_iterator> Range) { for (const ImportedSymbolRef &I : Range) { diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp index f2b936904393..5605eaea7555 100644 --- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1900,8 +1900,8 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() { ArrayRef<uint8_t> Contents = unwrapOrError(Obj->getSectionContents(&Sec)); if (Contents[0] != ARMBuildAttrs::Format_Version) { - errs() << "unrecognised FormatVersion: 0x" << utohexstr(Contents[0]) - << '\n'; + errs() << "unrecognised FormatVersion: 0x" + << Twine::utohexstr(Contents[0]) << '\n'; continue; } diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 7755cd1be355..64cf23314497 100644 --- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ 
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 7755cd1be355..64cf23314497 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1873,7 +1873,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
   // Verify that we are passing the right number of operands.
   if (Frag->getNumArgs() != Children.size()) {
     TP.error("'" + Op->getName() + "' fragment requires " +
-             utostr(Frag->getNumArgs()) + " operands!");
+             Twine(Frag->getNumArgs()) + " operands!");
     return nullptr;
   }
 
@@ -2195,7 +2195,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
           SignBitAndAbove == 1)
         continue;
 
-      TP.error("Integer value '" + itostr(II->getValue()) +
+      TP.error("Integer value '" + Twine(II->getValue()) +
               "' is out of range for type '" + getEnumName(VT) + "'!");
       break;
     }
@@ -2245,9 +2245,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP);
 
     if (getNumChildren() != NumParamVTs + 1) {
-      TP.error("Intrinsic '" + Int->Name + "' expects " +
-               utostr(NumParamVTs) + " operands, not " +
-               utostr(getNumChildren() - 1) + " operands!");
+      TP.error("Intrinsic '" + Int->Name + "' expects " + Twine(NumParamVTs) +
+               " operands, not " + Twine(getNumChildren() - 1) + " operands!");
       return false;
     }
 
@@ -2271,7 +2270,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     if (NI.getNumOperands() >= 0 &&
         getNumChildren() != (unsigned)NI.getNumOperands()) {
       TP.error(getOperator()->getName() + " node requires exactly " +
-               itostr(NI.getNumOperands()) + " operands!");
+               Twine(NI.getNumOperands()) + " operands!");
       return false;
     }
 
@@ -2340,7 +2339,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       TreePatternNode *SubIdxChild = getChild(I + 1);
       if (!isOperandClass(SubIdxChild, "SubRegIndex")) {
         TP.error("REG_SEQUENCE requires a SubRegIndex for operand " +
-                 itostr(I + 1) + "!");
+                 Twine(I + 1) + "!");
         return false;
       }
     }
@@ -3514,7 +3513,7 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
     CGIOperandList::OperandInfo &Op = CGI.Operands[i];
     const std::string &OpName = Op.Name;
     if (OpName.empty())
-      I->error("Operand #" + utostr(i) + " in operands list has no name!");
+      I->error("Operand #" + Twine(i) + " in operands list has no name!");
 
     if (!InstInputsCheck.count(OpName)) {
      // If this is an operand with a DefaultOps set filled in, we can ignore
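The TP.error changes above, like the llvm-cov and llvm-readobj hunks earlier, all swap eager utostr/itostr/utohexstr string building for lazy Twine concatenation. Twine and Twine::utohexstr are the real llvm/ADT/Twine.h API; the small program around them below is hypothetical, purely to show the idiom.

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

using namespace llvm;

int main() {
  unsigned Mismatched = 3;
  uint64_t Hash = 0xdeadbeefULL;
  // A Twine chains the fragments and formats them once into the stream;
  // utostr(Mismatched) + "..." would allocate a temporary std::string per +.
  errs() << (Twine(Mismatched) + " functions have mismatched data, hash = 0x" +
             Twine::utohexstr(Hash))
         << '\n';
  return 0;
}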
diff --git a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index f879a5bae215..1c1932a0144a 100644
--- a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -283,10 +283,10 @@ void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
     if (i > 0) {
       DEBUG(dbgs() << ", ");
     }
-    DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i]));
+    DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
   }
   DFAInput InsnInput = getDFAInsnInput(InsnClass);
-  DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")");
+  DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
 }
 
 //
@@ -301,7 +301,7 @@ void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
     if (i > 0) {
       DEBUG(dbgs() << ", ");
     }
-    DEBUG(dbgs() << "0x" << utohexstr(thisState));
+    DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState));
   }
 }
 
@@ -361,7 +361,7 @@ void State::AddInsnClass(std::vector<unsigned> &InsnClass,
 
   DenseSet<unsigned> VisitedResourceStates;
 
-  DEBUG(dbgs() << "  thisState: 0x" << utohexstr(thisState) << "\n");
+  DEBUG(dbgs() << "  thisState: 0x" << Twine::utohexstr(thisState) << "\n");
   AddInsnClassStages(InsnClass, ComboBitToBitsMap,
                      numstages - 1, numstages,
                      thisState, thisState,
@@ -381,7 +381,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
   DEBUG({
     dbgsIndent((1 + numstages - chkstage) << 1);
     dbgs() << "AddInsnClassStages " << chkstage << " (0x"
-           << utohexstr(thisStage) << ") from ";
+           << Twine::utohexstr(thisStage) << ") from ";
     dbgsInsnClass(InsnClass);
     dbgs() << "\n";
   });
@@ -395,9 +395,10 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
     if (resourceMask & thisStage) {
       unsigned combo = ComboBitToBitsMap[resourceMask];
      if (combo && ((~prevState & combo) != combo)) {
-        DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState)
-                     << " - combo op 0x" << utohexstr(resourceMask)
-                     << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+        DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState)
+                     << " - combo op 0x" << Twine::utohexstr(resourceMask)
+                     << " (0x" << Twine::utohexstr(combo)
+                     << ") cannot be scheduled\n");
         continue;
       }
       //
@@ -407,11 +408,11 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
       unsigned ResultingResourceState = prevState | resourceMask | combo;
       DEBUG({
         dbgsIndent((2 + numstages - chkstage) << 1);
-        dbgs() << "0x" << utohexstr(prevState)
-               << " | 0x" << utohexstr(resourceMask);
+        dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x"
+               << Twine::utohexstr(resourceMask);
         if (combo)
-          dbgs() << " | 0x" << utohexstr(combo);
-        dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " ";
+          dbgs() << " | 0x" << Twine::utohexstr(combo);
+        dbgs() << " = 0x" << Twine::utohexstr(ResultingResourceState) << " ";
       });
       //
@@ -433,7 +434,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
           VisitedResourceStates.insert(ResultingResourceState);
           PossibleStates.insert(ResultingResourceState);
           DEBUG(dbgs() << "\tResultingResourceState: 0x"
-                       << utohexstr(ResultingResourceState) << "\n");
+                       << Twine::utohexstr(ResultingResourceState) << "\n");
         } else {
           DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
         }
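The skipped-add checks above, and the one in canMaybeAddInsnClass just below, hinge on a single bit trick: a state is a bitmask with one bit per functional unit, a combo unit's mask covers every unit it occupies, and (~state & combo) != combo holds exactly when some needed unit is already taken. A standalone illustration with toy masks (not the emitter's data):

#include <cstdio>

int main() {
  unsigned prevState = 0x3; // binary 0011: units 0 and 1 busy this cycle
  unsigned combo     = 0x6; // binary 0110: combo op occupies units 1 and 2
  // ~prevState & combo keeps only the combo units that are still free;
  // if that is not the whole combo mask, some required unit is taken.
  if ((~prevState & combo) != combo)
    std::printf("combo cannot be scheduled (unit 1 is busy)\n");
  else
    std::printf("new state: 0x%x\n", prevState | combo);
  return 0;
}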
@@ -493,9 +494,10 @@ bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
     // These cases are caught later in AddInsnClass.
     unsigned combo = ComboBitToBitsMap[InsnClass[i]];
     if (combo && ((~resources & combo) != combo)) {
-      DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources)
-                   << " - combo op 0x" << utohexstr(InsnClass[i])
-                   << " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
+      DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x"
+                   << Twine::utohexstr(resources) << " - combo op 0x"
+                   << Twine::utohexstr(InsnClass[i]) << " (0x"
+                   << Twine::utohexstr(combo) << ") cannot be scheduled\n");
       available = false;
       break;
     }
@@ -573,9 +575,8 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
     for (State::TransitionMap::iterator
            II = SI->Transitions.begin(), IE = SI->Transitions.end();
          II != IE; ++II) {
-      OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", "
-         << II->second->stateNum
-         << "},\t";
+      OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", "
+         << II->second->stateNum << "},\t";
     }
 
     ValidTransitions += SI->Transitions.size();
@@ -668,8 +669,8 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
            "Exceeded maximum number of representable resources");
     unsigned FuncResources = (unsigned) (1U << j);
     FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
-    DEBUG(dbgs() << " " << FUs[j]->getName()
-                 << ":0x" << utohexstr(FuncResources));
+    DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
+                 << Twine::utohexstr(FuncResources));
   }
   if (((int) numFUs) > maxFUs) {
     maxFUs = numFUs;
@@ -713,20 +714,20 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
       const std::string &ComboFuncName = ComboFunc->getName();
       unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
       unsigned ComboResources = ComboBit;
-      DEBUG(dbgs() << "   combo: " << ComboFuncName
-                   << ":0x" << utohexstr(ComboResources) << "\n");
+      DEBUG(dbgs() << "   combo: " << ComboFuncName << ":0x"
+                   << Twine::utohexstr(ComboResources) << "\n");
       for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
         std::string FuncName = FuncList[k]->getName();
         unsigned FuncResources = FUNameToBitsMap[FuncName];
-        DEBUG(dbgs() << "      " << FuncName
-                     << ":0x" << utohexstr(FuncResources) << "\n");
+        DEBUG(dbgs() << "      " << FuncName << ":0x"
+                     << Twine::utohexstr(FuncResources) << "\n");
         ComboResources |= FuncResources;
       }
       ComboBitToBitsMap[ComboBit] = ComboResources;
       numCombos++;
       DEBUG(dbgs() << "    => combo bits: " << ComboFuncName << ":0x"
-                   << utohexstr(ComboBit) << " = 0x"
-                   << utohexstr(ComboResources) << "\n");
+                   << Twine::utohexstr(ComboBit) << " = 0x"
+                   << Twine::utohexstr(ComboResources) << "\n");
     }
   }
   return numCombos;
@@ -781,7 +782,7 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
       dbglen += 8;
       DEBUG(dbgs() << "\t");
     }
-    DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
+    DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue) << ")\n");
   }
 
   if (!UnitBits.empty())
diff --git a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 37e024b1665e..ba793ad9b938 100644
--- a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -448,7 +448,7 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints,
 
     // If the entry fit in the table, just emit it.
     if (FixedEncodings[i] != ~0U) {
-      OS << "0x" << utohexstr(FixedEncodings[i]) << ", ";
+      OS << "0x" << Twine::utohexstr(FixedEncodings[i]) << ", ";
       continue;
     }
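For context on what writeTableAndAPI above is emitting: each row of the generated table is an {insn-class input, next state} pair, and a packetizer walks these rows to decide whether one more instruction fits in the current packet. Below is a hypothetical consumer of such a table; Table, StateBegin, and transition are invented for illustration, and the real DFAPacketizer API differs.

#include <cstdint>
#include <cstdio>

struct Transition { uint64_t Input; int NextState; };

// A toy two-state table in the emitted format: from state 0, insn class
// 0x1 leads to state 1; nothing more can be added from state 1.
static const Transition Table[] = {{0x1, 1}};
static const int StateBegin[] = {0, 1, 1}; // row range per state

// Return the next state, or -1 if the insn class cannot join the packet.
int transition(int State, uint64_t InsnInput) {
  for (int I = StateBegin[State]; I < StateBegin[State + 1]; ++I)
    if (Table[I].Input == InsnInput)
      return Table[I].NextState;
  return -1;
}

int main() {
  std::printf("%d\n", transition(0, 0x1)); // 1: the packet accepts the insn
  std::printf("%d\n", transition(1, 0x1)); // -1: resources exhausted
  return 0;
}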