author    | Dimitry Andric <dim@FreeBSD.org> | 2016-01-06 20:19:13 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-01-06 20:19:13 +0000
commit    | 4d0b32cd7f47f4836ea0a7a9da24d3576c6e2fe0 (patch)
tree      | b7c03c042b220d85a294b0e2e89936b631d3e6ad /contrib
parent    | 9b52dc8469bb3471e8caea1a48274f2ff3085584 (diff)
parent    | 8a6c1c25bce0267ee4072bd7b786b921e8a66a35 (diff)
Update llvm to trunk r256945.
Notes:
svn path=/projects/clang380-import/; revision=293265
Diffstat (limited to 'contrib')
133 files changed, 2995 insertions, 1578 deletions
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h index 87fb3efaf50e..493a99a4b11e 100644 --- a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); -/// \brief Tests if a value is a call or invoke to a library function that -/// allocates memory and never returns null (such as operator new). -bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast = false); - //===----------------------------------------------------------------------===// // malloc Call Utility Functions. // diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h index 978864e96ca5..05c9a9e0b079 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h @@ -97,7 +97,7 @@ private: // of memory operands required to be precise exceeds the maximum value of // NumMemRefs - currently 256 - we remove the operands entirely. Note also // that this is a non-owning reference to a shared copy on write buffer owned - // by the MachineFunction and created via MF.allocateMemRefsArray. + // by the MachineFunction and created via MF.allocateMemRefsArray. mmo_iterator MemRefs; DebugLoc debugLoc; // Source line information. @@ -354,7 +354,7 @@ public: mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } /// Return true if we don't have any memory operands which described the the /// memory access done by this instruction. If this is true, calling code - /// must be conservative. + /// must be conservative. bool memoperands_empty() const { return NumMemRefs == 0; } iterator_range<mmo_iterator> memoperands() { @@ -774,7 +774,7 @@ public: bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } - bool isMSInlineAsm() const { + bool isMSInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect(); } bool isStackAligningInlineAsm() const; @@ -1180,11 +1180,26 @@ public: /// Assign this MachineInstr's memory reference descriptor list. /// This does not transfer ownership. void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { - MemRefs = NewMemRefs; - NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs); - assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs"); + setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs)); } + /// Assign this MachineInstr's memory reference descriptor list. First + /// element in the pair is the begin iterator/pointer to the array; the + /// second is the number of MemoryOperands. This does not transfer ownership + /// of the underlying memory. + void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) { + MemRefs = NewMemRefs.first; + NumMemRefs = uint8_t(NewMemRefs.second); + assert(NumMemRefs == NewMemRefs.second && + "Too many memrefs - must drop memory operands"); + } + + /// Return a set of memrefs (begin iterator, size) which conservatively + /// describe the memory behavior of both MachineInstrs. This is appropriate + /// for use when merging two MachineInstrs into one. 
This routine does not + /// modify the memrefs of the this MachineInstr. + std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other); + /// Clear this MachineInstr's memory reference descriptor list. This resets /// the memrefs to their most conservative state. This should be used only /// as a last resort since it greatly pessimizes our knowledge of the memory diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index aa5f4b24df61..8fe9b280d5d2 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -162,6 +162,11 @@ public: return *this; } + const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator, + unsigned> MemOperandsRef) const { + MI->setMemRefs(MemOperandsRef); + return *this; + } const MachineInstrBuilder &addOperand(const MachineOperand &MO) const { MI->addOperand(*MF, MO); diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h index 4fbe206fceb9..4e88606c05a7 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h @@ -178,7 +178,7 @@ public: /// register. bool FullyDefined; - /// Reg or ont of its aliases is read. The register may only be read + /// Reg or one of its aliases is read. The register may only be read /// partially. bool Read; /// Reg or a super-register is read. The full register is read. diff --git a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h index 70d558f5cfbd..f6ad7a8572ab 100644 --- a/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/WinEHFuncInfo.h @@ -83,7 +83,9 @@ enum class ClrHandlerType { Catch, Finally, Fault, Filter }; struct ClrEHUnwindMapEntry { MBBOrBasicBlock Handler; uint32_t TypeToken; - int Parent; + int HandlerParentState; ///< Outer handler enclosing this entry's handler + int TryParentState; ///< Outer try region enclosing this entry's try region, + ///< treating later catches on same try as "outer" ClrHandlerType HandlerType; }; diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h index f4b8a8a5a1c9..f7bfb47a5b44 100644 --- a/contrib/llvm/include/llvm/IR/CallSite.h +++ b/contrib/llvm/include/llvm/IR/CallSite.h @@ -310,6 +310,11 @@ public: CALLSITE_DELEGATE_GETTER(hasFnAttr(A)); } + /// \brief Return true if this function has the given attribute. + bool hasFnAttr(StringRef A) const { + CALLSITE_DELEGATE_GETTER(hasFnAttr(A)); + } + /// \brief Return true if the call or the callee has the given attribute. 
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const { CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A)); diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h index 7fe04f2a091a..a30505471aac 100644 --- a/contrib/llvm/include/llvm/IR/IRBuilder.h +++ b/contrib/llvm/include/llvm/IR/IRBuilder.h @@ -61,9 +61,13 @@ protected: MDNode *DefaultFPMathTag; FastMathFlags FMF; + ArrayRef<OperandBundleDef> DefaultOperandBundles; + public: - IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr) - : Context(context), DefaultFPMathTag(FPMathTag), FMF() { + IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : Context(context), DefaultFPMathTag(FPMathTag), FMF(), + DefaultOperandBundles(OpBundles) { ClearInsertionPoint(); } @@ -538,37 +542,44 @@ class IRBuilder : public IRBuilderBase, public Inserter { public: IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(), - MDNode *FPMathTag = nullptr) - : IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {} - - explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr) - : IRBuilderBase(C, FPMathTag), Folder() { - } - - explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr) - : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) { + MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(C, FPMathTag, OpBundles), Inserter(std::move(I)), + Folder(F) {} + + explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(C, FPMathTag, OpBundles), Folder() {} + + explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) { SetInsertPoint(TheBB); } - explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr) - : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() { + explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() { SetInsertPoint(TheBB); } - explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr) - : IRBuilderBase(IP->getContext(), FPMathTag), Folder() { + explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() { SetInsertPoint(IP); } - IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F, - MDNode *FPMathTag = nullptr) - : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) { + IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T &F, + MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) { SetInsertPoint(TheBB, IP); } IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, - MDNode *FPMathTag = nullptr) - : IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() { + MDNode *FPMathTag = nullptr, + ArrayRef<OperandBundleDef> OpBundles = None) + : IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() { SetInsertPoint(TheBB, IP); } @@ -1529,8 +1540,11 @@ public: CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None, ArrayRef<OperandBundleDef> OpBundles = None, - const Twine &Name = "") { - return Insert(CallInst::Create(Callee, Args, OpBundles), 
Name); + const Twine &Name = "", MDNode *FPMathTag = nullptr) { + CallInst *CI = CallInst::Create(Callee, Args, OpBundles); + if (isa<FPMathOperator>(CI)) + CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF)); + return Insert(CI, Name); } CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args, @@ -1543,7 +1557,7 @@ public: CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee, ArrayRef<Value *> Args, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - CallInst *CI = CallInst::Create(FTy, Callee, Args); + CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); if (isa<FPMathOperator>(CI)) CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF)); return Insert(CI, Name); diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h index d781c7af36d7..aba48ca6fa9e 100644 --- a/contrib/llvm/include/llvm/IR/Instructions.h +++ b/contrib/llvm/include/llvm/IR/Instructions.h @@ -3550,6 +3550,11 @@ public: return hasFnAttrImpl(A); } + /// \brief Determine whether this call has the given attribute. + bool hasFnAttr(StringRef A) const { + return hasFnAttrImpl(A); + } + /// \brief Determine whether the call or the callee has the given attributes. bool paramHasAttr(unsigned i, Attribute::AttrKind A) const; @@ -3734,7 +3739,19 @@ private: unsigned getNumSuccessorsV() const override; void setSuccessorV(unsigned idx, BasicBlock *B) override; - bool hasFnAttrImpl(Attribute::AttrKind A) const; + template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const { + if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) + return true; + + // Operand bundles override attributes on the called function, but don't + // override attributes directly present on the invoke instruction. + if (isFnAttrDisallowedByOpBundle(A)) + return false; + + if (const Function *F = getCalledFunction()) + return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); + return false; + } // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -3966,6 +3983,8 @@ public: /// point to the added handler. void addHandler(BasicBlock *Dest); + void removeHandler(handler_iterator HI); + unsigned getNumSuccessors() const { return getNumOperands() - 1; } BasicBlock *getSuccessor(unsigned Idx) const { assert(Idx < getNumSuccessors() && diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td index 18390f853510..54bcbd8da509 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td @@ -33,6 +33,19 @@ let TargetPrefix = "x86" in { } //===----------------------------------------------------------------------===// +// FLAGS. +let TargetPrefix = "x86" in { + def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">, + Intrinsic<[llvm_i32_ty], [], []>; + def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">, + Intrinsic<[llvm_i64_ty], [], []>; + def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">, + Intrinsic<[], [llvm_i32_ty], []>; + def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">, + Intrinsic<[], [llvm_i64_ty], []>; +} + +//===----------------------------------------------------------------------===// // Read Time Stamp Counter. 
let TargetPrefix = "x86" in { def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">, @@ -2211,6 +2224,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; @@ -2229,6 +2261,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], 
[IntrNoMem]>; + def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">, + Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">, + Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty, + llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">, + Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">, + Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">, + Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty, + llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>; + + def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Pack ops. @@ -2696,6 +2791,59 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; } // Gather ops @@ -3919,9 +4067,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Support protection key let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [], []>; def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">, - Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[], [llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// // Half float conversion diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h index 2ea591383f82..4a8557d074f0 100644 --- a/contrib/llvm/include/llvm/IR/Metadata.h +++ b/contrib/llvm/include/llvm/IR/Metadata.h @@ -283,14 +283,20 @@ private: LLVMContext &Context; uint64_t NextIndex; SmallDenseMap<void *, std::pair<OwnerTy, uint64_t>, 4> UseMap; + /// Flag that can be set to false if this metadata should not be + /// RAUW'ed, e.g. if it is used as the key of a map. + bool CanReplace; public: ReplaceableMetadataImpl(LLVMContext &Context) - : Context(Context), NextIndex(0) {} + : Context(Context), NextIndex(0), CanReplace(true) {} ~ReplaceableMetadataImpl() { assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata"); } + /// Set the CanReplace flag to the given value. + void setCanReplace(bool Replaceable) { CanReplace = Replaceable; } + LLVMContext &getContext() const { return Context; } /// \brief Replace all uses of this with MD. @@ -901,14 +907,19 @@ public: Context.getReplaceableUses()->replaceAllUsesWith(MD); } + /// Set the CanReplace flag to the given value. + void setCanReplace(bool Replaceable) { + Context.getReplaceableUses()->setCanReplace(Replaceable); + } + /// \brief Resolve cycles. /// /// Once all forward declarations have been resolved, force cycles to be - /// resolved. If \p MDMaterialized is true, then any temporary metadata + /// resolved. If \p AllowTemps is true, then any temporary metadata /// is ignored, otherwise it asserts when encountering temporary metadata. /// /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). - void resolveCycles(bool MDMaterialized = true); + void resolveCycles(bool AllowTemps = false); /// \brief Replace a temporary node with a permanent one. /// diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h index 7310c5697a7e..51a0951a9798 100644 --- a/contrib/llvm/include/llvm/IR/Statepoint.h +++ b/contrib/llvm/include/llvm/IR/Statepoint.h @@ -22,6 +22,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" @@ -36,14 +37,13 @@ enum class StatepointFlags { MaskAll = GCTransition ///< A bitmask that includes all valid flags. }; -class GCRelocateOperands; +class GCRelocateInst; class ImmutableStatepoint; bool isStatepoint(const ImmutableCallSite &CS); bool isStatepoint(const Value *V); bool isStatepoint(const Value &V); -bool isGCRelocate(const Value *V); bool isGCRelocate(const ImmutableCallSite &CS); bool isGCResult(const Value *V); @@ -247,7 +247,7 @@ public: /// May contain several relocations for the same base/derived pair. /// For example this could happen due to relocations on unwinding /// path of invoke. - std::vector<GCRelocateOperands> getRelocates() const; + std::vector<const GCRelocateInst *> getRelocates() const; /// Get the experimental_gc_result call tied to this statepoint. Can be /// nullptr if there isn't a gc_result tied to this statepoint. 
Guaranteed to @@ -305,33 +305,27 @@ public: explicit Statepoint(CallSite CS) : Base(CS) {} }; -/// Wraps a call to a gc.relocate and provides access to it's operands. -/// TODO: This should likely be refactored to resememble the wrappers in -/// InstrinsicInst.h. -class GCRelocateOperands { - ImmutableCallSite RelocateCS; - +/// This represents the gc.relocate intrinsic. +class GCRelocateInst : public IntrinsicInst { public: - GCRelocateOperands(const User *U) : RelocateCS(U) { assert(isGCRelocate(U)); } - GCRelocateOperands(const Instruction *inst) : RelocateCS(inst) { - assert(isGCRelocate(inst)); + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); } - GCRelocateOperands(CallSite CS) : RelocateCS(CS) { assert(isGCRelocate(CS)); } /// Return true if this relocate is tied to the invoke statepoint. /// This includes relocates which are on the unwinding path. bool isTiedToInvoke() const { - const Value *Token = RelocateCS.getArgument(0); + const Value *Token = getArgOperand(0); return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token); } - /// Get enclosed relocate intrinsic - ImmutableCallSite getUnderlyingCallSite() { return RelocateCS; } - /// The statepoint with which this gc.relocate is associated. - const Instruction *getStatepoint() { - const Value *Token = RelocateCS.getArgument(0); + const Instruction *getStatepoint() const { + const Value *Token = getArgOperand(0); // This takes care both of relocates for call statepoints and relocates // on normal path of invoke statepoint. @@ -354,22 +348,22 @@ public: /// The index into the associate statepoint's argument list /// which contains the base pointer of the pointer whose /// relocation this gc.relocate describes. - unsigned getBasePtrIndex() { - return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue(); + unsigned getBasePtrIndex() const { + return cast<ConstantInt>(getArgOperand(1))->getZExtValue(); } /// The index into the associate statepoint's argument list which /// contains the pointer whose relocation this gc.relocate describes. - unsigned getDerivedPtrIndex() { - return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue(); + unsigned getDerivedPtrIndex() const { + return cast<ConstantInt>(getArgOperand(2))->getZExtValue(); } - Value *getBasePtr() { + Value *getBasePtr() const { ImmutableCallSite CS(getStatepoint()); return *(CS.arg_begin() + getBasePtrIndex()); } - Value *getDerivedPtr() { + Value *getDerivedPtr() const { ImmutableCallSite CS(getStatepoint()); return *(CS.arg_begin() + getDerivedPtrIndex()); } @@ -377,11 +371,11 @@ public: template <typename FunTy, typename InstructionTy, typename ValueTy, typename CallSiteTy> -std::vector<GCRelocateOperands> +std::vector<const GCRelocateInst *> StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates() const { - std::vector<GCRelocateOperands> Result; + std::vector<const GCRelocateInst *> Result; CallSiteTy StatepointCS = getCallSite(); @@ -389,8 +383,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates() // gc_relocates ensures that we only get pairs which are actually relocated // and used after the statepoint. 
for (const User *U : getInstruction()->users()) - if (isGCRelocate(U)) - Result.push_back(GCRelocateOperands(U)); + if (auto *Relocate = dyn_cast<GCRelocateInst>(U)) + Result.push_back(Relocate); if (!StatepointCS.isInvoke()) return Result; @@ -401,8 +395,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates() // Search for gc relocates that are attached to this landingpad. for (const User *LandingPadUser : LandingPad->users()) { - if (isGCRelocate(LandingPadUser)) - Result.push_back(GCRelocateOperands(LandingPadUser)); + if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser)) + Result.push_back(Relocate); } return Result; } diff --git a/contrib/llvm/include/llvm/MC/SubtargetFeature.h b/contrib/llvm/include/llvm/MC/SubtargetFeature.h index 0d97b226d728..75d1e7997119 100644 --- a/contrib/llvm/include/llvm/MC/SubtargetFeature.h +++ b/contrib/llvm/include/llvm/MC/SubtargetFeature.h @@ -39,8 +39,8 @@ public: FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {} FeatureBitset(std::initializer_list<unsigned> Init) : bitset() { - for (auto I = Init.begin() , E = Init.end(); I != E; ++I) - set(*I); + for (auto I : Init) + set(I); } }; @@ -59,6 +59,11 @@ struct SubtargetFeatureKV { bool operator<(StringRef S) const { return StringRef(Key) < S; } + + // Compare routine for std::is_sorted. + bool operator<(const SubtargetFeatureKV &Other) const { + return StringRef(Key) < StringRef(Other.Key); + } }; //===----------------------------------------------------------------------===// @@ -98,14 +103,13 @@ public: /// Adding Features. void AddFeature(StringRef String, bool Enable = true); - /// ToggleFeature - Toggle a feature and returns the newly updated feature - /// bits. - FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String, - ArrayRef<SubtargetFeatureKV> FeatureTable); + /// ToggleFeature - Toggle a feature and update the feature bits. + static void ToggleFeature(FeatureBitset &Bits, StringRef String, + ArrayRef<SubtargetFeatureKV> FeatureTable); - /// Apply the feature flag and return the newly updated feature bits. - FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature, - ArrayRef<SubtargetFeatureKV> FeatureTable); + /// Apply the feature flag and update the feature bits. + static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature, + ArrayRef<SubtargetFeatureKV> FeatureTable); /// Get feature bits of a CPU. FeatureBitset getFeatureBits(StringRef CPU, diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h index 4688759a3bd1..49569d89507b 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h @@ -155,11 +155,36 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName); GlobalVariable *createPGOFuncNameVar(Module &M, GlobalValue::LinkageTypes Linkage, StringRef FuncName); +/// Return the initializer in string of the PGO name var \c NameVar. +StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar); /// Given a PGO function name, remove the filename prefix and return /// the original (static) function name. StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName); +/// Given a vector of strings (function PGO names) \c NameStrs, the +/// method generates a combined string \c Result thatis ready to be +/// serialized. 
The \c Result string is comprised of three fields: +/// The first field is the legnth of the uncompressed strings, and the +/// the second field is the length of the zlib-compressed string. +/// Both fields are encoded in ULEB128. If \c doCompress is false, the +/// third field is the uncompressed strings; otherwise it is the +/// compressed string. When the string compression is off, the +/// second field will have value zero. +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs, + bool doCompression, std::string &Result); +/// Produce \c Result string with the same format described above. The input +/// is vector of PGO function name variables that are referenced. +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars, + std::string &Result); +class InstrProfSymtab; +/// \c NameStrings is a string composed of one of more sub-strings encoded in +/// the +/// format described above. The substrings are seperated by 0 or more zero +/// bytes. +/// This method decodes the string and populates the \c Symtab. +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab); + const std::error_category &instrprof_category(); enum class instrprof_error { @@ -235,6 +260,11 @@ public: /// This interface is used by reader of CoverageMapping test /// format. inline std::error_code create(StringRef D, uint64_t BaseAddr); + /// \c NameStrings is a string composed of one of more sub-strings + /// encoded in the format described above. The substrings are + /// seperated by 0 or more zero bytes. This method decodes the + /// string and populates the \c Symtab. + inline std::error_code create(StringRef NameStrings); /// Create InstrProfSymtab from a set of names iteratable from /// \p IterRange. This interface is used by IndexedProfReader. template <typename NameIterRange> void create(const NameIterRange &IterRange); @@ -255,8 +285,8 @@ public: AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); } AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } - /// Return function's PGO name from the function name's symabol - /// address in the object file. If an error occurs, Return + /// Return function's PGO name from the function name's symbol + /// address in the object file. If an error occurs, return /// an empty string. StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); /// Return function's PGO name from the name's md5 hash value. @@ -270,6 +300,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { return std::error_code(); } +std::error_code InstrProfSymtab::create(StringRef NameStrings) { + if (readPGOFuncNameStrings(NameStrings, *this)) + return make_error_code(instrprof_error::malformed); + return std::error_code(); +} + template <typename NameIterRange> void InstrProfSymtab::create(const NameIterRange &IterRange) { for (auto Name : IterRange) @@ -576,8 +612,14 @@ template <class IntPtrT> struct CovMapFunctionRecord { #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; #include "llvm/ProfileData/InstrProfData.inc" }; -LLVM_PACKED_END +// Per module coverage mapping data header, i.e. CoverageMapFileHeader +// documented above. 
+struct CovMapHeader { +#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; +LLVM_PACKED_END } } // end namespace llvm diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc index 48dae506cabb..3a7c0c5f2773 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -1,4 +1,4 @@ -/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\ +/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\ |* |* The LLVM Compiler Infrastructure |* @@ -167,6 +167,25 @@ COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ #undef COVMAP_FUNC_RECORD /* COVMAP_FUNC_RECORD end. */ +/* COVMAP_HEADER start */ +/* Definition of member fields of coverage map header. + */ +#ifndef COVMAP_HEADER +#define COVMAP_HEADER(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \ + llvm::ConstantInt::get(Int32Ty, FunctionRecords.size())) +COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \ + llvm::ConstantInt::get(Int32Ty, FilenamesSize)) +COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \ + llvm::ConstantInt::get(Int32Ty, CoverageMappingSize)) +COVMAP_HEADER(uint32_t, Int32Ty, Version, \ + llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1)) +#undef COVMAP_HEADER +/* COVMAP_HEADER end. */ + #ifdef INSTR_PROF_VALUE_PROF_DATA #define INSTR_PROF_DATA_DEFINED diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def index 2f99b0717adf..c895b095bc5b 100644 --- a/contrib/llvm/include/llvm/Support/ARMTargetParser.def +++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def @@ -213,6 +213,7 @@ ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC) ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) // Non-standard Arch names. ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE) ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE) diff --git a/contrib/llvm/include/llvm/Support/Program.h b/contrib/llvm/include/llvm/Support/Program.h index 43302101e3e0..727864df2721 100644 --- a/contrib/llvm/include/llvm/Support/Program.h +++ b/contrib/llvm/include/llvm/Support/Program.h @@ -130,7 +130,7 @@ struct ProcessInfo { /// Return true if the given arguments fit within system-specific /// argument length limits. - bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args); + bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args); /// File encoding options when writing contents that a non-UTF8 tool will /// read (on Windows systems). For UNIX, we always use UTF-8. diff --git a/contrib/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm/include/llvm/Support/YAMLParser.h index b056ab6c1ce2..a5addfa3c7ae 100644 --- a/contrib/llvm/include/llvm/Support/YAMLParser.h +++ b/contrib/llvm/include/llvm/Support/YAMLParser.h @@ -305,7 +305,7 @@ private: /// increment() which must set CurrentEntry to 0 to create an end iterator. 
template <class BaseT, class ValueT> class basic_collection_iterator - : public std::iterator<std::forward_iterator_tag, ValueT> { + : public std::iterator<std::input_iterator_tag, ValueT> { public: basic_collection_iterator() : Base(nullptr) {} basic_collection_iterator(BaseT *B) : Base(B) {} @@ -326,11 +326,24 @@ public: return Base->CurrentEntry; } + /// Note on EqualityComparable: + /// + /// The iterator is not re-entrant, + /// it is meant to be used for parsing YAML on-demand + /// Once iteration started - it can point only to one entry at a time + /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal + /// iff Base and Other.Base are equal. + bool operator==(const basic_collection_iterator &Other) const { + if (Base && (Base == Other.Base)) { + assert((Base->CurrentEntry == Other.Base->CurrentEntry) + && "Equal Bases expected to point to equal Entries"); + } + + return Base == Other.Base; + } + bool operator!=(const basic_collection_iterator &Other) const { - if (Base != Other.Base) - return true; - return (Base && Other.Base) && - Base->CurrentEntry != Other.Base->CurrentEntry; + return !(Base == Other.Base); } basic_collection_iterator &operator++() { diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h index eb1c5c78b9c0..4c1ef4013dda 100644 --- a/contrib/llvm/include/llvm/TableGen/Record.h +++ b/contrib/llvm/include/llvm/TableGen/Record.h @@ -232,7 +232,7 @@ protected: /// We could pack these a bit tighter by not having the IK_FirstXXXInit /// and IK_LastXXXInit be their own values, but that would degrade /// readability for really no benefit. - enum InitKind { + enum InitKind : uint8_t { IK_BitInit, IK_FirstTypedInit, IK_BitsInit, @@ -256,6 +256,9 @@ protected: private: const InitKind Kind; +protected: + uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit +private: Init(const Init &) = delete; Init &operator=(const Init &) = delete; virtual void anchor(); @@ -264,7 +267,7 @@ public: InitKind getKind() const { return Kind; } protected: - explicit Init(InitKind K) : Kind(K) {} + explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {} public: virtual ~Init() {} @@ -365,7 +368,8 @@ class TypedInit : public Init { TypedInit &operator=(const TypedInit &Other) = delete; protected: - explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {} + explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0) + : Init(K, Opc), Ty(T) {} ~TypedInit() override { // If this is a DefInit we need to delete the RecordRecTy. 
if (getKind() == IK_DefInit) @@ -650,7 +654,8 @@ class OpInit : public TypedInit { OpInit &operator=(OpInit &Other) = delete; protected: - explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {} + explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc) + : TypedInit(K, Type, Opc) {} public: static bool classof(const Init *I) { @@ -677,14 +682,13 @@ public: /// class UnOpInit : public OpInit { public: - enum UnaryOp { CAST, HEAD, TAIL, EMPTY }; + enum UnaryOp : uint8_t { CAST, HEAD, TAIL, EMPTY }; private: - UnaryOp Opc; Init *LHS; UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type) - : OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {} + : OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {} UnOpInit(const UnOpInit &Other) = delete; UnOpInit &operator=(const UnOpInit &Other) = delete; @@ -708,7 +712,7 @@ public: return getOperand(); } - UnaryOp getOpcode() const { return Opc; } + UnaryOp getOpcode() const { return (UnaryOp)Opc; } Init *getOperand() const { return LHS; } // Fold - If possible, fold this to a simpler init. Return this if not @@ -724,14 +728,14 @@ public: /// class BinOpInit : public OpInit { public: - enum BinaryOp { ADD, AND, SHL, SRA, SRL, LISTCONCAT, STRCONCAT, CONCAT, EQ }; + enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT, + STRCONCAT, CONCAT, EQ }; private: - BinaryOp Opc; Init *LHS, *RHS; BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) : - OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {} + OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {} BinOpInit(const BinOpInit &Other) = delete; BinOpInit &operator=(const BinOpInit &Other) = delete; @@ -759,7 +763,7 @@ public: } } - BinaryOp getOpcode() const { return Opc; } + BinaryOp getOpcode() const { return (BinaryOp)Opc; } Init *getLHS() const { return LHS; } Init *getRHS() const { return RHS; } @@ -776,15 +780,14 @@ public: /// class TernOpInit : public OpInit { public: - enum TernaryOp { SUBST, FOREACH, IF }; + enum TernaryOp : uint8_t { SUBST, FOREACH, IF }; private: - TernaryOp Opc; Init *LHS, *MHS, *RHS; TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs, RecTy *Type) : - OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {} + OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {} TernOpInit(const TernOpInit &Other) = delete; TernOpInit &operator=(const TernOpInit &Other) = delete; @@ -815,7 +818,7 @@ public: } } - TernaryOp getOpcode() const { return Opc; } + TernaryOp getOpcode() const { return (TernaryOp)Opc; } Init *getLHS() const { return LHS; } Init *getMHS() const { return MHS; } Init *getRHS() const { return RHS; } diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td index 79046b2b7352..c869341c384f 100644 --- a/contrib/llvm/include/llvm/Target/Target.td +++ b/contrib/llvm/include/llvm/Target/Target.td @@ -936,6 +936,10 @@ class AsmParser { // ShouldEmitMatchRegisterName - Set to false if the target needs a hand // written register name matcher bit ShouldEmitMatchRegisterName = 1; + + // HasMnemonicFirst - Set to false if target instructions don't always + // start with a mnemonic as the first token. 
+ bit HasMnemonicFirst = 1; } def DefaultAsmParser : AsmParser; diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h index 140c36591acc..863b7cd044fb 100644 --- a/contrib/llvm/include/llvm/Target/TargetLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetLowering.h @@ -2269,6 +2269,12 @@ public: return false; } + /// Return true if the MachineFunction contains a COPY which would imply + /// HasOpaqueSPAdjustment. + virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const { + return false; + } + /// Perform necessary initialization to handle a subset of CSRs explicitly /// via copies. This function is called at the beginning of instruction /// selection. diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h index 0d081c0194bf..af0d60b2625f 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -23,11 +23,13 @@ namespace llvm { -/// This optimization identifies DIV instructions that can be +/// This optimization identifies DIV instructions in a BB that can be /// profitably bypassed and carried out with a shorter, faster divide. -bool bypassSlowDivision(Function &F, - Function::iterator &I, - const DenseMap<unsigned int, unsigned int> &BypassWidth); +/// +/// This optimization may add basic blocks immediately after BB; for obvious +/// reasons, you shouldn't pass those blocks to bypassSlowDivision. +bool bypassSlowDivision( + BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 17aaee03e4a8..2cfacb650ff5 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -39,6 +40,8 @@ struct LICMSafetyInfo { bool MayThrow; // The current loop contains an instruction which // may throw. bool HeaderMayThrow; // Same as previous, but specific to loop header + // Used to update funclet bundle operands. + DenseMap<BasicBlock *, ColorVector> BlockColors; LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false) {} }; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 00f346ea115d..85404d87a611 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -543,7 +543,6 @@ static bool isMemsetPattern16(const Function *MS, isa<IntegerType>(MemsetType->getParamType(2))) return true; } - return false; } @@ -583,9 +582,6 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { if (F->onlyAccessesArgMemory()) Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); - if (isMemsetPattern16(F, TLI)) - Min = FMRB_OnlyAccessesArgumentPointees; - // Otherwise be conservative. 
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); } @@ -599,22 +595,21 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: - assert((ArgIdx == 0 || ArgIdx == 1) && - "Invalid argument index for memory intrinsic"); - return ArgIdx ? MRI_Ref : MRI_Mod; + // We don't currently have a writeonly attribute. All other properties + // of these intrinsics are nicely described via attributes in + // Intrinsics.td and handled generically below. + if (ArgIdx == 0) + return MRI_Mod; } // We can bound the aliasing properties of memset_pattern16 just as we can // for memcpy/memset. This is particularly important because the // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 - // whenever possible. - if (CS.getCalledFunction() && - isMemsetPattern16(CS.getCalledFunction(), TLI)) { - assert((ArgIdx == 0 || ArgIdx == 1) && - "Invalid argument index for memset_pattern16"); - return ArgIdx ? MRI_Ref : MRI_Mod; - } - // FIXME: Handle memset_pattern4 and memset_pattern8 also. + // whenever possible. Note that all but the missing writeonly attribute are + // handled via InferFunctionAttr. + if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI)) + if (ArgIdx == 0) + return MRI_Mod; if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) return MRI_Ref; diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index ab2263ae374e..249f3954d554 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -376,15 +376,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, } else { return true; // Argument of an unknown call. } - // If the Callee is not ReadNone, it may read the global, - // and if it is not ReadOnly, it may also write to it. - Function *CalleeF = CS.getCalledFunction(); - if (!CalleeF->doesNotAccessMemory()) { - if (Readers) - Readers->insert(CalleeF); - if (Writers && !CalleeF->onlyReadsMemory()) - Writers->insert(CalleeF); - } } } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) @@ -516,7 +507,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (F->isDeclaration()) { // Try to get mod/ref behaviour from function attributes. - if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) { + if (F->doesNotAccessMemory()) { // Can't do better than that! } else if (F->onlyReadsMemory()) { FI.addModRefInfo(MRI_Ref); @@ -524,12 +515,6 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // This function might call back into the module and read a global - // consider every global as possibly being read by this function. FI.setMayReadAnyGlobal(); - } else if (F->onlyAccessesArgMemory() || - F->onlyAccessesInaccessibleMemOrArgMem()) { - // This function may only access (read/write) memory pointed to by its - // arguments. If this pointer is to a global, this escaping use of the - // pointer is captured in AnalyzeUsesOfPointer(). 
- FI.addModRefInfo(MRI_ModRef); } else { FI.addModRefInfo(MRI_ModRef); // Can't say anything useful unless it's an intrinsic - they don't diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index b19ecadd3161..9e896aed0dce 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -187,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } -/// \brief Tests if a value is a call or invoke to a library function that -/// allocates memory and never returns null (such as operator new). -bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast); -} - /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 3e80bfe1fdfb..6918360536a3 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -477,7 +477,7 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( // being 42. A key property of this program however is that if either // 1 or 4 were missing, there would be a race between the store of 42 // either the store of 0 or the load (making the whole progam racy). - // The paper mentionned above shows that the same property is respected + // The paper mentioned above shows that the same property is respected // by every program that can detect any optimisation of that kind: either // it is racy (undefined) or there is a release followed by an acquire // between the pair of accesses under consideration. @@ -685,13 +685,13 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( return MemDepResult::getDef(Inst); if (isInvariantLoad) continue; - // Be conservative if the accessed pointer may alias the allocation. - if (AA->alias(Inst, AccessPtr) != NoAlias) - return MemDepResult::getClobber(Inst); - // If the allocation is not aliased and does not read memory (like - // strdup), it is safe to ignore. - if (isa<AllocaInst>(Inst) || - isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) + // Be conservative if the accessed pointer may alias the allocation - + // fallback to the generic handling below. + if ((AA->alias(Inst, AccessPtr) == NoAlias) && + // If the allocation is not aliased and does not read memory (like + // strdup), it is safe to ignore. 
+ (isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) || + isCallocLikeFn(Inst, TLI))) continue; } @@ -792,10 +792,8 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, int Count = -1) { if (Count == -1) Count = Cache.size(); - if (Count == 0) return; - - for (unsigned i = 1; i != unsigned(Count); ++i) - assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); + assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) && + "Cache isn't sorted!"); } #endif diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index e00f4aed07fc..ce3881925627 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -52,14 +52,13 @@ static bool hasSinCosPiStret(const Triple &T) { /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - const char *const *StandardNames) { -#ifndef NDEBUG + ArrayRef<const char *> StandardNames) { // Verify that the StandardNames array is in alphabetical order. - for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { - if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) - llvm_unreachable("TargetLibraryInfoImpl function names must be sorted"); - } -#endif // !NDEBUG + assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), + [](const char *LHS, const char *RHS) { + return strcmp(LHS, RHS) < 0; + }) && + "TargetLibraryInfoImpl function names must be sorted"); if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 314ec9c1886e..abc57ed8bca0 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -1743,9 +1743,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, return false; Value *X = nullptr, *Y = nullptr; - // A shift of a power of two is a power of two or zero. + // A shift left or a logical shift right of a power of two is a power of two + // or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || - match(V, m_Shr(m_Value(X), m_Value())))) + match(V, m_LShr(m_Value(X), m_Value())))) return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL); if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) @@ -2829,7 +2830,12 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL) { unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType()); APInt ByteOffset(BitWidth, 0); - while (1) { + + // We walk up the defs but use a visited set to handle unreachable code. In + // that case, we stop after accumulating the cycle once (not that it + // matters). 
+ SmallPtrSet<Value *, 16> Visited; + while (Visited.insert(Ptr).second) { if (Ptr->getType()->isVectorTy()) break; @@ -3268,12 +3274,9 @@ static bool isDereferenceableAndAlignedPointer( } // For gc.relocate, look through relocations - if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V)) - if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { - GCRelocateOperands RelocateInst(I); - return isDereferenceableAndAlignedPointer( - RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); - } + if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V)) + return isDereferenceableAndAlignedPointer( + RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, @@ -3474,10 +3477,6 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (CS.isReturnNonNull()) return true; - // operator new never returns null. - if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) - return true; - return false; } diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 2e670d584ecc..c7606fd488a0 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3071,7 +3071,12 @@ void BitcodeReader::saveMetadataList( for (unsigned ID = 0; ID < MetadataList.size(); ++ID) { Metadata *MD = MetadataList[ID]; auto *N = dyn_cast_or_null<MDNode>(MD); + assert((!N || (N->isResolved() || N->isTemporary())) && + "Found non-resolved non-temp MDNode while saving metadata"); // Save all values if !OnlyTempMD, otherwise just the temporary metadata. + // Note that in the !OnlyTempMD case we need to save all Metadata, not + // just MDNode, as we may have references to other types of module-level + // metadata (e.g. ValueAsMetadata) from instructions. if (!OnlyTempMD || (N && N->isTemporary())) { // Will call this after materializing each function, in order to // handle remapping of the function's instructions/metadata. @@ -3080,6 +3085,11 @@ void BitcodeReader::saveMetadataList( assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id"); continue; } + if (N && N->isTemporary()) + // Ensure that we assert if someone tries to RAUW this temporary + // metadata while it is the key of a map. The flag will be set back + // to true when the saved metadata list is destroyed. 
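The GetPointerBaseWithConstantOffset hunk above swaps an unconditional while (1) for a loop whose condition is Visited.insert(Ptr).second, which is false the first time a value repeats, so even a def chain that forms a cycle in unreachable code terminates the walk. The same termination trick in a minimal standalone form (toy Node type, not LLVM's Value):

    #include <iostream>
    #include <unordered_set>

    struct Node { Node *Next; };

    // Walk a Next chain to its base; a repeat visit (possible when the chain
    // is cyclic) makes insert().second false and ends the loop.
    static Node *walkToBase(Node *N) {
      std::unordered_set<Node *> Visited;
      while (Visited.insert(N).second) {
        if (!N->Next)
          break;
        N = N->Next;
      }
      return N;
    }

    int main() {
      Node A{nullptr}, B{&A};
      A.Next = &B; // deliberately form a cycle: A -> B -> A
      std::cout << (walkToBase(&B) != nullptr) << '\n'; // terminates, prints 1
    }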
+ N->setCanReplace(false); MetadataToIDs[MD] = ID; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 48b7104f24c3..4da5b580fcda 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -976,32 +976,32 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { } } -static int getRank(const WinEHFuncInfo &FuncInfo, int State) { +static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) { int Rank = 0; while (State != -1) { ++Rank; - State = FuncInfo.ClrEHUnwindMap[State].Parent; + State = FuncInfo.ClrEHUnwindMap[State].TryParentState; } return Rank; } -static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { - int LeftRank = getRank(FuncInfo, Left); - int RightRank = getRank(FuncInfo, Right); +static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getTryRank(FuncInfo, Left); + int RightRank = getTryRank(FuncInfo, Right); while (LeftRank < RightRank) { - Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; --RightRank; } while (RightRank < LeftRank) { - Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; --LeftRank; } while (Left != Right) { - Left = FuncInfo.ClrEHUnwindMap[Left].Parent; - Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; } return Left; @@ -1035,9 +1035,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>(); HandlerStates[HandlerBlock] = State; // Use this loop through all handlers to verify our assumption (used in - // the MinEnclosingState computation) that ancestors have lower state - // numbers than their descendants. - assert(FuncInfo.ClrEHUnwindMap[State].Parent < State && + // the MinEnclosingState computation) that enclosing funclets have lower + // state numbers than their enclosed funclets. + assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State && "ill-formed state numbering"); } // Map the main function to the NullState. @@ -1070,7 +1070,6 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates); // Visit the root function and each funclet. 
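getTryRank and getTryAncestor above are a textbook lowest-common-ancestor computation over the TryParentState links: measure each node's depth, lift the deeper node until the ranks match, then lift both in lockstep until they meet. A self-contained sketch over a plain parent array, with -1 as the root sentinel as in the hunk (names are illustrative):

    #include <cassert>
    #include <vector>

    static int rankOf(const std::vector<int> &Parent, int State) {
      int Rank = 0;
      for (; State != -1; State = Parent[State])
        ++Rank;
      return Rank;
    }

    static int commonAncestor(const std::vector<int> &Parent, int Left,
                              int Right) {
      int LeftRank = rankOf(Parent, Left);
      int RightRank = rankOf(Parent, Right);
      while (LeftRank < RightRank) { // lift the deeper node first
        Right = Parent[Right];
        --RightRank;
      }
      while (RightRank < LeftRank) {
        Left = Parent[Left];
        --LeftRank;
      }
      while (Left != Right) { // then lift both in lockstep
        Left = Parent[Left];
        Right = Parent[Right];
      }
      return Left;
    }

    int main() {
      // State 0 is the root; states 1 and 2 are its children; 3 is under 1.
      std::vector<int> Parent = {-1, 0, 0, 1};
      assert(commonAncestor(Parent, 3, 2) == 0);
      assert(commonAncestor(Parent, 3, 1) == 1);
      (void)Parent;
    }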
- for (MachineFunction::const_iterator FuncletStart = MF->begin(), FuncletEnd = MF->begin(), End = MF->end(); @@ -1100,17 +1099,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { for (const auto &StateChange : InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { // Close any try regions we're not still under - int AncestorState = - getAncestor(FuncInfo, CurrentState, StateChange.NewState); - while (CurrentState != AncestorState) { - assert(CurrentState != NullState && "Failed to find ancestor!"); + int StillPendingState = + getTryAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != StillPendingState) { + assert(CurrentState != NullState && + "Failed to find still-pending state!"); // Close the pending clause Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, CurrentState, FuncletState}); - // Now the parent handler is current - CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent; + // Now the next-outer try region is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState; // Pop the new start label from the handler stack if we've exited all - // descendants of the corresponding handler. + // inner try regions of the corresponding try region. if (HandlerStack.back().second == CurrentState) CurrentStartLabel = HandlerStack.pop_back_val().first; } @@ -1121,7 +1121,8 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) { // it. for (int EnteredState = StateChange.NewState; EnteredState != CurrentState; - EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) { + EnteredState = + FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) { int &MinEnclosingState = MinClauseMap[EnteredState]; if (FuncletState < MinEnclosingState) MinEnclosingState = FuncletState; diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5844124d8565..6fbdea84c10f 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -225,8 +225,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); - for (Function::iterator I = F.begin(); I != F.end(); I++) - EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); + BasicBlock* BB = &*F.begin(); + while (BB != nullptr) { + // bypassSlowDivision may create new BBs, but we don't want to reapply the + // optimization to those blocks. 
+ BasicBlock* Next = BB->getNextNode(); + EverMadeChange |= bypassSlowDivision(BB, BypassWidths); + BB = Next; + } } // Eliminate blocks that contain only PHI nodes and an @@ -526,19 +532,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // Computes a map of base pointer relocation instructions to corresponding // derived pointer relocation instructions given a vector of all relocate calls static void computeBaseDerivedRelocateMap( - const SmallVectorImpl<User *> &AllRelocateCalls, - DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> & - RelocateInstMap) { + const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls, + DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> + &RelocateInstMap) { // Collect information in two maps: one primarily for locating the base object // while filling the second map; the second map is the final structure holding // a mapping between Base and corresponding Derived relocate calls - DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap; - for (auto &U : AllRelocateCalls) { - GCRelocateOperands ThisRelocate(U); - IntrinsicInst *I = cast<IntrinsicInst>(U); - auto K = std::make_pair(ThisRelocate.getBasePtrIndex(), - ThisRelocate.getDerivedPtrIndex()); - RelocateIdxMap.insert(std::make_pair(K, I)); + DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap; + for (auto *ThisRelocate : AllRelocateCalls) { + auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), + ThisRelocate->getDerivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); } for (auto &Item : RelocateIdxMap) { std::pair<unsigned, unsigned> Key = Item.first; @@ -546,7 +550,7 @@ static void computeBaseDerivedRelocateMap( // Base relocation: nothing to insert continue; - IntrinsicInst *I = Item.second; + GCRelocateInst *I = Item.second; auto BaseKey = std::make_pair(Key.first, Key.first); // We're iterating over RelocateIdxMap so we cannot modify it. @@ -579,16 +583,13 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, // Takes a RelocatedBase (base pointer relocation instruction) and Targets to // replace, computes a replacement, and applies it. static bool -simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, - const SmallVectorImpl<IntrinsicInst *> &Targets) { +simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, + const SmallVectorImpl<GCRelocateInst *> &Targets) { bool MadeChange = false; - for (auto &ToReplace : Targets) { - GCRelocateOperands MasterRelocate(RelocatedBase); - GCRelocateOperands ThisRelocate(ToReplace); - - assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() && + for (GCRelocateInst *ToReplace : Targets) { + assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && "Not relocating a derived object of the original base object"); - if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) { + if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { // A duplicate relocate call. TODO: coalesce duplicates.
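The new CodeGenPrepare loop at the top of this hunk captures BB->getNextNode() before transforming BB, so blocks that bypassSlowDivision itself creates are never re-processed. The same pattern in miniature over a std::list (illustrative types, not the LLVM iterators):

    #include <iostream>
    #include <iterator>
    #include <list>

    // Halve even elements, inserting the result right after the element;
    // the traversal must not revisit what it just created.
    static void transform(std::list<int> &L, std::list<int>::iterator It) {
      if (*It % 2 == 0)
        L.insert(std::next(It), *It / 2);
    }

    int main() {
      std::list<int> Blocks = {8, 3, 4};
      for (auto It = Blocks.begin(); It != Blocks.end();) {
        auto Next = std::next(It); // snapshot before the list grows
        transform(Blocks, It);
        It = Next;                 // skips anything transform() inserted
      }
      for (int B : Blocks)
        std::cout << B << ' ';     // prints: 8 4 3 4 2
    }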
continue; } @@ -601,8 +602,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, continue; } - Value *Base = ThisRelocate.getBasePtr(); - auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); + Value *Base = ToReplace->getBasePtr(); + auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); if (!Derived || Derived->getPointerOperand() != Base) continue; @@ -680,12 +681,12 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // %val = load %ptr' bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { bool MadeChange = false; - SmallVector<User *, 2> AllRelocateCalls; + SmallVector<GCRelocateInst *, 2> AllRelocateCalls; for (auto *U : I.users()) - if (isGCRelocate(dyn_cast<Instruction>(U))) + if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) // Collect all the relocate calls associated with a statepoint - AllRelocateCalls.push_back(U); + AllRelocateCalls.push_back(Relocate); // We need at least one base pointer relocation + one derived pointer // relocation to mangle @@ -694,7 +695,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { // RelocateInstMap is a mapping from the base relocate instruction to the // corresponding derived relocate instructions - DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap; + DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); if (RelocateInstMap.empty()) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 021707b7c3c7..aad376c4702b 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -122,8 +122,7 @@ INITIALIZE_PASS_END(MachineCSE, "machine-cse", bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB) { bool Changed = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -186,8 +185,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return true; bool SeenDef = false; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) SeenDef = true; if (!MO.isReg() || !MO.getReg()) @@ -220,8 +218,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, SmallVectorImpl<unsigned> &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -239,8 +236,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, // (which currently contains only uses), set the PhysUseDef flag.
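The computeBaseDerivedRelocateMap hunk earlier in this file's diff keys its index map by the (base index, derived index) pair carried by each relocate: an entry whose two indices coincide is a base relocation, and every derived entry can find its base under the (base, base) key. A standalone sketch of that lookup scheme, with std::map standing in for DenseMap and illustrative field names:

    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    struct Relocate {
      unsigned BaseIdx, DerivedIdx; // operand indices into the statepoint
    };

    int main() {
      std::vector<Relocate> Relocates = {{7, 7}, {7, 9}, {7, 12}};
      std::map<std::pair<unsigned, unsigned>, const Relocate *> IdxMap;
      for (const Relocate &R : Relocates)
        IdxMap.insert({{R.BaseIdx, R.DerivedIdx}, &R});

      for (const auto &Item : IdxMap) {
        if (Item.first.first == Item.first.second)
          continue; // base relocation: nothing to pair up
        // Derived relocation: its base lives under the (base, base) key.
        bool HaveBase = IdxMap.count({Item.first.first, Item.first.first}) != 0;
        std::cout << "derived idx " << Item.first.second
                  << (HaveBase ? " has" : " lacks") << " a base relocation\n";
      }
    }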
PhysUseDef = false; MachineBasicBlock::const_iterator I = MI; I = std::next(I); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -311,8 +307,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, if (I == E) return true; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { // RegMasks go on instructions like calls that clobber lots of physregs. // Don't attempt to CSE across such an instruction. if (MO.isRegMask()) @@ -398,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #2: If the expression doesn't use a virtual register and the // only uses of the redundant computation are copies, do not CSE. bool HasVRegUse = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; @@ -580,9 +574,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Actually perform the elimination. if (DoCSE) { - for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { - unsigned OldReg = CSEPairs[i].first; - unsigned NewReg = CSEPairs[i].second; + for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) { + unsigned OldReg = CSEPair.first; + unsigned NewReg = CSEPair.second; // OldReg may have been unused but is used now, clear the Dead flag MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); assert(Def != nullptr && "CSEd register has no unique definition?"); @@ -594,8 +588,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Go through implicit defs of CSMI and MI, if a def is not dead at MI, // we should make sure it is not dead at CSMI. - for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) - CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) + CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); // Go through implicit defs of CSMI and MI, and clear the kill flags on // their uses in all the instructions between CSMI and MI. @@ -685,18 +679,14 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { Node = WorkList.pop_back_val(); Scopes.push_back(Node); const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - unsigned NumChildren = Children.size(); - OpenChildren[Node] = NumChildren; - for (unsigned i = 0; i != NumChildren; ++i) { - MachineDomTreeNode *Child = Children[i]; + OpenChildren[Node] = Children.size(); + for (MachineDomTreeNode *Child : Children) WorkList.push_back(Child); - } } while (!WorkList.empty()); // Now perform CSE.
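Every MachineCSE change in this stretch is the same mechanical cleanup: an index-based operand loop becomes a range-based for over an operands() range, dropping the getOperand(i) boilerplate. In miniature, with toy types rather than the MachineOperand API:

    #include <iostream>
    #include <vector>

    struct Operand { bool IsReg; unsigned Reg; };

    struct Instr {
      std::vector<Operand> Ops;
      // Exposing a range keeps callers away from index bookkeeping.
      std::vector<Operand> &operands() { return Ops; }
    };

    int main() {
      Instr MI{{{true, 5}, {false, 0}, {true, 9}}};
      // Before: for (unsigned i = 0, e = MI.Ops.size(); i != e; ++i) ...
      for (const Operand &MO : MI.operands()) {
        if (!MO.IsReg)
          continue;
        std::cout << "reg " << MO.Reg << '\n';
      }
    }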
bool Changed = false; - for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { - MachineDomTreeNode *Node = Scopes[i]; + for (MachineDomTreeNode *Node : Scopes) { MachineBasicBlock *MBB = Node->getBlock(); EnterScope(MBB); Changed |= ProcessBlock(MBB); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 1eb2edcd7cec..6b8eeccd173d 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -866,6 +866,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF, setMemRefs(NewMemRefs, NewMemRefs + NewNum); } +std::pair<MachineInstr::mmo_iterator, unsigned> +MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { + // TODO: If we end up with too many memory operands, return the empty + // conservative set rather than failing asserts. + // TODO: consider uniquing elements within the operand lists to reduce + // space usage and fall back to conservative information less often. + size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin()) + + (Other.memoperands_end() - Other.memoperands_begin()); + + MachineFunction *MF = getParent()->getParent(); + mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); + mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(), + MemBegin); + MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(), + MemEnd); + assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs && + "missing memrefs"); + + return std::make_pair(MemBegin, CombinedNumMemRefs); +} + bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { @@ -1738,7 +1759,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, bool HaveSemi = false; const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } OS << " flags: "; if (Flags & FrameSetup) @@ -1749,7 +1773,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } if (!memoperands_empty()) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } OS << " mem:"; for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); @@ -1762,7 +1789,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Print the regclass of any virtual registers encountered. if (MRI && !VirtRegs.empty()) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); OS << " " << TRI->getRegClassName(RC) @@ -1781,7 +1811,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Print debug location information. 
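The new mergeMemRefsWith above sizes one buffer for both operand lists, fills it with two successive std::copy calls, and asserts that the iterator arithmetic accounted for every element. The same shape with ordinary containers; mergeLists is an illustrative stand-in and a vector replaces the MachineFunction allocator:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    static std::pair<const int *, std::size_t>
    mergeLists(const std::vector<int> &A, const std::vector<int> &B,
               std::vector<int> &Storage) {
      std::size_t Combined = A.size() + B.size();
      Storage.resize(Combined); // stands in for MF.allocateMemRefsArray
      int *End = std::copy(A.begin(), A.end(), Storage.data());
      End = std::copy(B.begin(), B.end(), End);
      assert(End - Storage.data() == (std::ptrdiff_t)Combined &&
             "missing elements");
      (void)End;
      return {Storage.data(), Combined};
    }

    int main() {
      std::vector<int> A = {1, 2}, B = {3}, S;
      auto Merged = mergeLists(A, B, S);
      assert(Merged.second == 3 && Merged.first[2] == 3);
      (void)Merged;
    }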
if (isDebugValue() && getOperand(e - 2).isMetadata()) { - if (!HaveSemi) OS << ";"; + if (!HaveSemi) + OS << ";"; auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); OS << " line no:" << DV->getLine(); if (auto *InlinedAt = debugLoc->getInlinedAt()) { @@ -1795,7 +1826,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (isIndirectDebugValue()) OS << " indirect"; } else if (debugLoc && MF) { - if (!HaveSemi) OS << ";"; + if (!HaveSemi) + OS << ";"; OS << " dbg:"; debugLoc.print(OS); } diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index 3eaf4c5dea0f..4619daf30141 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -315,7 +315,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!TRI->regsOverlap(MOReg, Reg)) continue; - bool Covered = TRI->isSuperRegisterEq(MOReg, Reg); + bool Covered = TRI->isSuperRegisterEq(Reg, MOReg); if (MO.readsReg()) { PRI.Read = true; if (Covered) { diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 8382b0912bde..3749b1dd217a 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -97,9 +97,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { CurrSetPressure[*PSetI] += Weight; - if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) { - P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI]; - } + P.MaxSetPressure[*PSetI] = + std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0872d7a9a228..bc2405b952a6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType), + &Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index abbc48e10e46..96bf914701c6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2843,6 +2843,43 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { return (AZero | BZero).isAllOnesValue(); } +static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, + llvm::SelectionDAG &DAG) { + if (Ops.size() == 1) + return Ops[0]; + + // Concat of UNDEFs is UNDEF. + if (std::all_of(Ops.begin(), Ops.end(), + [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified + // to one big BUILD_VECTOR. + // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well. 
+ if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) { + return Op.getOpcode() == ISD::BUILD_VECTOR; + })) + return SDValue(); + + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 16> Elts; + for (SDValue Op : Ops) + Elts.append(Op->op_begin(), Op->op_end()); + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, DL, SVT) + : DAG.getSExtOrTrunc(Op, DL, SVT); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); +} + /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { @@ -3426,34 +3463,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N2.getOpcode() == ISD::EntryToken) return N1; if (N1 == N2) return N1; break; - case ISD::CONCAT_VECTORS: - // Concat of UNDEFs is UNDEF. - if (N1.getOpcode() == ISD::UNDEF && - N2.getOpcode() == ISD::UNDEF) - return getUNDEF(VT); - - // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to - // one big BUILD_VECTOR. - if (N1.getOpcode() == ISD::BUILD_VECTOR && - N2.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), - N1.getNode()->op_end()); - Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); - - // BUILD_VECTOR requires all inputs to be of the same type, find the - // maximum type and extend them all. - EVT SVT = VT.getScalarType(); - for (SDValue Op : Elts) - SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT.bitsGT(VT.getScalarType())) - for (SDValue &Op : Elts) - Op = TLI->isZExtFree(Op.getValueType(), SVT) - ? getZExtOrTrunc(Op, DL, SVT) - : getSExtOrTrunc(Op, DL, SVT); - - return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); - } + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. + SDValue Ops[] = {N1, N2}; + if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; break; + } case ISD::AND: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && @@ -3911,19 +3927,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } break; } - case ISD::CONCAT_VECTORS: - // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to - // one big BUILD_VECTOR. - if (N1.getOpcode() == ISD::BUILD_VECTOR && - N2.getOpcode() == ISD::BUILD_VECTOR && - N3.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), - N1.getNode()->op_end()); - Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); - Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); - return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); - } + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; break; + } case ISD::SETCC: { // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) @@ -5462,6 +5472,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, switch (Opcode) { default: break; + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. 
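FoldCONCAT_VECTORS funnels the two- and three-operand cases (and, further down, the N-ary case) through one helper whose structure is two std::all_of queries: all-undef folds to UNDEF, all-BUILD_VECTOR folds to one big BUILD_VECTOR, and anything mixed bails out. That control flow in isolation, with a toy enum standing in for SDValue:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    enum class Kind { Undef, BuildVector, Other };

    static const char *foldConcat(const std::vector<Kind> &Ops) {
      if (std::all_of(Ops.begin(), Ops.end(),
                      [](Kind K) { return K == Kind::Undef; }))
        return "UNDEF";
      if (!std::all_of(Ops.begin(), Ops.end(),
                       [](Kind K) { return K == Kind::BuildVector; }))
        return "no fold"; // mixed operands: bail out, as the helper does
      return "one big BUILD_VECTOR";
    }

    int main() {
      std::cout << foldConcat({Kind::Undef, Kind::Undef}) << '\n';
      std::cout << foldConcat({Kind::BuildVector, Kind::BuildVector}) << '\n';
      std::cout << foldConcat({Kind::BuildVector, Kind::Other}) << '\n';
    }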
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } case ISD::SELECT_CC: { assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); assert(Ops[0].getValueType() == Ops[1].getValueType() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d2ea85ab4d22..e446a934554e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc())); + getCurSDLoc()), + &Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + + // In an inbounds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType())); + DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } Ty = StTy->getElementType(Field); @@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue OffsVal = VectorWidth ? DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); + + // In an inbounds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); continue; } @@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Align = 0; // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. + // by adding SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl)); + DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either.
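Both GEP hunks above gate the new no-unsigned-wrap flag on the same two facts: the GEP is inbounds and the accumulated constant offset is nonnegative when reinterpreted as signed. The predicate by itself, under a hypothetical helper name:

    #include <cstdint>
    #include <iostream>

    // Mirrors the guard in the hunk: int64_t(Offset) >= 0 rejects offsets
    // whose top bit is set, which could legitimately wrap an unsigned add.
    static bool canMarkNoUnsignedWrap(bool IsInBounds, std::uint64_t Offset) {
      return IsInBounds && std::int64_t(Offset) >= 0;
    }

    int main() {
      std::cout << canMarkNoUnsignedWrap(true, 16) << '\n';                // 1
      std::cout << canMarkNoUnsignedWrap(true, std::uint64_t(-8)) << '\n'; // 0
      std::cout << canMarkNoUnsignedWrap(false, 16) << '\n';               // 0
    }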
+ SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); @@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), + &Flags); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, @@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { AAMDNodes AAInfo; I.getAAMetadata(AAInfo); + // An aggregate store cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. @@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), @@ -5189,7 +5223,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::experimental_gc_relocate: { - visitGCRelocate(I); + visitGCRelocate(cast<GCRelocateInst>(I)); return nullptr; } case Intrinsic::instrprof_increment: @@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ReturnValues.resize(NumValues); SmallVector<SDValue, 4> Chains(NumValues); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT)); + PtrVT), &Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 49a3872d20c8..8fb85ff6ecc7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -855,7 +855,7 @@ private: // These three are implemented in StatepointLowering.cpp void visitStatepoint(const CallInst &I); - void visitGCRelocate(const CallInst &I); + void visitGCRelocate(const GCRelocateInst &I); void visitGCResult(const CallInst &I); void visitUserOp1(const Instruction &I) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 853a21a15eb9..9f8759df0bab 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -633,6 +633,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } + if (TLI->hasCopyImplyingStackAdjustment(MF)) + MFI->setHasOpaqueSPAdjustment(true); + // Freeze the set of reserved registers now that MachineFrameInfo has been // set up.
All the information required by getReservedRegs() should be // available now. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 050ec2116c5d..6547a62d0778 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -128,13 +128,11 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, return Optional<int>(); // Spill location is known for gc relocates - if (isGCRelocate(Val)) { - GCRelocateOperands RelocOps(cast<Instruction>(Val)); - + if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()]; + Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()]; - auto It = SpillMap.find(RelocOps.getDerivedPtr()); + auto It = SpillMap.find(Relocate->getDerivedPtr()); if (It == SpillMap.end()) return Optional<int>(); @@ -401,10 +399,10 @@ static void getIncomingStatepointGCValues( SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { - for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) { - Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); - Bases.push_back(relocateOpers.getBasePtr()); - Ptrs.push_back(relocateOpers.getDerivedPtr()); + for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { + Relocs.push_back(Relocate); + Bases.push_back(Relocate->getBasePtr()); + Ptrs.push_back(Relocate->getDerivedPtr()); } // Remove any redundant llvm::Values which map to the same SDValue as another @@ -602,8 +600,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; - for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) { - const Value *V = RelocateOpers.getDerivedPtr(); + for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { + const Value *V = Relocate->getDerivedPtr(); SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); @@ -624,8 +622,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values do not use the original // value. - if (RelocateOpers.getUnderlyingCallSite().getParent() != - StatepointInstr->getParent()) + if (Relocate->getParent() != StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -656,7 +653,7 @@ void SelectionDAGBuilder::LowerStatepoint( // statepoint. for (const User *U : CS->users()) { const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent()) StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -859,24 +856,22 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { } } -void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { - GCRelocateOperands RelocateOpers(&CI); - +void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { #ifndef NDEBUG // Consistency check // We skip this check for relocates not in the same basic block as their // statepoint.
It would be too expensive to preserve validation info through // different basic blocks. - if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { - StatepointLowering.relocCallVisited(CI); + if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) { + StatepointLowering.relocCallVisited(Relocate); } #endif - const Value *DerivedPtr = RelocateOpers.getDerivedPtr(); + const Value *DerivedPtr = Relocate.getDerivedPtr(); SDValue SD = getValue(DerivedPtr); FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()]; + FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()]; // We should have recorded location for this pointer assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); @@ -885,7 +880,7 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for detail. if (!DerivedPtrLocation) { - setValue(&CI, SD); + setValue(&Relocate, SD); return; } @@ -907,5 +902,5 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { DAG.setRoot(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); - setValue(&CI, SpillLoad); + setValue(&Relocate, SpillLoad); } diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index fc656396ade8..1c4558cea5f5 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -212,7 +212,7 @@ unsigned TargetSchedModel::computeOperandLatency( && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() && SchedModel.isComplete()) { errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " - << *DefMI; + << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)"; llvm_unreachable("incomplete machine model"); } #endif diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 52fb922c935a..2426c27d43dc 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -17,11 +17,14 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -435,11 +438,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn, calculateStateNumbersForInvokes(Fn, FuncInfo); } -static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState, - ClrHandlerType HandlerType, uint32_t TypeToken, - const BasicBlock *Handler) { +static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState, + int TryParentState, ClrHandlerType HandlerType, + uint32_t TypeToken, const BasicBlock *Handler) { ClrEHUnwindMapEntry Entry; - Entry.Parent = ParentState; + Entry.HandlerParentState = HandlerParentState; + Entry.TryParentState = TryParentState; Entry.Handler = Handler; Entry.HandlerType = HandlerType; Entry.TypeToken = TypeToken; @@ -453,82 +457,199 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, if (!FuncInfo.EHPadStateMap.empty()) return; + // This numbering assigns one state number to each catchpad and 
cleanuppad. + // It also computes two tree-like relations over states: + // 1) Each state has a "HandlerParentState", which is the state of the next + // outer handler enclosing this state's handler (same as nearest ancestor + // per the ParentPad linkage on EH pads, but skipping over catchswitches). + // 2) Each state has a "TryParentState", which: + // a) for a catchpad that's not the last handler on its catchswitch, is + // the state of the next catchpad on that catchswitch + // b) for all other pads, is the state of the pad whose try region is the + // next outer try region enclosing this state's try region. The "try + // regions" are not present as such in the IR, but will be inferred + // based on the placement of invokes and pads which reach each other + // by exceptional exits + // Catchswitches do not get their own states, but each gets mapped to the + // state of its first catchpad. + + // Step one: walk down from outermost to innermost funclets, assigning each + // catchpad and cleanuppad a state number. Add an entry to the + // ClrEHUnwindMap for each state, recording its HandlerParentState and + // handler attributes. Record the TryParentState as well for each catchpad + // that's not the last on its catchswitch, but initialize all other entries' + // TryParentStates to a sentinel -1 value that the next pass will update. + + // Seed a worklist with pads that have no parent. SmallVector<std::pair<const Instruction *, int>, 8> Worklist; - - // Each pad needs to be able to refer to its parent, so scan the function - // looking for top-level handlers and seed the worklist with them. for (const BasicBlock &BB : *Fn) { - if (!BB.isEHPad()) - continue; - if (BB.isLandingPad()) - report_fatal_error("CoreCLR EH cannot use landingpads"); const Instruction *FirstNonPHI = BB.getFirstNonPHI(); - if (!isTopLevelPadForMSVC(FirstNonPHI)) + const Value *ParentPad; + if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI)) + ParentPad = CPI->getParentPad(); + else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI)) + ParentPad = CSI->getParentPad(); + else continue; - // queue this with sentinel parent state -1 to mean unwind to caller. - Worklist.emplace_back(FirstNonPHI, -1); + if (isa<ConstantTokenNone>(ParentPad)) + Worklist.emplace_back(FirstNonPHI, -1); } + // Use the worklist to visit all pads, from outer to inner. Record + // HandlerParentState for all pads. Record TryParentState only for catchpads + // that aren't the last on their catchswitch (setting all other entries' + // TryParentStates to an initial value of -1). This loop is also responsible + // for setting the EHPadStateMap entry for all catchpads, cleanuppads, and + // catchswitches. while (!Worklist.empty()) { const Instruction *Pad; - int ParentState; - std::tie(Pad, ParentState) = Worklist.pop_back_val(); - - Value *ParentPad; - int PredState; - if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { - // A cleanup can have multiple exits; don't re-process after the first. - if (FuncInfo.EHPadStateMap.count(Cleanup)) - continue; - // CoreCLR personality uses arity to distinguish faults from finallies. - const BasicBlock *PadBlock = Cleanup->getParent(); + int HandlerParentState; + std::tie(Pad, HandlerParentState) = Worklist.pop_back_val(); + + if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { + // Create the entry for this cleanup with the appropriate handler + // properties. Finally and fault handlers are distinguished by arity. ClrHandlerType HandlerType = - (Cleanup->getNumOperands() ?
ClrHandlerType::Fault - : ClrHandlerType::Finally); - int NewState = - addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock); - FuncInfo.EHPadStateMap[Cleanup] = NewState; - // Propagate the new state to all preds of the cleanup - ParentPad = Cleanup->getParentPad(); - PredState = NewState; - } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { - SmallVector<const CatchPadInst *, 1> Handlers; - for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { - const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI()); - Handlers.push_back(Catch); - } - FuncInfo.EHPadStateMap[CatchSwitch] = ParentState; - int NewState = ParentState; - for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend(); - HandlerI != HandlerE; ++HandlerI) { - const CatchPadInst *Catch = *HandlerI; - const BasicBlock *PadBlock = Catch->getParent(); + (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault + : ClrHandlerType::Finally); + int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1, + HandlerType, 0, Pad->getParent()); + // Queue any child EH pads on the worklist. + for (const User *U : Cleanup->users()) + if (const auto *I = dyn_cast<Instruction>(U)) + if (I->isEHPad()) + Worklist.emplace_back(I, CleanupState); + // Remember this pad's state. + FuncInfo.EHPadStateMap[Cleanup] = CleanupState; + } else { + // Walk the handlers of this catchswitch in reverse order since all but + // the last need to set the following one as its TryParentState. + const auto *CatchSwitch = cast<CatchSwitchInst>(Pad); + int CatchState = -1, FollowerState = -1; + SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers()); + for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend(); + CBI != CBE; ++CBI, FollowerState = CatchState) { + const BasicBlock *CatchBlock = *CBI; + // Create the entry for this catch with the appropriate handler + // properties. + const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI()); uint32_t TypeToken = static_cast<uint32_t>( cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue()); - NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch, - TypeToken, PadBlock); - FuncInfo.EHPadStateMap[Catch] = NewState; + CatchState = + addClrEHHandler(FuncInfo, HandlerParentState, FollowerState, + ClrHandlerType::Catch, TypeToken, CatchBlock); + // Queue any child EH pads on the worklist. + for (const User *U : Catch->users()) + if (const auto *I = dyn_cast<Instruction>(U)) + if (I->isEHPad()) + Worklist.emplace_back(I, CatchState); + // Remember this catch's state. + FuncInfo.EHPadStateMap[Catch] = CatchState; } - for (const auto *CatchPad : Handlers) { - for (const User *U : CatchPad->users()) { - const auto *UserI = cast<Instruction>(U); - if (UserI->isEHPad()) - Worklist.emplace_back(UserI, ParentState); + // Associate the catchswitch with the state of its first catch. + assert(CatchSwitch->getNumHandlers()); + FuncInfo.EHPadStateMap[CatchSwitch] = CatchState; + } + } + + // Step two: record the TryParentState of each state. For cleanuppads that + // don't have cleanuprets, we may need to infer this from their child pads, + // so visit pads in descendant-most to ancestor-most order. + for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(), + End = FuncInfo.ClrEHUnwindMap.rend(); + Entry != End; ++Entry) { + const Instruction *Pad = + Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI(); + // For most pads, the TryParentState is the state associated with the + // unwind dest of exceptional exits from it. 
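Step two above relies on an ordering trick: step one filled ClrEHUnwindMap outer-to-inner, so a reverse walk visits every pad after all pads nested inside it, letting a cleanup inherit an unwind destination discovered for its children. The two-pass shape reduced to a parent table; the -1 sentinel matches the hunk, and the inheritance rule is a stand-in for the real unwind-dest lookup:

    #include <iostream>
    #include <vector>

    struct Entry {
      int HandlerParentState;
      int TryParentState;
    };

    int main() {
      // Pass one (outer to inner) knows each handler's parent at creation
      // time but defers most TryParentState links as the -1 sentinel.
      std::vector<Entry> Map = {{-1, -1}, {0, -1}, {0, 1}};
      // Pass two walks in reverse, so every entry is visited only after all
      // entries created for pads nested inside it.
      for (auto It = Map.rbegin(); It != Map.rend(); ++It)
        if (It->TryParentState == -1)
          It->TryParentState = It->HandlerParentState; // simplified rule
      for (const Entry &E : Map)
        std::cout << E.HandlerParentState << " / " << E.TryParentState << '\n';
    }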
+ const BasicBlock *UnwindDest; + if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) { + // If a catch is not the last in its catchswitch, its TryParentState is + // the state associated with the next catch in the switch, even though + // that's not the unwind dest of exceptions escaping the catch. Those + // cases were already assigned a TryParentState in the first pass, so + // skip them. + if (Entry->TryParentState != -1) + continue; + // Otherwise, get the unwind dest from the catchswitch. + UnwindDest = Catch->getCatchSwitch()->getUnwindDest(); + } else { + const auto *Cleanup = cast<CleanupPadInst>(Pad); + UnwindDest = nullptr; + for (const User *U : Cleanup->users()) { + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { + // Common and unambiguous case -- cleanupret indicates cleanup's + // unwind dest. + UnwindDest = CleanupRet->getUnwindDest(); + break; + } + + // Get an unwind dest for the user + const BasicBlock *UserUnwindDest = nullptr; + if (auto *Invoke = dyn_cast<InvokeInst>(U)) { + UserUnwindDest = Invoke->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) { + UserUnwindDest = CatchSwitch->getUnwindDest(); + } else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) { + int UserState = FuncInfo.EHPadStateMap[ChildCleanup]; + int UserUnwindState = + FuncInfo.ClrEHUnwindMap[UserState].TryParentState; + if (UserUnwindState != -1) + UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState] + .Handler.get<const BasicBlock *>(); } + + // Not having an unwind dest for this user might indicate that it + // doesn't unwind, so can't be taken as proof that the cleanup itself + // may unwind to caller (see e.g. SimplifyUnreachable and + // RemoveUnwindEdge). + if (!UserUnwindDest) + continue; + + // Now we have an unwind dest for the user, but we need to see if it + // unwinds all the way out of the cleanup or if it stays within it. + const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI(); + const Value *UserUnwindParent; + if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad)) + UserUnwindParent = CSI->getParentPad(); + else + UserUnwindParent = + cast<CleanupPadInst>(UserUnwindPad)->getParentPad(); + + // The unwind stays within the cleanup iff it targets a child of the + // cleanup. + if (UserUnwindParent == Cleanup) + continue; + + // This unwind exits the cleanup, so its dest is the cleanup's dest. + UnwindDest = UserUnwindDest; + break; } - PredState = NewState; - ParentPad = CatchSwitch->getParentPad(); - } else { - llvm_unreachable("Unexpected EH pad"); } - // Queue all predecessors with the given state - for (const BasicBlock *Pred : predecessors(Pad->getParent())) { - if ((Pred = getEHPadFromPredecessor(Pred, ParentPad))) - Worklist.emplace_back(Pred->getFirstNonPHI(), PredState); + // Record the state of the unwind dest as the TryParentState. + int UnwindDestState; + + // If UnwindDest is null at this point, either the pad in question can + // be exited by unwind to caller, or it cannot be exited by unwind. In + // either case, reporting such cases as unwinding to caller is correct. + // This can lead to EH tables that "look strange" -- if this pad is in + // a parent funclet which has other children that do unwind to an enclosing + // pad, the try region for this pad will be missing the "duplicate" EH + // clause entries that you'd expect to see covering the whole parent. That + // should be benign, since the unwind never actually happens.
If it were + // an issue, we could add a subsequent pass that pushes unwind dests down + // from parents that have them to children that appear to unwind to caller. + if (!UnwindDest) { + UnwindDestState = -1; + } else { + UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()]; } + + Entry->TryParentState = UnwindDestState; } + // Step three: transfer information from pads to invokes. calculateStateNumbersForInvokes(Fn, FuncInfo); } @@ -597,6 +718,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { for (auto &Funclets : FuncletBlocks) { BasicBlock *FuncletPadBB = Funclets.first; std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second; + Value *FuncletToken; + if (FuncletPadBB == &F.getEntryBlock()) + FuncletToken = ConstantTokenNone::get(F.getContext()); + else + FuncletToken = FuncletPadBB->getFirstNonPHI(); std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone; ValueToValueMapTy VMap; @@ -668,15 +794,44 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { RemapInstruction(&I, VMap, RF_IgnoreMissingEntries | RF_NoModuleLevelChanges); + // Catchrets targeting cloned blocks need to be updated separately from + // the loop above because they are not in the current funclet. + SmallVector<CatchReturnInst *, 2> FixupCatchrets; + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + + FixupCatchrets.clear(); + for (BasicBlock *Pred : predecessors(OldBlock)) + if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator())) + if (CatchRet->getParentPad() == FuncletToken) + FixupCatchrets.push_back(CatchRet); + + for (CatchReturnInst *CatchRet : FixupCatchrets) + CatchRet->setSuccessor(NewBlock); + } + auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) { unsigned NumPreds = PN->getNumIncomingValues(); for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd; ++PredIdx) { BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx); - ColorVector &IncomingColors = BlockColors[IncomingBlock]; - bool BlockInFunclet = IncomingColors.size() == 1 && - IncomingColors.front() == FuncletPadBB; - if (IsForOldBlock != BlockInFunclet) + bool EdgeTargetsFunclet; + if (auto *CRI = + dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) { + EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken); + } else { + ColorVector &IncomingColors = BlockColors[IncomingBlock]; + assert(!IncomingColors.empty() && "Block not colored!"); + assert((IncomingColors.size() == 1 || + llvm::all_of(IncomingColors, + [&](BasicBlock *Color) { + return Color != FuncletPadBB; + })) && + "Cloning should leave this funclet's blocks monochromatic"); + EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB); + } + if (IsForOldBlock != EdgeTargetsFunclet) continue; PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false); // Revisit the next entry. @@ -864,7 +1019,6 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) { } void WinEHPrepare::verifyPreparedFunclets(Function &F) { - // Recolor the CFG to verify that all is well. 
for (BasicBlock &BB : F) { size_t NumColors = BlockColors[&BB].size(); assert(NumColors == 1 && "Expected monochromatic BB!"); @@ -872,12 +1026,8 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) { report_fatal_error("Uncolored BB!"); if (NumColors > 1) report_fatal_error("Multicolor BB!"); - if (!DisableDemotion) { - bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin()); - assert(!EHPadHasPHI && "EH Pad still has a PHI!"); - if (EHPadHasPHI) - report_fatal_error("EH Pad still has a PHI!"); - } + assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) && + "EH Pad still has a PHI!"); } } @@ -896,12 +1046,17 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) { demotePHIsOnFunclets(F); if (!DisableCleanups) { + DEBUG(verifyFunction(F)); removeImplausibleInstructions(F); + DEBUG(verifyFunction(F)); cleanupPreparedFunclets(F); } - verifyPreparedFunclets(F); + DEBUG(verifyPreparedFunclets(F)); + // Recolor the CFG to verify that all is well. + DEBUG(colorFunclets(F)); + DEBUG(verifyPreparedFunclets(F)); BlockColors.clear(); FuncletBlocks.clear(); diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index 185db47f07e5..1ebe9b7ee5bc 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -2060,7 +2060,7 @@ private: // printGCRelocateComment - print comment after call to the gc.relocate // intrinsic indicating base and derived pointer names. - void printGCRelocateComment(const Value &V); + void printGCRelocateComment(const GCRelocateInst &Relocate); }; } // namespace @@ -2722,14 +2722,11 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) { /// printGCRelocateComment - print comment after call to the gc.relocate /// intrinsic indicating base and derived pointer names. -void AssemblyWriter::printGCRelocateComment(const Value &V) { - assert(isGCRelocate(&V)); - GCRelocateOperands GCOps(cast<Instruction>(&V)); - +void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) { Out << " ; ("; - writeOperand(GCOps.getBasePtr(), false); + writeOperand(Relocate.getBasePtr(), false); Out << ", "; - writeOperand(GCOps.getDerivedPtr(), false); + writeOperand(Relocate.getDerivedPtr(), false); Out << ")"; } @@ -2737,8 +2734,8 @@ void AssemblyWriter::printGCRelocateComment(const Value &V) { /// which slot it occupies. 
/// void AssemblyWriter::printInfoComment(const Value &V) { - if (isGCRelocate(&V)) - printGCRelocateComment(V); + if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V)) + printGCRelocateComment(*Relocate); if (AnnotationWriter) AnnotationWriter->printInfoComment(V, Out); diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index bcf7dc365ce5..6c01bb645629 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -641,14 +641,15 @@ AttributeSet AttributeSet::get(LLVMContext &C, if (Attrs.empty()) return AttributeSet(); -#ifndef NDEBUG - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - assert((!i || Attrs[i-1].first <= Attrs[i].first) && - "Misordered Attributes list!"); - assert(!Attrs[i].second.hasAttribute(Attribute::None) && - "Pointless attribute!"); - } -#endif + assert(std::is_sorted(Attrs.begin(), Attrs.end(), + [](const std::pair<unsigned, Attribute> &LHS, + const std::pair<unsigned, Attribute> &RHS) { + return LHS.first < RHS.first; + }) && "Misordered Attributes list!"); + assert(std::none_of(Attrs.begin(), Attrs.end(), + [](const std::pair<unsigned, Attribute> &Pair) { + return Pair.second.hasAttribute(Attribute::None); + }) && "Pointless attribute!"); // Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes // list. diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp index a0bd2c9698e8..4b33d2e66ea1 100644 --- a/contrib/llvm/lib/IR/Instruction.cpp +++ b/contrib/llvm/lib/IR/Instruction.cpp @@ -76,22 +76,21 @@ iplist<Instruction>::iterator Instruction::eraseFromParent() { return getParent()->getInstList().erase(getIterator()); } -/// insertBefore - Insert an unlinked instructions into a basic block -/// immediately before the specified instruction. +/// Insert an unlinked instruction into a basic block immediately before the +/// specified instruction. void Instruction::insertBefore(Instruction *InsertPos) { InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this); } -/// insertAfter - Insert an unlinked instructions into a basic block -/// immediately after the specified instruction. +/// Insert an unlinked instruction into a basic block immediately after the +/// specified instruction. void Instruction::insertAfter(Instruction *InsertPos) { InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(), this); } -/// moveBefore - Unlink this instruction from its current basic block and -/// insert it into the basic block that MovePos lives in, right before -/// MovePos. +/// Unlink this instruction from its current basic block and insert it into the +/// basic block that MovePos lives in, right before MovePos. void Instruction::moveBefore(Instruction *MovePos) { MovePos->getParent()->getInstList().splice( MovePos->getIterator(), getParent()->getInstList(), getIterator()); diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp index 4ae2fd522b52..7c64ca7b7275 100644 --- a/contrib/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm/lib/IR/Instructions.cpp @@ -609,20 +609,6 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { return setSuccessor(idx, B); } -bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const { - if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) - return true; - - // Operand bundles override attributes on the called function, but don't - // override attributes directly present on the invoke instruction.
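The Attributes.cpp hunk above completes the pattern from the earlier analysis files: one #ifndef NDEBUG loop becomes a std::is_sorted assertion plus a std::none_of assertion. The none_of half in isolation, with a toy pair type and 0 standing in for Attribute::None:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<unsigned, int>> Attrs = {{0, 3}, {1, 7}};
      assert(std::none_of(Attrs.begin(), Attrs.end(),
                          [](const std::pair<unsigned, int> &P) {
                            return P.second == 0; // "pointless" marker value
                          }) &&
             "Pointless attribute!");
      (void)Attrs; // keep release builds warning-free
    }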
- if (isFnAttrDisallowedByOpBundle(A)) - return false; - - if (const Function *F = getCalledFunction()) - return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); - return false; -} - bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); @@ -934,6 +920,17 @@ void CatchSwitchInst::addHandler(BasicBlock *Handler) { getOperandList()[OpNo] = Handler; } +void CatchSwitchInst::removeHandler(handler_iterator HI) { + // Move all subsequent handlers up one. + Use *EndDst = op_end() - 1; + for (Use *CurDst = HI.getCurrent(); CurDst != EndDst; ++CurDst) + *CurDst = *(CurDst + 1); + // Null out the last handler use. + *EndDst = nullptr; + + setNumHungOffUseOperands(getNumOperands() - 1); +} + BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index ab1ba5e2035b..d8eaceb9ea2b 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -190,6 +190,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New, void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) { assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) && "Expected non-temp node"); + assert(CanReplace && + "Attempted to replace Metadata marked for no replacement"); if (UseMap.empty()) return; @@ -555,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() { resolve(); } -void MDNode::resolveCycles(bool MDMaterialized) { +void MDNode::resolveCycles(bool AllowTemps) { if (isResolved()) return; @@ -568,7 +570,7 @@ void MDNode::resolveCycles(bool MDMaterialized) { if (!N) continue; - if (N->isTemporary() && !MDMaterialized) + if (N->isTemporary() && AllowTemps) continue; assert(!N->isTemporary() && "Expected all forward declarations to be resolved"); diff --git a/contrib/llvm/lib/IR/Statepoint.cpp b/contrib/llvm/lib/IR/Statepoint.cpp index d45c1883ef9e..27a990eaff81 100644 --- a/contrib/llvm/lib/IR/Statepoint.cpp +++ b/contrib/llvm/lib/IR/Statepoint.cpp @@ -40,20 +40,7 @@ bool llvm::isStatepoint(const Value &inst) { } bool llvm::isGCRelocate(const ImmutableCallSite &CS) { - if (!CS.getInstruction()) { - // This is not a call site - return false; - } - - return isGCRelocate(CS.getInstruction()); -} -bool llvm::isGCRelocate(const Value *inst) { - if (const CallInst *call = dyn_cast<CallInst>(inst)) { - if (const Function *F = call->getCalledFunction()) { - return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate; - } - } - return false; + return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction()); } bool llvm::isGCResult(const ImmutableCallSite &CS) { diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 81c87e4759b7..6dfb05d94491 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -1657,14 +1657,14 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { const CallInst *Call = dyn_cast<const CallInst>(U); Assert(Call, "illegal use of statepoint token", &CI, U); if (!Call) continue; - Assert(isGCRelocate(Call) || isGCResult(Call), + Assert(isa<GCRelocateInst>(Call) || isGCResult(Call), "gc.result or gc.relocate are the only value uses" "of a gc.statepoint", &CI, U); if (isGCResult(Call)) { Assert(Call->getArgOperand(0) == &CI, "gc.result connected to wrong gc.statepoint", &CI, Call); - } else if (isGCRelocate(Call)) { + } else if (isa<GCRelocateInst>(Call)) { 
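// CatchSwitchInst::removeHandler above compacts a hung-off use list in
// place. The same shift-down-and-null pattern on a plain pointer array, as a
// standalone illustration (removeAt and Ops are made-up names):
#include <cassert>
#include <cstddef>

void removeAt(void **Ops, size_t NumOps, size_t Idx) {
  assert(Idx < NumOps && "handler index out of range");
  for (size_t I = Idx; I + 1 < NumOps; ++I)
    Ops[I] = Ops[I + 1];     // move all subsequent entries up one
  Ops[NumOps - 1] = nullptr; // null out the vacated last slot
}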
Assert(Call->getArgOperand(0) == &CI, "gc.relocate connected to wrong gc.statepoint", &CI, Call); } @@ -3019,8 +3019,7 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { &CPI); auto *ParentPad = CPI.getParentPad(); - Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) || - isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad), + Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), "CleanupPadInst has an invalid parent.", &CPI); User *FirstUser = nullptr; @@ -3077,10 +3076,17 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { } auto *ParentPad = CatchSwitch.getParentPad(); - Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) || - isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad), + Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad), "CatchSwitchInst has an invalid parent.", ParentPad); + Assert(CatchSwitch.getNumHandlers() != 0, + "CatchSwitchInst cannot have empty handler list", &CatchSwitch); + + for (BasicBlock *Handler : CatchSwitch.handlers()) { + Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()), + "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler); + } + visitTerminatorInst(CatchSwitch); } @@ -3675,8 +3681,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Verify rest of the relocate arguments - GCRelocateOperands Ops(CS); - ImmutableCallSite StatepointCS(Ops.getStatepoint()); + ImmutableCallSite StatepointCS( + cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint()); // Both the base and derived must be piped through the safepoint Value* Base = CS.getArgOperand(1); @@ -3731,14 +3737,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Relocated value must be a pointer type, but gc_relocate does not need to return the // same pointer type as the relocated pointer. It can be casted to the correct type later // if it's desired. However, they must have the same address space. - GCRelocateOperands Operands(CS); - Assert(Operands.getDerivedPtr()->getType()->isPointerTy(), + GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction()); + Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(), "gc.relocate: relocated value must be a gc pointer", CS); // gc_relocate return type must be a pointer type, and is verified earlier in // VerifyIntrinsicType(). Assert(cast<PointerType>(CS.getType())->getAddressSpace() == - cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(), + cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(), "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp index fa6e37517fc4..309690f61d74 100644 --- a/contrib/llvm/lib/Linker/IRMover.cpp +++ b/contrib/llvm/lib/Linker/IRMover.cpp @@ -524,6 +524,23 @@ public: ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata; } + ~IRLinker() { + // In the case where we are not linking metadata, we unset the CanReplace + // flag on all temporary metadata in the MetadataToIDs map to ensure + // none was replaced while being a map key. Now that we are destructing + // the map, set the flag back to true, so that it is replaceable during + // metadata linking. 
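// In the Verifier hunks above, isa<CleanupPadInst> || isa<CatchPadInst>
// collapses into a single isa<FuncletPadInst> test against their common base
// class (while CatchSwitchInst is intentionally dropped as a legal parent).
// A compile-time sanity check of that subsumption, with stand-in types:
#include <type_traits>

struct FuncletPad {};
struct CatchPad : FuncletPad {};
struct CleanupPad : FuncletPad {};

static_assert(std::is_base_of<FuncletPad, CatchPad>::value,
              "catchpad is-a funcletpad");
static_assert(std::is_base_of<FuncletPad, CleanupPad>::value,
              "cleanuppad is-a funcletpad");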
+ if (!shouldLinkMetadata()) { + for (auto MDI : MetadataToIDs) { + Metadata *MD = const_cast<Metadata *>(MDI.first); + MDNode *Node = dyn_cast<MDNode>(MD); + assert((Node && Node->isTemporary()) && + "Found non-temp metadata in map when not linking metadata"); + Node->setCanReplace(true); + } + } + } + bool run(); Value *materializeDeclFor(Value *V, bool ForAlias); void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias); @@ -1111,7 +1128,8 @@ bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) { // a function and before remapping metadata on instructions below // in RemapInstruction, as the saved mapping is used to handle // the temporary metadata hanging off instructions. - SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true); + SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, + /* OnlyTempMD = */ true); // Link in the prefix data. if (Src.hasPrefixData()) @@ -1514,7 +1532,8 @@ bool IRLinker::run() { // Ensure metadata materialized if (SrcM.getMaterializer()->materializeMetadata()) return true; - SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false); + SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, + /* OnlyTempMD = */ false); } linkNamedMDNodes(); diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index a99ac4eca59e..dafa7683b1ab 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -514,13 +514,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(1); MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); - EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, - context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, context.getDwarfVersion() >= 4 + ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4); if (context.getGenDwarfSectionSyms().size() > 1 && context.getDwarfVersion() >= 3) { - EmitAbbrev(MCOS, dwarf::DW_AT_ranges, - context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, context.getDwarfVersion() >= 4 + ? dwarf::DW_FORM_sec_offset : dwarf::DW_FORM_data4); } else { EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 028f2e955b21..34f49cac1628 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -1,4 +1,4 @@ -//===-- MObjectFileInfo.cpp - Object File Information ---------------------===// +//===-- MCObjectFileInfo.cpp - Object File Information --------------------===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp index dc864d3a17f8..1b592504b1e4 100644 --- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp @@ -63,31 +63,30 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) { /// ToggleFeature - Toggle a feature and returns the re-computed feature /// bits. This version will also change all implied bits. 
FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) { - SubtargetFeatures Features; - FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures); + SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures); return FeatureBits; } FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) { - SubtargetFeatures Features; - FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures); + SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures); return FeatureBits; } const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); - size_t NumProcs = ProcDesc.size(); - assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs, + ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size()); + + assert(std::is_sorted(SchedModels.begin(), SchedModels.end(), [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { return strcmp(LHS.Key, RHS.Key) < 0; }) && "Processor machine model table is not sorted"); // Find entry - const SubtargetInfoKV *Found = - std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU); - if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) { + auto Found = + std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU); + if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) { if (CPU != "help") // Don't error if the user asked for help. errs() << "'" << CPU << "' is not a recognized processor for this target" diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp index b642f17f0e79..7cce0fe756ef 100644 --- a/contrib/llvm/lib/MC/SubtargetFeature.cpp +++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp @@ -160,10 +160,9 @@ void ClearImpliedBits(FeatureBitset &Bits, } } -/// ToggleFeature - Toggle a feature and returns the newly updated feature -/// bits. -FeatureBitset -SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature, +/// ToggleFeature - Toggle a feature and update the feature bits. +void +SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature, ArrayRef<SubtargetFeatureKV> FeatureTable) { // Find feature in table. @@ -186,12 +185,9 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature, << "' is not a recognized feature for this target" << " (ignoring feature)\n"; } - - return Bits; } -FeatureBitset -SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature, +void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature, ArrayRef<SubtargetFeatureKV> FeatureTable) { assert(hasFlag(Feature)); @@ -203,7 +199,7 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature, if (FeatureEntry) { // Enable/disable feature in bits if (isEnabled(Feature)) { - Bits |= FeatureEntry->Value; + Bits |= FeatureEntry->Value; // For each feature that this implies, set it. 
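// Standalone sketch of the sorted-table lookup getSchedModelForCPU performs
// above: assert the key/value table is sorted, then binary-search it with
// std::lower_bound. KV and findEntry are illustrative names only.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>

struct KV { const char *Key; int Value; };

const KV *findEntry(const KV *Table, size_t N, const std::string &CPU) {
  assert(std::is_sorted(Table, Table + N,
                        [](const KV &L, const KV &R) {
                          return std::strcmp(L.Key, R.Key) < 0;
                        }) &&
         "table is not sorted");
  const KV *Found = std::lower_bound(
      Table, Table + N, CPU,
      [](const KV &Entry, const std::string &Key) { return Entry.Key < Key; });
  if (Found == Table + N || CPU != Found->Key)
    return nullptr; // unrecognized processor
  return Found;
}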
SetImpliedBits(Bits, FeatureEntry, FeatureTable); @@ -218,8 +214,6 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature, << "' is not a recognized feature for this target" << " (ignoring feature)\n"; } - - return Bits; } @@ -234,14 +228,10 @@ SubtargetFeatures::getFeatureBits(StringRef CPU, return FeatureBitset(); #ifndef NDEBUG - for (size_t i = 1, e = CPUTable.size(); i != e; ++i) { - assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 && - "CPU table is not sorted"); - } - for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) { - assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 && - "CPU features table is not sorted"); - } + assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) && + "CPU table is not sorted"); + assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) && + "CPU features table is not sorted"); #endif // Resulting bits FeatureBitset Bits; @@ -277,7 +267,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU, if (Feature == "+help") Help(CPUTable, FeatureTable); - Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable); + ApplyFeatureFlag(Bits, Feature, FeatureTable); } return Bits; diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp index a0f82a0d4ede..32c692d8073a 100644 --- a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -316,12 +316,17 @@ static std::error_code readCoverageMappingData( // Read the records in the coverage data section. for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) { - if (Buf + 4 * sizeof(uint32_t) > End) + if (Buf + sizeof(CovMapHeader) > End) return coveragemap_error::malformed; - uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf); - uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); - uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); - uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf); + auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf); + uint32_t NRecords = + endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords); + uint32_t FilenamesSize = + endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize); + uint32_t CoverageSize = + endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize); + uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version); + Buf = reinterpret_cast<const char *>(++CovHeader); switch (Version) { case CoverageMappingVersion1: diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp index f5acd23129dc..027f0f78c546 100644 --- a/contrib/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -12,12 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/ProfileData/InstrProf.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -162,6 +165,98 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) { return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), 
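// The CoverageMappingReader hunk above swaps four sequential readNext calls
// for one bounds check against sizeof(CovMapHeader) plus per-field byte
// swaps. A hedged sketch of the same bounds-checked header read, using
// memcpy instead of LLVM's endian helpers and assuming host-endian input:
#include <cstdint>
#include <cstring>

struct CovHeader {
  uint32_t NRecords, FilenamesSize, CoverageSize, Version;
};

bool readHeader(const char *Buf, const char *End, CovHeader &Out) {
  if (Buf + sizeof(CovHeader) > End)
    return false; // truncated section: report malformed coverage data
  std::memcpy(&Out, Buf, sizeof(CovHeader)); // avoids unaligned-access UB
  return true;
}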
FuncName); } +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs, + bool doCompression, std::string &Result) { + uint8_t Header[16], *P = Header; + std::string UncompressedNameStrings = + join(NameStrs.begin(), NameStrs.end(), StringRef(" ")); + + unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P); + P += EncLen; + + auto WriteStringToResult = [&](size_t CompressedLen, + const std::string &InputStr) { + EncLen = encodeULEB128(CompressedLen, P); + P += EncLen; + char *HeaderStr = reinterpret_cast<char *>(&Header[0]); + unsigned HeaderLen = P - &Header[0]; + Result.append(HeaderStr, HeaderLen); + Result += InputStr; + return 0; + }; + + if (!doCompression) + return WriteStringToResult(0, UncompressedNameStrings); + + SmallVector<char, 128> CompressedNameStrings; + zlib::Status Success = + zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings, + zlib::BestSizeCompression); + + if (Success != zlib::StatusOK) + return 1; + + return WriteStringToResult( + CompressedNameStrings.size(), + std::string(CompressedNameStrings.data(), CompressedNameStrings.size())); +} + +StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) { + auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer()); + StringRef NameStr = + Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); + return NameStr; +} + +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars, + std::string &Result) { + std::vector<std::string> NameStrs; + for (auto *NameVar : NameVars) { + NameStrs.push_back(getPGOFuncNameInitializer(NameVar)); + } + return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result); +} + +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { + const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data()); + const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() + + NameStrings.size()); + while (P < EndP) { + uint32_t N; + uint64_t UncompressedSize = decodeULEB128(P, &N); + P += N; + uint64_t CompressedSize = decodeULEB128(P, &N); + P += N; + bool isCompressed = (CompressedSize != 0); + SmallString<128> UncompressedNameStrings; + StringRef NameStrings; + if (isCompressed) { + StringRef CompressedNameStrings(reinterpret_cast<const char *>(P), + CompressedSize); + if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings, + UncompressedSize) != zlib::StatusOK) + return 1; + P += CompressedSize; + NameStrings = StringRef(UncompressedNameStrings.data(), + UncompressedNameStrings.size()); + } else { + NameStrings = + StringRef(reinterpret_cast<const char *>(P), UncompressedSize); + P += UncompressedSize; + } + // Now parse the name strings. 
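// On-disk layout written by collectPGOFuncNameStrings above, as read back by
// the surrounding decode loop (my reading of this code, not a documented
// format):
//   ULEB128 UncompressedSize   // 7 payload bits per byte, msb = continuation
//   ULEB128 CompressedSize     // 0 means the payload is stored raw
//   payload                    // zlib stream, or space-separated names
// ULEB128 example: 300 = 0b1'0010'1100 encodes as the two bytes 0xAC 0x02.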
+ SmallVector<StringRef, 0> Names; + NameStrings.split(Names, ' '); + for (StringRef &Name : Names) + Symtab.addFuncName(Name); + + while (P < EndP && *P == 0) + P++; + } + Symtab.finalizeSymtab(); + return 0; +} + instrprof_error InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, uint64_t Weight) { diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc index a8d1fe3c07d0..7d3537e20727 100644 --- a/contrib/llvm/lib/Support/Unix/Program.inc +++ b/contrib/llvm/lib/Support/Unix/Program.inc @@ -446,7 +446,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, return EC; } -bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { +bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) { static long ArgMax = sysconf(_SC_ARG_MAX); // System says no practical limit. @@ -456,7 +456,7 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { // Conservatively account for space required by environment variables. long HalfArgMax = ArgMax / 2; - size_t ArgLength = 0; + size_t ArgLength = Program.size() + 1; for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { ArgLength += strlen(*I) + 1; diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc index d4e14ddc6518..78fc538bd9bf 100644 --- a/contrib/llvm/lib/Support/Windows/Program.inc +++ b/contrib/llvm/lib/Support/Windows/Program.inc @@ -535,14 +535,15 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, return EC; } -bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { +bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) { // The documented max length of the command line passed to CreateProcess. static const size_t MaxCommandStringLength = 32768; - size_t ArgLength = 0; + // Account for the trailing space for the program path and the + // trailing NULL of the last argument. + size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2; for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { - // Account for the trailing space for every arg but the last one and the - // trailing NULL of the last argument. + // Account for the trailing space for every arg ArgLength += ArgLenWithQuotes(*I) + 1; if (ArgLength > MaxCommandStringLength) { return false; diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h index 34d961b148d1..c65e3148921e 100644 --- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h +++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h @@ -30,6 +30,9 @@ #define _WIN32_WINNT 0x0601 #define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed. #define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -44,6 +47,21 @@ #include <string> #include <vector> +#if !defined(__CYGWIN__) && !defined(__MINGW32__) +#include <VersionHelpers.h> +#else +// Cygwin does not have the IsWindows8OrGreater() API. +// Some version of mingw does not have the API either. 
+inline bool IsWindows8OrGreater() { + OSVERSIONINFO osvi = {}; + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + if (!::GetVersionEx(&osvi)) + return false; + return (osvi.dwMajorVersion > 6 || + (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2)); +} +#endif // __CYGWIN__ + inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) return true; diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index 57c7ac32f559..57162dc6e95a 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -57,6 +57,10 @@ #endif #endif +#ifdef LLVM_ON_WIN32 +#include "Windows/WindowsSupport.h" +#endif + using namespace llvm; raw_ostream::~raw_ostream() { @@ -567,8 +571,21 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert(FD >= 0 && "File already closed."); pos += Size; +#ifndef LLVM_ON_WIN32 + bool ShouldWriteInChunks = false; +#else + // Writing a large size of output to Windows console returns ENOMEM. It seems + // that, prior to Windows 8, WriteFile() is redirecting to WriteConsole(), and + // the latter has a size limit (66000 bytes or less, depending on heap usage). + bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater(); +#endif + do { - ssize_t ret = ::write(FD, Ptr, Size); + size_t ChunkSize = Size; + if (ChunkSize > 32767 && ShouldWriteInChunks) + ChunkSize = 32767; + + ssize_t ret = ::write(FD, Ptr, ChunkSize); if (ret < 0) { // If it's a recoverable error, swallow it and retry the write. diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp index 87a3422b32ab..11e35b75375e 100644 --- a/contrib/llvm/lib/TableGen/Record.cpp +++ b/contrib/llvm/lib/TableGen/Record.cpp @@ -722,7 +722,7 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const { std::string UnOpInit::getAsString() const { std::string Result; - switch (Opc) { + switch (getOpcode()) { case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break; case HEAD: Result = "!head"; break; case TAIL: Result = "!tail"; break; @@ -850,7 +850,7 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const { std::string BinOpInit::getAsString() const { std::string Result; - switch (Opc) { + switch (getOpcode()) { case CONCAT: Result = "!con"; break; case ADD: Result = "!add"; break; case AND: Result = "!and"; break; @@ -1054,7 +1054,7 @@ Init *TernOpInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *lhs = LHS->resolveReferences(R, RV); - if (Opc == IF && lhs != LHS) { + if (getOpcode() == IF && lhs != LHS) { IntInit *Value = dyn_cast<IntInit>(lhs); if (Init *I = lhs->convertInitializerTo(IntRecTy::get())) Value = dyn_cast<IntInit>(I); @@ -1082,7 +1082,7 @@ Init *TernOpInit::resolveReferences(Record &R, std::string TernOpInit::getAsString() const { std::string Result; - switch (Opc) { + switch (getOpcode()) { case SUBST: Result = "!subst"; break; case FOREACH: Result = "!foreach"; break; case IF: Result = "!if"; break; diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp index e5f6f165d13f..1506a7171ac4 100644 --- a/contrib/llvm/lib/TableGen/TGParser.cpp +++ b/contrib/llvm/lib/TableGen/TGParser.cpp @@ -77,7 +77,8 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) { /// SetValue - /// Return true on error, false on success. 
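// The raw_fd_ostream::write_impl change above caps console writes at 32767
// bytes on pre-Windows-8 hosts, where large WriteFile calls can fail with
// ENOMEM. The chunking arithmetic in isolation (chunkSize is a made-up
// helper name):
#include <cstddef>

size_t chunkSize(size_t Remaining, bool ShouldWriteInChunks) {
  const size_t MaxChunk = 32767; // largest write the old consoles accept
  return (ShouldWriteInChunks && Remaining > MaxChunk) ? MaxChunk : Remaining;
}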
bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, - const std::vector<unsigned> &BitList, Init *V) { + ArrayRef<unsigned> BitList, Init *V, + bool AllowSelfAssignment) { if (!V) return false; if (!CurRec) CurRec = &CurMultiClass->Rec; @@ -91,8 +92,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, // in the resolution machinery. if (BitList.empty()) if (VarInit *VI = dyn_cast<VarInit>(V)) - if (VI->getNameInit() == ValName) - return false; + if (VI->getNameInit() == ValName && !AllowSelfAssignment) + return true; // If we are assigning to a subset of the bits in the value... then we must be // assigning to a field of BitsRecTy, which must have a BitsInit @@ -165,7 +166,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { if (i < SubClass.TemplateArgs.size()) { // If a value is specified for this template arg, set it now. if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i], - std::vector<unsigned>(), SubClass.TemplateArgs[i])) + None, SubClass.TemplateArgs[i])) return true; // Resolve it next. @@ -243,8 +244,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, // If a value is specified for this template arg, set it in the // superclass now. if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i], - std::vector<unsigned>(), - SubMultiClass.TemplateArgs[i])) + None, SubMultiClass.TemplateArgs[i])) return true; // Resolve it next. @@ -258,8 +258,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, for (const auto &Def : makeArrayRef(CurMC->DefPrototypes).slice(newDefStart)) { if (SetValue(Def.get(), SubMultiClass.RefRange.Start, SMCTArgs[i], - std::vector<unsigned>(), - SubMultiClass.TemplateArgs[i])) + None, SubMultiClass.TemplateArgs[i])) return true; // Resolve it next. @@ -332,8 +331,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false)); - if (SetValue(IterRec.get(), Loc, IterVar->getName(), - std::vector<unsigned>(), IVal)) + if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal)) return Error(Loc, "when instantiating this def"); // Resolve it next. @@ -1728,7 +1726,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec, SMLoc ValLoc = Lex.getLoc(); Init *Val = ParseValue(CurRec, Type); if (!Val || - SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val)) + SetValue(CurRec, ValLoc, DeclName, None, Val)) // Return the name, even if an error is thrown. This is so that we can // continue to make some progress, even without the value having been // initialized. @@ -2358,8 +2356,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, // Set the value for NAME. We don't resolve references to it 'til later, // though, so that uses in nested multiclass names don't get // confused. - if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", - std::vector<unsigned>(), DefmPrefix)) { + if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix, + /*AllowSelfAssignment*/true)) { Error(DefmPrefixRange.Start, "Could not resolve " + CurRec->getNameInitAsString() + ":NAME to '" + DefmPrefix->getAsUnquotedString() + "'"); @@ -2446,8 +2444,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec, // Check if a value is specified for this temp-arg. if (i < TemplateVals.size()) { // Set it now. 
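// The SetValue guard above inverts the old behavior: a self-assignment
// (x = x) used to be silently ignored (return false, i.e. success) and now
// returns true (error) unless AllowSelfAssignment is passed, as the NAME
// binding in InstantiateMulticlassDef does. A reduced sketch of the guard:
#include <string>

bool setValue(const std::string &Dest, const std::string &SrcVar,
              bool AllowSelfAssignment = false) {
  if (SrcVar == Dest && !AllowSelfAssignment)
    return true; // error: assigning a variable to itself never resolves
  // ... perform the assignment ...
  return false;  // success
}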
- if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(), - TemplateVals[i])) + if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], None, TemplateVals[i])) return true; // Resolve it next. diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h index 8b41134d4ff1..739d9a9c5f37 100644 --- a/contrib/llvm/lib/TableGen/TGParser.h +++ b/contrib/llvm/lib/TableGen/TGParser.h @@ -105,10 +105,13 @@ public: private: // Semantic analysis methods. bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV); bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName, - const std::vector<unsigned> &BitList, Init *V); + ArrayRef<unsigned> BitList, Init *V, + bool AllowSelfAssignment = false); bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName, - const std::vector<unsigned> &BitList, Init *V) { - return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V); + ArrayRef<unsigned> BitList, Init *V, + bool AllowSelfAssignment = false) { + return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V, + AllowSelfAssignment); } bool AddSubClass(Record *Rec, SubClassReference &SubClass); bool AddSubMultiClass(MultiClass *CurMC, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index 0bff9b592c15..46ef2c111bae 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", FeaturePerfMon, FeatureZCRegMove, FeatureZCZeroing]>; +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M1 processors", + [FeatureFPARMv8, + FeatureNEON, + FeatureCrypto, + FeatureCRC, + FeaturePerfMon]>; + def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON, FeatureCRC, @@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; +// FIXME: Exynos-M1 is currently modelled without a specific SchedModel. +def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>; //===----------------------------------------------------------------------===// // Assembly parser diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 79a84ad8c6c5..3d1ab4e3fc2b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE, "AArch64 A57 FP Load-Balancing", false, false) namespace { -/// A Chain is a sequence of instructions that are linked together by +/// A Chain is a sequence of instructions that are linked together by /// an accumulation operand. For example: /// /// fmul d0<def>, ? @@ -285,7 +285,7 @@ public: std::string str() const { std::string S; raw_string_ostream OS(S); - + OS << "{"; StartInst->print(OS, /* SkipOpers= */true); OS << " -> "; @@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor, return Ch; } } - + // Bailout case - just return the first item. 
Chain *Ch = L.front(); L.erase(L.begin()); @@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, RS.enterBasicBlock(&MBB); RS.forward(MachineBasicBlock::iterator(G->getStart())); - // Can we find an appropriate register that is available throughout the life + // Can we find an appropriate register that is available throughout the life // of the chain? unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass; BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID)); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9f5beff12100..4ecfbe9e2280 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( continue; } - + if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); @@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, // The index of an EXT is the first element if it is not UNDEF. // Watch out for the beginning UNDEFs. The EXT index should be the expected - // value of the first element. E.g. + // value of the first element. E.g. // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. // ExpectedElt is the last mask index plus 1. @@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N, return SDValue(); } +// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test +// as well as whether the test should be inverted. This code is required to +// catch these cases (as opposed to standard dag combines) because +// AArch64ISD::TBZ is matched during legalization. +static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, + SelectionDAG &DAG) { + + if (!Op->hasOneUse()) + return Op; + + // We don't handle undef/constant-fold cases below, as they should have + // already been taken care of (e.g. and of 0, test of undefined shifted bits, + // etc.) + + // (tbz (trunc x), b) -> (tbz x, b) + // This case is just here to enable more of the below cases to be caught. 
+ if (Op->getOpcode() == ISD::TRUNCATE && + Bit < Op->getValueType(0).getSizeInBits()) { + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + } + + if (Op->getNumOperands() != 2) + return Op; + + auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1)); + if (!C) + return Op; + + switch (Op->getOpcode()) { + default: + return Op; + + // (tbz (and x, m), b) -> (tbz x, b) + case ISD::AND: + if ((C->getZExtValue() >> Bit) & 1) + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + return Op; + + // (tbz (shl x, c), b) -> (tbz x, b-c) + case ISD::SHL: + if (C->getZExtValue() <= Bit && + (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) { + Bit = Bit - C->getZExtValue(); + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + } + return Op; + + // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x + case ISD::SRA: + Bit = Bit + C->getZExtValue(); + if (Bit >= Op->getValueType(0).getSizeInBits()) + Bit = Op->getValueType(0).getSizeInBits() - 1; + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + + // (tbz (srl x, c), b) -> (tbz x, b+c) + case ISD::SRL: + if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) { + Bit = Bit + C->getZExtValue(); + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + } + return Op; + + // (tbz (xor x, -1), b) -> (tbnz x, b) + case ISD::XOR: + if ((C->getZExtValue() >> Bit) & 1) + Invert = !Invert; + return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); + } +} + +// Optimize test single bit zero/non-zero and branch. +static SDValue performTBZCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + bool Invert = false; + SDValue TestSrc = N->getOperand(1); + SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG); + + if (TestSrc == NewTestSrc) + return SDValue(); + + unsigned NewOpc = N->getOpcode(); + if (Invert) { + if (NewOpc == AArch64ISD::TBZ) + NewOpc = AArch64ISD::TBNZ; + else { + assert(NewOpc == AArch64ISD::TBNZ); + NewOpc = AArch64ISD::TBZ; + } + } + + SDLoc DL(N); + return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc, + DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3)); +} + // vselect (v1i1 setcc) -> // vselect (v1iXX setcc) (XX is the size of the compared operand type) // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as @@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performSTORECombine(N, DCI, DAG, Subtarget); case AArch64ISD::BRCOND: return performBRCONDCombine(N, DCI, DAG); + case AArch64ISD::TBNZ: + case AArch64ISD::TBZ: + return performTBZCombine(N, DCI, DAG); case AArch64ISD::CSEL: return performCONDCombine(N, DCI, DAG, 2, 3); case AArch64ISD::DUP: diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 566aa2c9a9ba..43664df3b861 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); } -// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI. 
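// Worked check for two of the tbz/tbnz rewrites above: testing bit 5 of
// (x << 2) equals testing bit 3 of x, and testing any bit of (x ^ -1) is the
// inverted test of x, which is why the XOR case flips Invert. A standalone
// sanity check, not LLVM code:
#include <cassert>
#include <cstdint>

void tbzRewriteChecks(uint64_t X) {
  assert((((X << 2) >> 5) & 1) == ((X >> 3) & 1));          // SHL: b -> b - c
  assert((((X ^ ~0ULL) >> 7) & 1) == (1 ^ ((X >> 7) & 1))); // XOR -1: invert
}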
-static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, - MachineInstr *Op1) { - assert(MI->memoperands_empty() && "expected a new machineinstr"); - size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + - (Op1->memoperands_end() - Op1->memoperands_begin()); - - MachineFunction *MF = MI->getParent()->getParent(); - MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); - MachineSDNode::mmo_iterator MemEnd = - std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); - MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); - MI->setMemRefs(MemBegin, MemEnd); -} - MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, TII->get(NewOpc)) .addOperand(getLdStRegOp(RtNewDest)) .addOperand(BaseRegOp) - .addImm(OffsetImm); - - // Copy MachineMemOperands from the original loads. - concatenateMemOperands(NewMemMI, I, Paired); + .addImm(OffsetImm) + .setMemRefs(I->mergeMemRefsWith(*Paired)); DEBUG( dbgs() @@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, TII->get(NewOpc)) .addOperand(getLdStRegOp(I)) .addOperand(BaseRegOp) - .addImm(OffsetImm); - // Copy MachineMemOperands from the original stores. - concatenateMemOperands(MIB, I, Paired); + .addImm(OffsetImm) + .setMemRefs(I->mergeMemRefsWith(*Paired)); } else { // Handle Unscaled if (IsUnscaled) diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index 1b8b9b27719c..151133b2f32c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -33,7 +33,14 @@ class Triple; class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: - enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone}; + enum ARMProcFamilyEnum { + Others, + CortexA35, + CortexA53, + CortexA57, + Cyclone, + ExynosM1 + }; /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 
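// The ldp/stp formation above now funnels both instructions' memory operands
// through MachineInstr::mergeMemRefsWith instead of the deleted local helper.
// A hedged sketch of the merge semantics on plain vectors, assuming the count
// must fit a small fixed-width field and that an empty list means "unknown
// memory behavior, be conservative":
#include <vector>

using MemRefList = std::vector<int>; // stand-in for MachineMemOperand*

MemRefList mergeMemRefs(const MemRefList &A, const MemRefList &B) {
  if (A.size() + B.size() > 255)
    return {}; // too many to represent: drop them and stay conservative
  MemRefList Merged(A);
  Merged.insert(Merged.end(), B.begin(), B.end());
  return Merged;
}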
ARMProcFamilyEnum ARMProcFamily; @@ -143,6 +150,7 @@ public: bool isCyclone() const { return CPUString == "cyclone"; } bool isCortexA57() const { return CPUString == "cortex-a57"; } bool isCortexA53() const { return CPUString == "cortex-a53"; } + bool isExynosM1() const { return CPUString == "exynos-m1"; } bool useAA() const override { return isCortexA53(); } diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 78f5289ec26d..cde1c6df2608 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings }; uint32_t -AArch64SysReg::SysRegMapper::fromString(StringRef Name, +AArch64SysReg::SysRegMapper::fromString(StringRef Name, const FeatureBitset& FeatureBits, bool &Valid) const { std::string NameLower = Name.lower(); @@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, } std::string -AArch64SysReg::SysRegMapper::toString(uint32_t Bits, +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, const FeatureBitset& FeatureBits) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index f649cb9b8a8d..e63627eae123 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -285,17 +285,17 @@ struct AArch64NamedImmMapper { // Zero value of FeatureBitSet means the mapping is always available FeatureBitset FeatureBitSet; - bool isNameEqual(std::string Other, + bool isNameEqual(std::string Other, const FeatureBitset& FeatureBits) const { - if (FeatureBitSet.any() && + if (FeatureBitSet.any() && (FeatureBitSet & FeatureBits).none()) return false; return Name == Other; } - bool isValueEqual(uint32_t Other, + bool isValueEqual(uint32_t Other, const FeatureBitset& FeatureBits) const { - if (FeatureBitSet.any() && + if (FeatureBitSet.any() && (FeatureBitSet & FeatureBits).none()) return false; return Value == Other; @@ -310,7 +310,7 @@ struct AArch64NamedImmMapper { StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits, bool &Valid) const; // Maps string to value, depending on availability for FeatureBits given - uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits, + uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits, bool &Valid) const; /// Many of the instructions allow an alternative assembly form consisting of @@ -1322,7 +1322,7 @@ namespace AArch64TLBI { return true; } } -} +} namespace AArch64II { /// Target Operand Flag enum. 
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index d4af8d2e48d1..db869cf7dd8b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "true", "Support flat address space">; +def FeatureXNACK : SubtargetFeature<"xnack", + "EnableXNACK", + "true", + "Enable XNACK support">; + def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true", diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index ba71dc05a8fc..9c3790264377 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed || FlatUsed) + if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { MaxSGPR += 2; - if (FlatUsed) { - MaxSGPR += 2; - // 2 additional for VI+. - if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (FlatUsed) + MaxSGPR += 2; + + if (STM.isXNACKEnabled()) MaxSGPR += 2; } @@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, if (MFI->hasDispatchPtr()) header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + if (STM.isXNACKEnabled()) + header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; + header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 11f6139deddd..2a7ce6a47176 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; @@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isGlobalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); }]>; @@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr), return isGlobalStore(dyn_cast<StoreSDNode>(N)); }]>; -def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr), - (truncstorei8 node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - -def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr), - (truncstorei16 node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - def local_store : PatFrag<(ops node:$val, node:$ptr), (store 
node:$val, node:$ptr), [{ return isLocalStore(dyn_cast<StoreSDNode>(N)); @@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> { defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>; -def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isFlatLoad(dyn_cast<LoadSDNode>(N)); -}]>; - -def flat_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return isFlatStore(dyn_cast<StoreSDNode>(N)); -}]>; - def mskor_flat : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 44e0c47877a9..c6af5b93d257 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), + EnableXNACK(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 9c7bb88f8f4a..d3712276d5e7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; + bool EnableXNACK; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -290,6 +291,10 @@ public: } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + bool isXNACKEnabled() const { + return EnableXNACK; + } + unsigned getMaxWavesPerCU() const { if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) return 10; diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td index 88a090d3df35..c543814cae0d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td @@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < } // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst -//===----------------------------------------------------------------------===// -// Flat Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [HasFlatAddressSpace] in { - -class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt, - PatFrag flat_ld> : - Pat <(vt (flat_ld i64:$ptr)), - (Instr_ADDR64 $ptr, 0, 0, 0) ->; - -def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>; -def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>; - -class FLATStore_Pattern <FLAT Instr, ValueType vt, 
PatFrag st> : - Pat <(st vt:$value, i64:$ptr), - (Instr $value, $ptr, 0, 0, 0) - >; - -def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>; -def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>; -def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>; -def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>; -def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>; -def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>; - -} // End HasFlatAddressSpace predicate - let Predicates = [isCI] in { // Convert (x - floor(x)) to fract(x) @@ -320,20 +284,10 @@ def : Pat < //===----------------------------------------------------------------------===// -// Patterns to generate flat for global +// Flat Patterns //===----------------------------------------------------------------------===// -def useFlatForGlobal : Predicate < - "Subtarget->useFlatForGlobal() || " - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">; - -let Predicates = [useFlatForGlobal] in { - -// 1. Offset as 20bit DWORD immediate -def : Pat < - (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) ->; +let Predicates = [isCIVI] in { // Patterns for global loads with no offset class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < @@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (inst $addr, 0, 0, 0) >; -def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>; -def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>; -def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>; -def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>; -def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>; -def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>; -def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>; +def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>; +def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>; +def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>; +def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>; +def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>; +def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>; +def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>; class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (node vt:$data, i64:$addr), (inst $data, $addr, 0, 0, 0) >; -def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>; -def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>; -def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>; -def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>; -def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>; +def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>; +def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>; +def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>; +def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>; +def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>; class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat < (vt (node i64:$addr, vt:$data)), @@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>; -} // End Predicates = [useFlatForGlobal] +} // End Predicates = [isCIVI] diff --git 
a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 6b3c81c3af74..7d20509c464d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, MBB.addLiveIn(PreloadedPrivateBufferReg); } - // We reserved the last registers for this. Shift it down to the end of those - // which were actually used. - // - // FIXME: It might be safer to use a pseudoregister before replacement. - - // FIXME: We should be able to eliminate unused input registers. We only - // cannot do this for the resources required for scratch access. For now we - // skip over user SGPRs and may leave unused holes. - - // We find the resource first because it has an alignment requirement. - if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - - unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4; - // Skip the last 2 elements because the last one is reserved for VCC, and - // this is the 2nd to last element already. - for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) { - // Pick the first unallocated one. Make sure we don't clobber the other - // reserved input we needed. - if (!MRI.isPhysRegUsed(Reg)) { - assert(MRI.isAllocatable(Reg)); - MRI.replaceRegWith(ScratchRsrcReg, Reg); - ScratchRsrcReg = Reg; - MFI->setScratchRSrcReg(ScratchRsrcReg); - break; + if (!ST.hasSGPRInitBug()) { + // We reserved the last registers for this. Shift it down to the end of those + // which were actually used. + // + // FIXME: It might be safer to use a pseudoregister before replacement. + + // FIXME: We should be able to eliminate unused input registers. We only + // cannot do this for the resources required for scratch access. For now we + // skip over user SGPRs and may leave unused holes. + + // We find the resource first because it has an alignment requirement. + if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4; + // Skip the last 2 elements because the last one is reserved for VCC, and + // this is the 2nd to last element already. + for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) { + // Pick the first unallocated one. Make sure we don't clobber the other + // reserved input we needed. + if (!MRI.isPhysRegUsed(Reg)) { + assert(MRI.isAllocatable(Reg)); + MRI.replaceRegWith(ScratchRsrcReg, Reg); + ScratchRsrcReg = Reg; + MFI->setScratchRSrcReg(ScratchRsrcReg); + break; + } } } - } - if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Skip the last 2 elements because the last one is reserved for VCC, and - // this is the 2nd to last element already. - unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); - for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) { - // Pick the first unallocated SGPR. Be careful not to pick an alias of the - // scratch descriptor, since we haven’t added its uses yet. 
- if (!MRI.isPhysRegUsed(Reg)) { - assert(MRI.isAllocatable(Reg) && - !TRI->isSubRegisterEq(ScratchRsrcReg, Reg)); - - MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); - ScratchWaveOffsetReg = Reg; - MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg); - break; + if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Skip the last 2 elements because the last one is reserved for VCC, and + // this is the 2nd to last element already. + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); + for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) { + // Pick the first unallocated SGPR. Be careful not to pick an alias of the + // scratch descriptor, since we haven’t added its uses yet. + if (!MRI.isPhysRegUsed(Reg)) { + assert(MRI.isAllocatable(Reg) && + !TRI->isSubRegisterEq(ScratchRsrcReg, Reg)); + + MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); + ScratchWaveOffsetReg = Reg; + MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg); + break; + } } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 10f2adde4867..8735277149a6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode< SDTCisVT<0, i64>]> >; +//===----------------------------------------------------------------------===// +// PatFrags for FLAT instructions +//===----------------------------------------------------------------------===// + +class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr), + (ld node:$ptr), [{ + return isFlatLoad(dyn_cast<LoadSDNode>(N)) || + isGlobalLoad(dyn_cast<LoadSDNode>(N)) || + isConstantLoad(cast<LoadSDNode>(N), -1); +}]>; + +def flat_load : flat_ld <load>; +def flat_az_extloadi8 : flat_ld <az_extloadi8>; +def flat_sextloadi8 : flat_ld <sextloadi8>; +def flat_az_extloadi16 : flat_ld <az_extloadi16>; +def flat_sextloadi16 : flat_ld <sextloadi16>; + +class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + return isFlatStore(dyn_cast<StoreSDNode>(N)) || + isGlobalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +def flat_store: flat_st <store>; +def flat_truncstorei8 : flat_st <truncstorei8>; +def flat_truncstorei16 : flat_st <truncstorei16>; + + def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isGlobalLoad(cast<LoadSDNode>(N)) || isConstantLoad(cast<LoadSDNode>(N), -1); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index 6f653c70aca0..b7df058b7c0c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -59,8 +59,6 @@ defm EXP : EXP_m; // SMRD Instructions //===----------------------------------------------------------------------===// -let mayLoad = 1 in { - // We are using the SGPR_32 and not the SReg_32 register class for 32-bit // SMRD instructions, because the SGPR_32 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. 
@@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; -} // mayLoad = 1 - //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv", diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 935aad427198..bf15516bea7b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); + + if (LaneVGPR == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("Ran out of VGPRs for spilling SGPR"); + + // When compiling from inside Mesa, the compilation continues. + // Select an arbitrary register to avoid triggering assertions + // during subsequent passes. + LaneVGPR = AMDGPU::VGPR0; + } + LaneVGPRs[LaneVGPRIdx] = LaneVGPR; // Add this register as live-in to all blocks to avoid machine verifier diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 3cdffef05583..2afa00996609 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + if (ST.isXNACKEnabled()) + BaseIdx -= 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the - // next sgpr128 down. + // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and + // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down + // either way. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); if (ST.hasSGPRInitBug()) { - unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + unsigned Idx; + + if (!ST.isXNACKEnabled()) + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + else + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + if (!ST.isXNACKEnabled()) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } else { + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; + } } return AMDGPU::SGPR95; @@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // for VCC/FLAT_SCR. 
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due @@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - // Assume XNACK_MASK is unused. unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + if (ST.isXNACKEnabled()) + Limit -= 2; + for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); @@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - if (Spill.VGPR == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF->getFunction()->getContext(); - Ctx.emitError("Ran out of VGPRs for spilling SGPR"); - } - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) @@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - if (Spill.VGPR == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF->getFunction()->getContext(); - Ctx.emitError("Ran out of VGPRs for spilling SGPR"); - } - BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg) diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td index 20a026a822e2..1a7801c92bd7 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23, } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI +let Predicates = [isVI] in { + +// 1. 
Offset as 20bit DWORD immediate +def : Pat < + (SIload_constant v4i32:$sbase, IMM20bit:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) +>; + +} // End Predicates = [isVI] diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index a44dc830a673..c171656b48ab 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -252,6 +252,8 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", []>; +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M1 processors", []>; def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", "Cortex-R4 ARM processors", []>; @@ -649,6 +651,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureCrypto, FeatureZCZeroing]>; +def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index e89757c19ecc..55c1684028c2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -340,12 +340,12 @@ namespace { /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { #ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = &*MBBI; - unsigned MBBId = MBB->getNumber(); - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); - } + assert(std::is_sorted(MF->begin(), MF->end(), + [this](const MachineBasicBlock &LHS, + const MachineBasicBlock &RHS) { + return BBInfo[LHS.getNumber()].postOffset() < + BBInfo[RHS.getNumber()].postOffset(); + })); DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 725b8383c961..6e7e47b8706a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1986,23 +1986,6 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, return AddedRegPressure.size() <= MemRegs.size() * 2; } - -/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. 
-static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, - MachineInstr *Op1) { - assert(MI->memoperands_empty() && "expected a new machineinstr"); - size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) - + (Op1->memoperands_end() - Op1->memoperands_begin()); - - MachineFunction *MF = MI->getParent()->getParent(); - MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); - MachineSDNode::mmo_iterator MemEnd = - std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); - MemEnd = - std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); - MI->setMemRefs(MemBegin, MemEnd); -} - bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, @@ -2196,7 +2179,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); - concatenateMemOperands(MIB, Op0, Op1); + MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumLDRDFormed; } else { @@ -2210,7 +2193,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); - concatenateMemOperands(MIB, Op0, Op1); + MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1)); DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumSTRDFormed; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index a8b28018f1b2..4d54e5751473 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -44,7 +44,7 @@ protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53, - CortexA57, CortexA72, Krait, Swift + CortexA57, CortexA72, Krait, Swift, ExynosM1 }; enum ARMProcClassEnum { None, AClass, RClass, MClass diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td index 1189cfd488ee..5a7eb215de42 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -251,6 +251,10 @@ def : Proc<"hexagonv60", HexagonModelV60, // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def HexagonAsmParser : AsmParser { + bit HasMnemonicFirst = 0; +} + def HexagonAsmParserVariant : AsmParserVariant { int Variant = 0; string TokenizingCharacters = "#()=:.<>!+*"; @@ -259,5 +263,6 @@ def HexagonAsmParserVariant : AsmParserVariant { def Hexagon : Target { // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; + let AssemblyParsers = [HexagonAsmParser]; let AssemblyParserVariants = [HexagonAsmParserVariant]; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index 5cfeba720d90..421403f49724 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -5807,3 +5807,5 @@ include "HexagonInstrInfoV60.td" include "HexagonInstrInfoVector.td" include "HexagonInstrAlias.td" +include "HexagonSystemInst.td" + diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td new file mode 100644 index 000000000000..784686a437ad --- /dev/null +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td @@ 
-0,0 +1,113 @@ +//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Cache manipulation instructions. +//===----------------------------------------------------------------------===// +let mayStore = 1 in +class ST_MISC_CACHEOP<dag outs, dag ins, + string asmstr, list<dag> pattern = [], + bits<3> amode, bits<3> type, bits<1> un> + : ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> { + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + let Inst{31-28} = 0b1010; + let Inst{27-25} = amode; + let Inst{24-22} = type; + let Inst{21} = un; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + +let mayStore = 1 in +class ST_MISC_CACHEOP_SYS<dag outs, dag ins, + string asmstr, list<dag> pattern = [], + bits<3> amode, bits<3> type, bits<1> un> + : SYSInst<outs, ins, asmstr, pattern, ""> { + + bits<5> Rs; + bits<5> Rt; + bits<5> Rd; + let Inst{31-28} = 0b1010; + let Inst{27-25} = amode; + let Inst{24-22} = type; + let Inst{21} = un; + let Inst{20-16} = Rs; + let Inst{12-8} = Rt; + let Inst{4-0} = Rd; +} + + +let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in { +def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins), + "syncht" , [], 0b100, 0b001, 0b0>; +} + +let Rt = 0, Rd = 0 in { +let isSoloAin1 = 1 in { + def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs), + "dccleana($Rs)", [], 0b000, 0b000, 0b0>; + def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs), + "dcinva($Rs)", [], 0b000, 0b000, 0b1>; + def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs), + "dccleaninva($Rs)", [], 0b000, 0b001, 0b0>; + } +} + +let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in { + def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt), + "l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>; + def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt), + "l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>; +} + +let hasSideEffects = 0, isSolo = 1 in +class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp> + : JRInst < + (outs), (ins IntRegs:$Rs), + #mnemonic#"($Rs)" > { + bits<5> Rs; + + let IClass = 0b0101; + let Inst{27-21} = 0b0110110; + let Inst{20-16} = Rs; + let Inst{13-12} = 0b00; + let Inst{11} = MajOp; + } +// Instruction cache invalidate +def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>; + +// Zero an aligned 32-byte cacheline. +let isSoloAin1 = 1 in +def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs), + "dczeroa($Rs)"> { + bits<5> Rs; + let IClass = 0b1010; + let Inst{27-21} = 0b0000110; + let Inst{13} = 0b0; + let Inst{20-16} = Rs; + } + +// Memory synchronization. 
+let hasSideEffects = 0, isSolo = 1 in +def Y2_isync: JRInst <(outs), (ins), + "isync"> { + let IClass = 0b0101; + let Inst{27-16} = 0b011111000000; + let Inst{13} = 0b0; + let Inst{9-0} = 0b0000000010; + } + diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index ee9d060f339e..92ecde3f90d6 100644 --- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -5,6 +5,23 @@ pr38151.c va-arg-22.c +# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"' +20030313-1.c +20030916-1.c +20031012-1.c +20041126-1.c +20060420-1.c +20071202-1.c +20120808-1.c +pr20527-1.c +pr27073.c +pr36339.c +pr37573.c +pr43236.c +pr43835.c +pr45070.c +pr51933.c + # TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. struct-ret-1.c va-arg-11.c diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index 82f0ee5a5ebc..73f654cba38c 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -32,7 +32,6 @@ static unsigned getVectorRegSize(unsigned RegNo) { return 64; llvm_unreachable("Unknown vector reg!"); - return 0; } static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT, diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 4fdd527d87c8..619f7c8d25df 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "X86ShuffleDecode.h" -#include "llvm/IR/Constants.h" #include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// @@ -296,54 +295,6 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, } } -void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { - Type *MaskTy = C->getType(); - // It is not an error for the PSHUFB mask to not be a vector of i8 because the - // constant pool uniques constants by their bit representation. - // e.g. the following take up the same space in the constant pool: - // i128 -170141183420855150465331762880109871104 - // - // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> - // - // <4 x i32> <i32 -2147483648, i32 -2147483648, - // i32 -2147483648, i32 -2147483648> - -#ifndef NDEBUG - unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); - assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512); -#endif - - // This is a straightforward byte vector. - if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) { - int NumElements = MaskTy->getVectorNumElements(); - ShuffleMask.reserve(NumElements); - - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte - // lane of the vector we're inside. 
- int Base = i & ~0xf; - Constant *COp = C->getAggregateElement(i); - if (!COp) { - ShuffleMask.clear(); - return; - } else if (isa<UndefValue>(COp)) { - ShuffleMask.push_back(SM_SentinelUndef); - continue; - } - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); - } - } - } - // TODO: Handle funny-looking vectors too. -} - void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { for (int i = 0, e = RawMask.size(); i < e; ++i) { @@ -388,68 +339,6 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { } } -void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, - SmallVectorImpl<int> &ShuffleMask) { - Type *MaskTy = C->getType(); - // It is not an error for the PSHUFB mask to not be a vector of i8 because the - // constant pool uniques constants by their bit representation. - // e.g. the following take up the same space in the constant pool: - // i128 -170141183420855150465331762880109871104 - // - // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> - // - // <4 x i32> <i32 -2147483648, i32 -2147483648, - // i32 -2147483648, i32 -2147483648> - - unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); - - if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512. - return; - - // Only support vector types. - if (!MaskTy->isVectorTy()) - return; - - // Make sure it's an integer type. - Type *VecEltTy = MaskTy->getVectorElementType(); - if (!VecEltTy->isIntegerTy()) - return; - - // Support any element type from byte up to element size. - // This is necessary primarily because 64-bit elements get split to 32-bit - // in the constant pool on 32-bit target. - unsigned EltTySize = VecEltTy->getIntegerBitWidth(); - if (EltTySize < 8 || EltTySize > ElSize) - return; - - unsigned NumElements = MaskTySize / ElSize; - assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && - "Unexpected number of vector elements."); - ShuffleMask.reserve(NumElements); - unsigned NumElementsPerLane = 128 / ElSize; - unsigned Factor = ElSize / EltTySize; - - for (unsigned i = 0; i < NumElements; ++i) { - Constant *COp = C->getAggregateElement(i * Factor); - if (!COp) { - ShuffleMask.clear(); - return; - } else if (isa<UndefValue>(COp)) { - ShuffleMask.push_back(SM_SentinelUndef); - continue; - } - int Index = i & ~(NumElementsPerLane - 1); - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); - if (ElSize == 64) - Index += (Element >> 1) & 0x1; - else - Index += Element & 0x3; - ShuffleMask.push_back(Index); - } - - // TODO: Handle funny-looking vectors too. 
-} - void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) { unsigned NumDstElts = DstVT.getVectorNumElements(); unsigned SrcScalarBits = SrcVT.getScalarSizeInBits(); @@ -572,58 +461,4 @@ void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, } } -void DecodeVPERMVMask(const Constant *C, MVT VT, - SmallVectorImpl<int> &ShuffleMask) { - Type *MaskTy = C->getType(); - if (MaskTy->isVectorTy()) { - unsigned NumElements = MaskTy->getVectorNumElements(); - if (NumElements == VT.getVectorNumElements()) { - for (unsigned i = 0; i < NumElements; ++i) { - Constant *COp = C->getAggregateElement(i); - if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) { - ShuffleMask.clear(); - return; - } - if (isa<UndefValue>(COp)) - ShuffleMask.push_back(SM_SentinelUndef); - else { - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); - Element &= (1 << NumElements) - 1; - ShuffleMask.push_back(Element); - } - } - } - return; - } - // Scalar value; just broadcast it - if (!isa<ConstantInt>(C)) - return; - uint64_t Element = cast<ConstantInt>(C)->getZExtValue(); - int NumElements = VT.getVectorNumElements(); - Element &= (1 << NumElements) - 1; - for (int i = 0; i < NumElements; ++i) - ShuffleMask.push_back(Element); -} - -void DecodeVPERMV3Mask(const Constant *C, MVT VT, - SmallVectorImpl<int> &ShuffleMask) { - Type *MaskTy = C->getType(); - unsigned NumElements = MaskTy->getVectorNumElements(); - if (NumElements == VT.getVectorNumElements()) { - for (unsigned i = 0; i < NumElements; ++i) { - Constant *COp = C->getAggregateElement(i); - if (!COp) { - ShuffleMask.clear(); - return; - } - if (isa<UndefValue>(COp)) - ShuffleMask.push_back(SM_SentinelUndef); - else { - uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); - Element &= (1 << NumElements*2) - 1; - ShuffleMask.push_back(Element); - } - } - } -} } // llvm namespace diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index ab18e6438ec9..72db6a81912b 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -23,7 +23,6 @@ //===----------------------------------------------------------------------===// namespace llvm { -class Constant; class MVT; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; @@ -72,9 +71,6 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); /// different datatypes and vector widths. void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a PSHUFB mask from an IR-level vector constant. -void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask); - /// \brief Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, @@ -95,10 +91,6 @@ void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, /// No VT provided since it only works on 256-bit, 4 element vectors. void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a VPERMILP variable mask from an IR-level vector constant. -void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, - SmallVectorImpl<int> &ShuffleMask); - /// \brief Decode a zero extension instruction as a shuffle mask. 
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &ShuffleMask); @@ -118,18 +110,10 @@ void DecodeEXTRQIMask(int Len, int Idx, void DecodeINSERTQIMask(int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant. -void DecodeVPERMVMask(const Constant *C, MVT VT, - SmallVectorImpl<int> &ShuffleMask); - /// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant. -void DecodeVPERMV3Mask(const Constant *C, MVT VT, - SmallVectorImpl<int> &ShuffleMask); - /// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants. void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask); diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index de94a138d865..629d4d3565f2 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1098,9 +1098,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } - // All x86 ABIs require that for returning structs by value we copy
- // the sret argument into %rax/%eax (depending on ABI) for the return.
- // We saved the argument into a virtual register in the entry block,
+ // All x86 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. if (F.hasStructRetAttr()) { unsigned Reg = X86MFInfo->getSRetReturnReg(); diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 242d0333ef9a..8b5fd27b4775 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -78,27 +78,6 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); } -/// usesTheStack - This function checks if any of the users of EFLAGS -/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has -/// to use the stack, and if we don't adjust the stack we clobber the first -/// frame index. -/// See X86InstrInfo::copyPhysReg. -static bool usesTheStack(const MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Conservatively assume that inline assembly might use the stack. - if (MF.hasInlineAsm()) - return true; - - return any_of(MRI.reg_instructions(X86::EFLAGS), - [](const MachineInstr &RI) { return RI.isCopy(); }); -} - -static bool doesStackUseImplyFP(const MachineFunction &MF) { - bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - return IsWin64Prologue && usesTheStack(MF); -} - /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -112,8 +91,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || - MFI->hasStackMap() || MFI->hasPatchPoint() || - doesStackUseImplyFP(MF)); + MFI->hasStackMap() || MFI->hasPatchPoint()); } static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { @@ -965,11 +943,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // push and pop from the stack. if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) && !TRI->needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !IsWin64CC && // Win64 has no Red Zone - !usesTheStack(MF) && // Don't push and pop. - !MF.shouldSplitStack()) { // Regular stack + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64CC && // Win64 has no Red Zone + !MFI->hasOpaqueSPAdjustment() && // Don't push and pop. + !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 4414e478b99b..868ae4e19e55 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -157,13 +157,9 @@ namespace { /// performance. bool OptForSize; - /// If true, selector should try to optimize for minimum code size. 
- bool OptForMinSize; - public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), OptForSize(false), - OptForMinSize(false) {} + : SelectionDAGISel(tm, OptLevel), OptForSize(false) {} const char *getPassName() const override { return "X86 DAG->DAG Instruction Selection"; @@ -535,10 +531,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { } void X86DAGToDAGISel::PreprocessISelDAG() { - // OptFor[Min]Size are used in pattern predicates that isel is matching. + // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->optForSize(); - OptForMinSize = MF->getFunction()->optForMinSize(); - assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 0927c2f4fa50..d31aab0fa141 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18,6 +18,7 @@ #include "X86FrameLowering.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/ADT/SmallBitVector.h" @@ -4556,6 +4557,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); + Result = DAG.getBitcast(CastVT, Result); Vec256 = DAG.getBitcast(CastVT, Vec256); Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); return DAG.getBitcast(ResultVT, Vec256); @@ -4851,8 +4853,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) { DecodePSHUFBMask(C, Mask); - if (Mask.empty()) - return false; break; } @@ -4870,7 +4870,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); - if (Mask.empty()) return false; // Mask only contains negative index if an element is zero. if (std::any_of(Mask.begin(), Mask.end(), [](int M){ return M == SM_SentinelZero; })) @@ -4948,8 +4947,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) { DecodeVPERMVMask(C, VT, Mask); - if (Mask.empty()) - return false; break; } return false; @@ -5000,8 +4997,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) { DecodeVPERMV3Mask(C, VT, Mask); - if (Mask.empty()) - return false; break; } return false; @@ -5009,6 +5004,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, default: llvm_unreachable("unknown target shuffle node"); } + // Empty mask indicates the decode failed. + if (Mask.empty()) + return false; + // If we have a fake unary shuffle, the shuffle mask is spread across two // inputs that are actually the same node. Re-map the mask to always point // into the first input. 
@@ -17372,6 +17371,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, if (!IntrData) { if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) return MarkEHRegistrationNode(Op, DAG); + if (IntNo == llvm::Intrinsic::x86_flags_read_u32 || + IntNo == llvm::Intrinsic::x86_flags_read_u64 || + IntNo == llvm::Intrinsic::x86_flags_write_u32 || + IntNo == llvm::Intrinsic::x86_flags_write_u64) { + // We need a frame pointer because this will get lowered to a PUSH/POP + // sequence. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setHasOpaqueSPAdjustment(true); + // Don't do anything here, we will expand these intrinsics out later + // during ExpandISelPseudos in EmitInstrWithCustomInserter. + return SDValue(); + } return SDValue(); } @@ -21144,6 +21155,47 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB, + const X86Subtarget *Subtarget) { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + + // insert input VAL into EAX + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) + .addReg(MI->getOperand(0).getReg()); + // insert zero to ECX + BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX) + .addReg(X86::ECX) + .addReg(X86::ECX); + // insert zero to EDX + BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::EDX) + .addReg(X86::EDX) + .addReg(X86::EDX); + // insert WRPKRU instruction + BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr)); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + +static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB, + const X86Subtarget *Subtarget) { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + + // insert zero to ECX + BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX) + .addReg(X86::ECX) + .addReg(X86::ECX); + // insert RDPKRU instruction + BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr)); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) + .addReg(X86::EAX); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, const X86Subtarget *Subtarget) { DebugLoc dl = MI->getDebugLoc(); @@ -22495,6 +22547,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_V64I1: return EmitLoweredSelect(MI, BB); + case X86::RDFLAGS32: + case X86::RDFLAGS64: { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + unsigned PushF = + MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; + unsigned Pop = + MI->getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; + BuildMI(*BB, MI, DL, TII->get(PushF)); + BuildMI(*BB, MI, DL, TII->get(Pop), MI->getOperand(0).getReg()); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; + } + + case X86::WRFLAGS32: + case X86::WRFLAGS64: { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + unsigned Push = + MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; + unsigned PopF = + MI->getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64; + BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI->getOperand(0).getReg()); + BuildMI(*BB, MI, DL, TII->get(PopF)); + + MI->eraseFromParent(); // The pseudo is gone now. 
+ return BB; + } + case X86::RELEASE_FADD32mr: case X86::RELEASE_FADD64mr: return EmitLoweredAtomicFP(MI, BB); @@ -22611,7 +22693,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB, Subtarget); - + // PKU feature + case X86::WRPKRU: + return EmitWRPKRU(MI, BB, Subtarget); + case X86::RDPKRU: + return EmitRDPKRU(MI, BB, Subtarget); // xbegin case X86::XBEGIN: return EmitXBegin(MI, BB, Subtarget->getInstrInfo()); @@ -23480,6 +23566,31 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::BLENDI: { + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() && + "Unexpected input vector types"); + + // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector + // operands and changing the mask to 1. This saves us a bunch of + // pattern-matching possibilities related to scalar math ops in SSE/AVX. + // x86InstrInfo knows how to commute this back after instruction selection + // if it would help register allocation. + + // TODO: If optimizing for size or a processor that doesn't suffer from + // partial register update stalls, this should be transformed into a MOVSD + // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD. + + if (VT == MVT::v2f64) + if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2))) + if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) { + SDValue NewMask = DAG.getConstant(1, DL, MVT::i8); + return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); + } + + return SDValue(); + } default: return SDValue(); } @@ -23573,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, /// the operands which explicitly discard the lanes which are unused by this /// operation to try to flow through the rest of the combiner the fact that /// they're unused. -static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { +static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); + if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + return SDValue(); // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask @@ -23617,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}))) return SDValue(); - // Only specific types are legal at this point, assert so we notice if and - // when these change. - assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 || - VT == MVT::v4f64) && - "Unknown vector type encountered!"); - return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS); } @@ -23642,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, // If we have legalized the vector types, look for blends of FADD and FSUB // nodes that we can fuse into an ADDSUB node. - if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3()) - if (SDValue AddSub = combineShuffleToAddSub(N, DAG)) + if (TLI.isTypeLegal(VT)) + if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG)) return AddSub; // Combine 256-bit vector shuffles. 
This is only profitable when in AVX mode @@ -27310,7 +27419,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, // from AH (which we otherwise need to do contortions to access). if (N0.getOpcode() == ISD::UDIVREM && N0.getResNo() == 1 && N0.getValueType() == MVT::i8 && - (VT == MVT::i32 || VT == MVT::i64)) { + VT == MVT::i32) { SDVTList NodeTys = DAG.getVTList(MVT::i8, VT); SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys, N0.getOperand(0), N0.getOperand(1)); @@ -27382,32 +27491,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) { - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector - // operands and changing the mask to 1. This saves us a bunch of - // pattern-matching possibilities related to scalar math ops in SSE/AVX. - // x86InstrInfo knows how to commute this back after instruction selection - // if it would help register allocation. - - // TODO: If optimizing for size or a processor that doesn't suffer from - // partial register update stalls, this should be transformed into a MOVSD - // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD. - - if (VT == MVT::v2f64) - if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2))) - if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) { - SDValue NewMask = DAG.getConstant(1, DL, MVT::i8); - return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); - } - - return SDValue(); -} - static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); // Gather and Scatter instructions use k-registers for masks. The type of @@ -27840,6 +27923,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); +// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated. case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); @@ -27851,6 +27935,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGNR: + case X86ISD::BLENDI: case X86ISD::UNPCKH: case X86ISD::UNPCKL: case X86ISD::MOVHLPS: @@ -27865,7 +27950,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); - case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); case ISD::MGATHER: case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG); } @@ -27902,6 +27986,18 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { } } +/// This function checks if any of the users of EFLAGS copies the EFLAGS. We +/// know that the code that lowers COPY of EFLAGS has to use the stack, and if +/// we don't adjust the stack we clobber the first frame index. +/// See X86InstrInfo::copyPhysReg. 
+bool X86TargetLowering::hasCopyImplyingStackAdjustment( + MachineFunction *MF) const { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + + return any_of(MRI.reg_instructions(X86::EFLAGS), + [](const MachineInstr &RI) { return RI.isCopy(); }); +} + /// IsDesirableToPromoteOp - This method queries the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index a29dc9af54f6..8bb0e5f8bd36 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -697,6 +697,10 @@ namespace llvm { /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; + /// Return true if the MachineFunction contains a COPY which would imply + /// HasOpaqueSPAdjustment. + bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index 8bf2925a75db..0a27c33f033e 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2366,6 +2366,7 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT, RegisterClass KRCSrc, Predicate prd> { let Predicates = [prd] in { + let hasSideEffects = 0 in def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td index 5d7283f7bd57..96a29ca8c370 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td @@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1, AddedComplexity = 20 in + isPseudo = 1 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; @@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { } let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], - AddedComplexity = 15 in { + AddedComplexity = 1 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { @@ -278,24 +278,12 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } -let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in { -// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. -// FIXME: Add itinerary class and Schedule. 
-def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", - [(set GR32:$dst, i32immSExt8:$src)]>, - Requires<[OptForMinSize]>; -def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", - [(set GR64:$dst, i64immSExt8:$src)]>, - Requires<[OptForMinSize, NotWin64WithoutFP]>; -} - // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. -let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src), - "", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1, + isPseudo = 1, hasSideEffects = 0 in +def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is // actually the zero-extension of a 32-bit constant and for labels in the @@ -566,8 +554,8 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in { // TODO: Get this to fold the constant into the instruction. let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "or{l}\t{$zero, $dst|$dst, $zero}", - [], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK, + "or{l}\t{$zero, $dst|$dst, $zero}", [], + IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK, Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 63e78de69bc9..246804e34289 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" @@ -4453,7 +4452,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // such as TF/IF/DF, which LLVM doesn't model. // // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - usesTheStack. + // first frame index. + // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. bool AXDead = (Reg == AX) || @@ -4465,6 +4465,10 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // (unnecessarily) saving+restoring a dead register. However the // MachineVerifier expects operands that read from dead registers // to be marked with the "undef" flag. + // An example of this can be found in + // test/CodeGen/X86/peephole-na-phys-copy-folding.ll and + // test/CodeGen/X86/cmpxchg-clobber-flags.ll when using + // -verify-machineinstrs. 
BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); } if (FromEFLAGS) { @@ -5309,50 +5313,6 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, return true; } -bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const { - MachineBasicBlock &MBB = *MIB->getParent(); - DebugLoc DL = MIB->getDebugLoc(); - int64_t Imm = MIB->getOperand(1).getImm(); - assert(Imm != 0 && "Using push/pop for 0 is not efficient."); - MachineBasicBlock::iterator I = MIB.getInstr(); - - int StackAdjustment; - - if (Subtarget.is64Bit()) { - assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || - MIB->getOpcode() == X86::MOV32ImmSExti8); - // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and - // widen the register if necessary. - StackAdjustment = 8; - BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm); - MIB->setDesc(get(X86::POP64r)); - MIB->getOperand(0) - .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); - } else { - assert(MIB->getOpcode() == X86::MOV32ImmSExti8); - StackAdjustment = 4; - BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm); - MIB->setDesc(get(X86::POP32r)); - } - - // Build CFI if necessary. - MachineFunction &MF = *MBB.getParent(); - const X86FrameLowering *TFL = Subtarget.getFrameLowering(); - bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsDwarfCFI = - !IsWin64Prologue && - (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); - bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; - if (EmitCFI) { - TFL->BuildCFI(MBB, I, DL, - MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); - TFL->BuildCFI(MBB, std::next(I), DL, - MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); - } - - return true; -} - // LoadStackGuard has so far only been implemented for 64-bit MachO. Different // code sequence is needed for other targets. static void expandLoadStackGuard(MachineInstrBuilder &MIB, @@ -5385,9 +5345,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return expandMOV32r1(MIB, *this, /*MinusOne=*/ false); case X86::MOV32r_1: return expandMOV32r1(MIB, *this, /*MinusOne=*/ true); - case X86::MOV32ImmSExti8: - case X86::MOV64ImmSExti8: - return ExpandMOVImmSExti8(MIB); case X86::SETB_C8r: return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: @@ -5412,7 +5369,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; - + case X86::MOV32ri64: + MI->setDesc(get(X86::MOV32ri)); + return true; + // KNL does not recognize dependency-breaking idioms for mask registers, // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1. // Using %k0 as the undef input register is a performance heuristic based diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index 9d40334206b2..edd09d617595 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -23,7 +23,6 @@ #include "X86GenInstrInfo.inc" namespace llvm { - class MachineInstrBuilder; class X86RegisterInfo; class X86Subtarget; @@ -565,9 +564,6 @@ private: /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const; - - /// Expand the MOVImmSExti8 pseudo-instructions. 
- bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index f4ca2b880bad..ea8e56206ce6 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -822,8 +822,6 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">, AssemblerPredicate<"Mode32Bit", "32-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; -def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" - "Subtarget->getFrameLowering()->hasFP(*MF)">; def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -837,7 +835,6 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">; def OptForSize : Predicate<"OptForSize">; -def OptForMinSize : Predicate<"OptForMinSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; @@ -1093,6 +1090,32 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], } +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, + SchedRW = [WriteRMW], Defs = [ESP] in { + let Uses = [ESP, EFLAGS] in + def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins), + [(set GR32:$dst, (int_x86_flags_read_u32))]>, + Requires<[Not64BitMode]>; + + let Uses = [RSP, EFLAGS] in + def RDFLAGS64 : PseudoI<(outs GR64:$dst), (ins), + [(set GR64:$dst, (int_x86_flags_read_u64))]>, + Requires<[In64BitMode]>; +} + +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, + SchedRW = [WriteRMW] in { + let Defs = [ESP, EFLAGS], Uses = [ESP] in + def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src), + [(int_x86_flags_write_u32 GR32:$src)]>, + Requires<[Not64BitMode]>; + + let Defs = [RSP, EFLAGS], Uses = [RSP] in + def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src), + [(int_x86_flags_write_u64 GR64:$src)]>, + Requires<[In64BitMode]>; +} + let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, SchedRW = [WriteLoad] in { def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, @@ -1133,7 +1156,8 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), - "push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>; + "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, + Requires<[In64BitMode]>; def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[In64BitMode]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td index 11dc1e7d466b..83f9b1409f61 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td @@ -651,7 +651,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), // Misc. 
let SchedRW = [WriteShuffle] in { -let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in +let Uses = [EDI], Predicates = [HasSSE1,Not64BitMode] in def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)], diff --git a/contrib/llvm/lib/Target/X86/X86InstrMPX.td b/contrib/llvm/lib/Target/X86/X86InstrMPX.td index cf5e2e38fe58..31608cd4c128 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMPX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMPX.td @@ -63,8 +63,8 @@ def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src), Requires<[HasMPX, In64BitMode]>; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), - "bndstx \t{$src, $dst|$dst, $src}", []>, TB, + "bndstx \t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndldx \t{$src, $dst|$dst, $src}", []>, TB, - Requires<[HasMPX]>;
\ No newline at end of file + "bndldx \t{$src, $dst|$dst, $src}", []>, PS, + Requires<[HasMPX]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 7a44212bd829..624b9316e6fd 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -1466,6 +1466,8 @@ def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; +// FIXME: We probably want to match the rm form only when optimizing for +// size, to avoid false dependencies (see sse_fp_unop_s for details) multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins> { @@ -1489,6 +1491,8 @@ let hasSideEffects = 0 in { } } +// FIXME: We probably want to match the rm form only when optimizing for +// size, to avoid false dependencies (see sse_fp_unop_s for details) multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let hasSideEffects = 0, Predicates = [UseAVX] in { @@ -1626,6 +1630,8 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). +// FIXME: We probably want to match the rm form only when optimizing for +// size, to avoid false dependencies (see sse_fp_unop_s for details) multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { @@ -3387,9 +3393,18 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, def : Pat<(Intr (load addr:$src)), (vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m) addr:$src), VR128))>; - def : Pat<(Intr mem_cpat:$src), - (!cast<Instruction>(NAME#Suffix##m_Int) - (vt (IMPLICIT_DEF)), mem_cpat:$src)>; + } + // We don't want to fold scalar loads into these instructions unless + // optimizing for size. This is because the folded instruction will have a + // partial register update, while the unfolded sequence will not, e.g. + // movss mem, %xmm0 + // rcpss %xmm0, %xmm0 + // which has a clobber before the rcp, vs. + // rcpss mem, %xmm0 + let Predicates = [target, OptForSize] in { + def : Pat<(Intr mem_cpat:$src), + (!cast<Instruction>(NAME#Suffix##m_Int) + (vt (IMPLICIT_DEF)), mem_cpat:$src)>; + } } @@ -3420,28 +3435,37 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, } } + // We don't want to fold scalar loads into these instructions unless + // optimizing for size. This is because the folded instruction will have a + // partial register update, while the unfolded sequence will not, e.g. + // vmovss mem, %xmm0 + // vrcpss %xmm0, %xmm0, %xmm0 + // which has a clobber before the rcp, vs. + // vrcpss mem, %xmm0, %xmm0 + // TODO: In theory, we could fold the load, and avoid the stall caused by + // the partial register store, either in ExeDepFix or with smarter RA.
let Predicates = [UseAVX] in { def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), RC:$src)>; - - def : Pat<(vt (OpNode mem_cpat:$src)), - (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), - mem_cpat:$src)>; - } let Predicates = [HasAVX] in { def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)), VR128:$src)>; - - def : Pat<(Intr mem_cpat:$src), - (!cast<Instruction>("V"#NAME#Suffix##m_Int) + } + let Predicates = [HasAVX, OptForSize] in { + def : Pat<(Intr mem_cpat:$src), + (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), mem_cpat:$src)>; } - let Predicates = [UseAVX, OptForSize] in - def : Pat<(ScalarVT (OpNode (load addr:$src))), - (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), - addr:$src)>; + let Predicates = [UseAVX, OptForSize] in { + def : Pat<(ScalarVT (OpNode (load addr:$src))), + (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), + addr:$src)>; + def : Pat<(vt (OpNode mem_cpat:$src)), + (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), + mem_cpat:$src)>; + } } /// sse1_fp_unop_p - SSE1 unops in packed form. diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 85e17f516f91..a97d1e5c86d0 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -498,10 +498,10 @@ let Predicates = [HasXSAVE] in { let Predicates = [HasXSAVEOPT] in { def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), "xsaveopt\t$dst", - [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB; + [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS; def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), "xsaveopt64\t$dst", - [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; } let Predicates = [HasXSAVEC] in { def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), @@ -551,10 +551,17 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB; //==-----------------------------------------------------------------------===// // PKU - enable protection key +let usesCustomInserter = 1 in { + def WRPKRU : PseudoI<(outs), (ins GR32:$src), + [(int_x86_wrpkru GR32:$src)]>; + def RDPKRU : PseudoI<(outs GR32:$dst), (ins), + [(set GR32:$dst, (int_x86_rdpkru))]>; +} + let Defs = [EAX, EDX], Uses = [ECX] in - def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; + def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; let Uses = [EAX, ECX, EDX] in - def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; + def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; //===----------------------------------------------------------------------===// // FS/GS Base Instructions diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h index dc6d85d582c8..646b556faa8f 100644 --- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1208,19 +1208,55 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + 
X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0), X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), + X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_di_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_di_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_di_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, 
INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrl_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0), @@ -1229,6 +1265,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv2_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv4_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv4_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psrlv8_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0), diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index e186f7039b43..e1ca558f0f2c 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -14,6 +14,7 @@ #include "X86AsmPrinter.h" #include "X86RegisterInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" @@ -454,10 +455,6 @@ ReSimplify: "LEA has segment specified!"); break; - case X86::MOV32ri64: - OutMI.setOpcode(X86::MOV32ri); - break; - // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B // if one of the registers is extended, but other isn't. case X86::VMOVZPQILo2PQIrr: diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp new file mode 100644 index 000000000000..ef16c5bdbfd8 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -0,0 +1,190 @@ +//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics using +// constants from the constant pool. 
+// +//===----------------------------------------------------------------------===// + +#include "X86ShuffleDecodeConstantPool.h" +#include "Utils/X86ShuffleDecode.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/IR/Constants.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { + +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + // It is not an error for the PSHUFB mask to not be a vector of i8 because the + // constant pool uniques constants by their bit representation. + // e.g. the following take up the same space in the constant pool: + // i128 -170141183420855150465331762880109871104 + // + // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> + // + // <4 x i32> <i32 -2147483648, i32 -2147483648, + // i32 -2147483648, i32 -2147483648> + +#ifndef NDEBUG + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512); +#endif + + // This is a straightforward byte vector. + if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) { + int NumElements = MaskTy->getVectorNumElements(); + ShuffleMask.reserve(NumElements); + + for (int i = 0; i < NumElements; ++i) { + // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte + // lane of the vector we're inside. + int Base = i & ~0xf; + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa<UndefValue>(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); + // If the high bit (7) of the byte is set, the element is zeroed. + if (Element & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (Element & 0xf); + ShuffleMask.push_back(Index); + } + } + } + // TODO: Handle funny-looking vectors too. +} + +void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, + SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + // It is not an error for the mask to not be a vector of i8 because the + // constant pool uniques constants by their bit representation. + // e.g. the following take up the same space in the constant pool: + // i128 -170141183420855150465331762880109871104 + // + // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160> + // + // <4 x i32> <i32 -2147483648, i32 -2147483648, + // i32 -2147483648, i32 -2147483648> + + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + + if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512. + return; + + // Only support vector types. + if (!MaskTy->isVectorTy()) + return; + + // Make sure it's an integer type. + Type *VecEltTy = MaskTy->getVectorElementType(); + if (!VecEltTy->isIntegerTy()) + return; + + // Support any element type from byte up to element size. + // This is necessary primarily because 64-bit elements get split to 32-bit + // in the constant pool on 32-bit targets.
+ unsigned EltTySize = VecEltTy->getIntegerBitWidth(); + if (EltTySize < 8 || EltTySize > ElSize) + return; + + unsigned NumElements = MaskTySize / ElSize; + assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && + "Unexpected number of vector elements."); + ShuffleMask.reserve(NumElements); + unsigned NumElementsPerLane = 128 / ElSize; + unsigned Factor = ElSize / EltTySize; + + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i * Factor); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa<UndefValue>(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + int Index = i & ~(NumElementsPerLane - 1); + uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); + if (ElSize == 64) + Index += (Element >> 1) & 0x1; + else + Index += Element & 0x3; + ShuffleMask.push_back(Index); + } + + // TODO: Handle funny-looking vectors too. +} + +void DecodeVPERMVMask(const Constant *C, MVT VT, + SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + if (MaskTy->isVectorTy()) { + unsigned NumElements = MaskTy->getVectorNumElements(); + if (NumElements == VT.getVectorNumElements()) { + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) { + ShuffleMask.clear(); + return; + } + if (isa<UndefValue>(COp)) + ShuffleMask.push_back(SM_SentinelUndef); + else { + uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); + Element &= (1 << NumElements) - 1; + ShuffleMask.push_back(Element); + } + } + } + return; + } + // Scalar value; just broadcast it + if (!isa<ConstantInt>(C)) + return; + uint64_t Element = cast<ConstantInt>(C)->getZExtValue(); + int NumElements = VT.getVectorNumElements(); + Element &= (1 << NumElements) - 1; + for (int i = 0; i < NumElements; ++i) + ShuffleMask.push_back(Element); +} + +void DecodeVPERMV3Mask(const Constant *C, MVT VT, + SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + unsigned NumElements = MaskTy->getVectorNumElements(); + if (NumElements == VT.getVectorNumElements()) { + for (unsigned i = 0; i < NumElements; ++i) { + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } + if (isa<UndefValue>(COp)) + ShuffleMask.push_back(SM_SentinelUndef); + else { + uint64_t Element = cast<ConstantInt>(COp)->getZExtValue(); + Element &= (1 << NumElements*2) - 1; + ShuffleMask.push_back(Element); + } + } + } +} +} // llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h new file mode 100644 index 000000000000..bcf46322c8cd --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h @@ -0,0 +1,45 @@ +//===-- X86ShuffleDecodeConstantPool.h - X86 shuffle decode -----*-C++-*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics using +// constants from the constant pool. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H +#define LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H + +#include "llvm/ADT/SmallVector.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { +class Constant; +class MVT; + +/// \brief Decode a PSHUFB mask from an IR-level vector constant. +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a VPERMILP variable mask from an IR-level vector constant. +void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, + SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant. +void DecodeVPERMVMask(const Constant *C, MVT VT, + SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant. +void DecodeVPERMV3Mask(const Constant *C, MVT VT, + SmallVectorImpl<int> &ShuffleMask); + +} // llvm namespace + +#endif diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp index d02c861a2948..4295a7595c29 100644 --- a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -21,10 +22,12 @@ using namespace llvm; STATISTIC(NumReadNone, "Number of functions inferred as readnone"); STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); +STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); static bool setDoesNotAccessMemory(Function &F) { if (F.doesNotAccessMemory()) @@ -42,6 +45,15 @@ static bool setOnlyReadsMemory(Function &F) { return true; } +static bool setOnlyAccessesArgMemory(Function &F) { + if (F.onlyAccessesArgMemory()) + return false; + F.setOnlyAccessesArgMemory (); + ++NumArgMemOnly; + return true; +} + + static bool setDoesNotThrow(Function &F) { if (F.doesNotThrow()) return false; @@ -74,6 +86,17 @@ static bool setDoesNotAlias(Function &F, unsigned n) { return true; } +static bool setNonNull(Function &F, unsigned n) { + assert((n != AttributeSet::ReturnIndex || + F.getReturnType()->isPointerTy()) && + "nonnull applies only to pointers"); + if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) + return false; + F.addAttribute(n, Attribute::NonNull); + ++NumNonNull; + return true; +} + /// Analyze the name and prototype of the given function and set any applicable /// attributes. 
/// @@ -89,7 +112,6 @@ static bool inferPrototypeAttributes(Function &F, return false; bool Changed = false; - switch (TheLibFunc) { case LibFunc::strlen: if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) @@ -873,6 +895,35 @@ static bool inferPrototypeAttributes(Function &F, Changed |= setDoesNotCapture(F, 2); return Changed; + case LibFunc::Znwj: // new(unsigned int) + case LibFunc::Znwm: // new(unsigned long) + case LibFunc::Znaj: // new[](unsigned int) + case LibFunc::Znam: // new[](unsigned long) + case LibFunc::msvc_new_int: // new(unsigned int) + case LibFunc::msvc_new_longlong: // new(unsigned long long) + case LibFunc::msvc_new_array_int: // new[](unsigned int) + case LibFunc::msvc_new_array_longlong: // new[](unsigned long long) + if (FTy->getNumParams() != 1) + return false; + // Operator new always returns a nonnull noalias pointer + Changed |= setNonNull(F, AttributeSet::ReturnIndex); + Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex); + return Changed; + + //TODO: add LibFunc entries for: + //case LibFunc::memset_pattern4: + //case LibFunc::memset_pattern8: + case LibFunc::memset_pattern16: + if (FTy->isVarArg() || FTy->getNumParams() != 3 || + !isa<PointerType>(FTy->getParamType(0)) || + !isa<PointerType>(FTy->getParamType(1)) || + !isa<IntegerType>(FTy->getParamType(2))) + return false; + + Changed |= setOnlyAccessesArgMemory(F); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + default: // FIXME: It'd be really nice to cover all the library functions we're // aware of here. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e3634f269cf5..090245d1b22c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1747,8 +1747,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Translate facts known about a pointer before relocating into // facts about the relocate value, while being careful to // preserve relocation semantics. - GCRelocateOperands Operands(II); - Value *DerivedPtr = Operands.getDerivedPtr(); + Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr(); auto *GCRelocateType = cast<PointerType>(II->getType()); // Remove the relocation if unused, note that this check is required diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index da835a192322..0f01d183b1ad 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -591,19 +591,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || - (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) { + (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { if (!DoXform) return ICI; Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); + In->getType()->getScalarSizeInBits() - 1); + In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName()+".not"); + In = Builder->CreateXor(In, One, In->getName() + ".not"); } return ReplaceInstUsesWith(CI, In); @@ -639,13 +639,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return ReplaceInstUsesWith(CI, Res); } - uint32_t ShiftAmt = KnownZeroMask.logBase2(); + uint32_t ShAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); - if (ShiftAmt) { + if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), - In->getName()+".lobit"); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 534f67008150..e4e506509d39 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -414,7 +414,7 @@ public: /// \brief A combiner-aware RAUW-like routine. /// /// This method is to be used when an instruction is found to be dead, - /// replacable with another preexisting expression. Here we add all uses of + /// replaceable with another preexisting expression. Here we add all uses of /// I to the worklist, replace all uses of I with the new value, then return /// I, so that the inst combiner will know that I was modified. Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e25639ae943b..54a9fbdbe82e 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -383,15 +383,28 @@ static void replaceExtractElements(InsertElementInst *InsElt, auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ConstantVector::get(ExtendMask)); - // Replace all extracts from the original narrow vector with extracts from - // the new wide vector. - WideVec->insertBefore(ExtElt); + // Insert the new shuffle after the vector operand of the extract is defined + // or at the start of the basic block, so any subsequent extracts can use it. + bool ReplaceAllExtUsers; + if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) { + WideVec->insertAfter(ExtVecOpInst); + ReplaceAllExtUsers = true; + } else { + // TODO: Insert at start of function, so it's always safe to replace all? 
+ IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); + ReplaceAllExtUsers = false; + } + + // Replace extracts from the original narrow vector with extracts from the new + // wide vector. for (User *U : ExtVecOp->users()) { - if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) { - auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - NewExt->insertAfter(WideVec); - IC.ReplaceInstUsesWith(*OldExt, NewExt); - } + ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); + if (!OldExt || + (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent())) + continue; + auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); + NewExt->insertAfter(WideVec); + IC.ReplaceInstUsesWith(*OldExt, NewExt); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7c46cfd28fc9..903a0b5f5400 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3021,7 +3021,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (Inst->isEHPad()) { + if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { EndInst = Inst; continue; } @@ -3029,8 +3029,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, ++NumDeadInst; MadeIRChange = true; } - if (!Inst->getType()->isTokenTy()) - Inst->eraseFromParent(); + Inst->eraseFromParent(); } } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 92e41ee27c09..51ff95d9a74c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -234,16 +234,14 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { } void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { - CoverageData->setSection(getCoverageSection()); - CoverageData->setAlignment(8); Constant *Init = CoverageData->getInitializer(); - // We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] } + // We're expecting { [4 x i32], [n x { i8*, i32, i32 }], [m x i8] } // for some n and m. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map"); - assert(isa<ConstantArray>(Init->getAggregateElement(4)) && + assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map"); + assert(isa<ConstantArray>(Init->getAggregateElement(1)) && "invalid function list in coverage map"); - ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4)); + ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1)); for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) { Constant *Record = Records->getOperand(I); Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts(); diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 6d70cdc3ade2..e01e23f71732 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -75,10 +75,12 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, cl::desc("Disable memory promotion in LICM pass")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop); +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, BasicBlock *Preheader); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST ); + const Loop *CurLoop, AliasSetTracker *CurAST, + const LICMSafetyInfo *SafetyInfo); static bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -92,10 +94,10 @@ static bool isSafeToExecuteUnconditionally(const Instruction &Inst, static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); -static Instruction *CloneInstructionInExitBlock(const Instruction &I, - BasicBlock &ExitBlock, - PHINode &PN, - const LoopInfo *LI); +static Instruction * +CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + const LoopInfo *LI, + const LICMSafetyInfo *SafetyInfo); static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, @@ -348,10 +350,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - if (isNotUsedInLoop(I, CurLoop) && + if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) { ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST); + Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo); } } return Changed; @@ -432,6 +434,14 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); (I != E) && !SafetyInfo->MayThrow; ++I) SafetyInfo->MayThrow |= I->mayThrow(); + + // Compute funclet colors if we might sink/hoist in a function with a funclet + // personality routine. 
+ Function *Fn = CurLoop->getHeader()->getParent(); + if (Fn->hasPersonalityFn()) + if (Constant *PersonalityFn = Fn->getPersonalityFn()) + if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) + SafetyInfo->BlockColors = colorEHFunclets(*Fn); } /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this @@ -466,6 +476,10 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, if (isa<DbgInfoIntrinsic>(I)) return false; + // Don't sink calls which can throw. + if (CI->mayThrow()) + return false; + // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) @@ -534,10 +548,24 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) { +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo) { + const auto &BlockColors = SafetyInfo->BlockColors; for (const User *U : I.users()) { const Instruction *UI = cast<Instruction>(U); if (const PHINode *PN = dyn_cast<PHINode>(UI)) { + const BasicBlock *BB = PN->getParent(); + // We cannot sink uses in catchswitches. + if (isa<CatchSwitchInst>(BB->getTerminator())) + return false; + + // We need to sink a callsite to a unique funclet. Avoid sinking if the + // phi use is too muddled. + if (isa<CallInst>(I)) + if (!BlockColors.empty() && + BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) + return false; + // A PHI node where all of the incoming values are this instruction are // special -- they can just be RAUW'ed with the instruction and thus // don't require a use in the predecessor. This is a particularly important @@ -565,11 +593,41 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) { return true; } -static Instruction *CloneInstructionInExitBlock(const Instruction &I, - BasicBlock &ExitBlock, - PHINode &PN, - const LoopInfo *LI) { - Instruction *New = I.clone(); +static Instruction * +CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + const LoopInfo *LI, + const LICMSafetyInfo *SafetyInfo) { + Instruction *New; + if (auto *CI = dyn_cast<CallInst>(&I)) { + const auto &BlockColors = SafetyInfo->BlockColors; + + // Sinking call-sites need to be handled differently from other + // instructions. The cloned call-site needs a funclet bundle operand + // appropriate for its location in the CFG.
+ SmallVector<OperandBundleDef, 1> OpBundles; + for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); + BundleIdx != BundleEnd; ++BundleIdx) { + OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); + if (Bundle.getTagID() == LLVMContext::OB_funclet) + continue; + + OpBundles.emplace_back(Bundle); + } + + if (!BlockColors.empty()) { + const ColorVector &CV = BlockColors.find(&ExitBlock)->second; + assert(CV.size() == 1 && "non-unique color for exit block!"); + BasicBlock *BBColor = CV.front(); + Instruction *EHPad = BBColor->getFirstNonPHI(); + if (EHPad->isEHPad()) + OpBundles.emplace_back("funclet", EHPad); + } + + New = CallInst::Create(CI, OpBundles); + } else { + New = I.clone(); + } + ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); if (!I.getName().empty()) New->setName(I.getName() + ".le"); @@ -601,7 +659,8 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I, /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, - const Loop *CurLoop, AliasSetTracker *CurAST ) { + const Loop *CurLoop, AliasSetTracker *CurAST, + const LICMSafetyInfo *SafetyInfo) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); bool Changed = false; if (isa<LoadInst>(I)) ++NumMovedLoads; @@ -652,7 +711,7 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, New = It->second; else New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI); + CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); PN->replaceAllUsesWith(New); PN->eraseFromParent(); @@ -950,6 +1009,21 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, if (!GuaranteedToExecute) return Changed; + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.clear(); + InsertPts.reserve(ExitBlocks.size()); + for (BasicBlock *ExitBlock : ExitBlocks) + InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + } + + // Can't insert into a catchswitch. + for (BasicBlock *ExitBlock : ExitBlocks) + if (isa<CatchSwitchInst>(ExitBlock->getTerminator())) + return Changed; + // Otherwise, this is safe to promote, let's do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; @@ -961,15 +1035,6 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - // Figure out the loop exits and their insertion points, if this is the - // first promotion. - if (ExitBlocks.empty()) { - CurLoop->getUniqueExitBlocks(ExitBlocks); - InsertPts.resize(ExitBlocks.size()); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt(); - } - // We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2d577de7c2b8..4521640e3947 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -108,7 +108,11 @@ public: private: typedef SmallVector<StoreInst *, 8> StoreList; - StoreList StoreRefs; + StoreList StoreRefsForMemset; + StoreList StoreRefsForMemcpy; + bool HasMemset; + bool HasMemsetPattern; + bool HasMemcpy; /// \name Countable Loop Idiom Handling /// @{ @@ -118,17 +122,15 @@ private: SmallVectorImpl<BasicBlock *> &ExitBlocks); void collectStores(BasicBlock *BB); - bool isLegalStore(StoreInst *SI); + bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy); bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, Value *SplatValue, + unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride); - bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEV *BECount, bool NegStride); + bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); /// @} /// \name Noncountable Loop Idiom Handling @@ -207,8 +209,13 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { *CurLoop->getHeader()->getParent()); DL = &CurLoop->getHeader()->getModule()->getDataLayout(); - if (SE->hasLoopInvariantBackedgeTakenCount(L)) - return runOnCountableLoop(); + HasMemset = TLI->has(LibFunc::memset); + HasMemsetPattern = TLI->has(LibFunc::memset_pattern16); + HasMemcpy = TLI->has(LibFunc::memcpy); + + if (HasMemset || HasMemsetPattern || HasMemcpy) + if (SE->hasLoopInvariantBackedgeTakenCount(L)) + return runOnCountableLoop(); return runOnNoncountableLoop(); } @@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C)); } -bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { +bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, + bool &ForMemcpy) { // Don't touch volatile stores. if (!SI->isSimple()) return false; @@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { if (!isa<SCEVConstant>(StoreEv->getOperand(1))) return false; - return true; + // See if the store can be turned into a memset. + + // If the stored value is a byte-wise value (like i32 -1), then it may be + // turned into a memset of i8 -1, assuming that all the consecutive bytes + // are stored. A store of i32 0x01020304 can never be turned into a memset, + // but it can be turned into memset_pattern if the target supports it. + Value *SplatValue = isBytewiseValue(StoredVal); + Constant *PatternValue = nullptr; + + // If we're allowed to form a memset, and the stored value would be + // acceptable for memset, use it. + if (HasMemset && SplatValue && + // Verify that the stored value is loop invariant. If not, we can't + // promote the memset. + CurLoop->isLoopInvariant(SplatValue)) { + // It looks like we can use SplatValue. + ForMemset = true; + return true; + } else if (HasMemsetPattern && + // Don't create memset_pattern16s with address spaces. 
+ StorePtr->getType()->getPointerAddressSpace() == 0 && + (PatternValue = getMemSetPatternValue(StoredVal, DL))) { + // It looks like we can use PatternValue! + ForMemset = true; + return true; + } + + // Otherwise, see if the store can be turned into a memcpy. + if (HasMemcpy) { + // Check to see if the stride matches the size of the store. If so, then we + // know that every byte is touched in the loop. + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + if (StoreSize != Stride && StoreSize != -Stride) + return false; + + // The store must be feeding a non-volatile load. + LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); + if (!LI || !LI->isSimple()) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided load. If we have something else, it's a + // random load we can't handle. + const SCEVAddRecExpr *LoadEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. + if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; + + // Success. This store can be converted into a memcpy. + ForMemcpy = true; + return true; + } + // This store can't be transformed into a memset/memcpy. + return false; } void LoopIdiomRecognize::collectStores(BasicBlock *BB) { - StoreRefs.clear(); + StoreRefsForMemset.clear(); + StoreRefsForMemcpy.clear(); for (Instruction &I : *BB) { StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) continue; + bool ForMemset = false; + bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI)) + if (!isLegalStore(SI, ForMemset, ForMemcpy)) continue; // Save the store locations. - StoreRefs.push_back(SI); + if (ForMemset) + StoreRefsForMemset.push_back(SI); + else if (ForMemcpy) + StoreRefsForMemcpy.push_back(SI); } } @@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock( bool MadeChange = false; // Look for store instructions, which may be optimized to memset/memcpy. collectStores(BB); - for (auto &SI : StoreRefs) + + // Look for a single store which can be optimized into a memset. + for (auto &SI : StoreRefsForMemset) MadeChange |= processLoopStore(SI, BECount); + // Optimize the store into a memcpy, if it feeds a similarly strided load. + for (auto &SI : StoreRefsForMemcpy) + MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. @@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( return MadeChange; } -/// processLoopStore - See if this store can be promoted to a memset or memcpy. +/// processLoopStore - See if this store can be promoted to a memset. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); @@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { bool NegStride = StoreSize == -Stride; // See if we can optimize just this store in isolation. - if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), - StoredVal, SI, StoreEv, BECount, NegStride)) - return true; - - // Optimize the store into a memcpy, if it feeds a similarly strided load.
- return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride); + return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), + StoredVal, SI, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. @@ -440,8 +514,14 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, if (!Stride || MSI->getLength() != Stride->getValue()) return false; + // Verify that the memset value is loop invariant. If not, we can't promote + // the memset. + Value *SplatValue = MSI->getValue(); + if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue)) + return false; + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), MSI->getValue(), MSI, Ev, + MSI->getAlignment(), SplatValue, MSI, Ev, BECount, /*NegStride=*/false); } @@ -496,37 +576,19 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride) { - - // If the stored value is a byte-wise value (like i32 -1), then it may be - // turned into a memset of i8 -1, assuming that all the consecutive bytes - // are stored. A store of i32 0x01020304 can never be turned into a memset, - // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); - // If we're allowed to form a memset, and the stored value would be acceptable - // for memset, use it. - if (SplatValue && TLI->has(LibFunc::memset) && - // Verify that the stored value is loop invariant. If not, we can't - // promote the memset. - CurLoop->isLoopInvariant(SplatValue)) { - // Keep and use SplatValue. - PatternValue = nullptr; - } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && - (PatternValue = getMemSetPatternValue(StoredVal, DL))) { - // Don't create memset_pattern16s with address spaces. - // It looks like we can use PatternValue! - SplatValue = nullptr; - } else { - // Otherwise, this isn't an idiom we can transform. For example, we can't - // do anything with a 3-byte store. - return false; - } + if (!SplatValue) + PatternValue = getMemSetPatternValue(StoredVal, DL); + + assert((SplatValue || PatternValue) && + "Expected either splat value or pattern value."); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. + unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); @@ -608,29 +670,25 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like /// for (i) A[i] = B[i]; -bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( - StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, - const SCEV *BECount, bool NegStride) { - // If we're not allowed to form memcpy, we fail. 
- if (!TLI->has(LibFunc::memcpy)) - return false; +bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, + const SCEV *BECount) { + assert(SI->isSimple() && "Expected only non-volatile stores."); + + Value *StorePtr = SI->getPointerOperand(); + const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + bool NegStride = StoreSize == -Stride; // The store must be feeding a non-volatile load. - LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); - if (!LI || !LI->isSimple()) - return false; + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + assert(LI->isSimple() && "Expected only non-volatile loads."); // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. const SCEVAddRecExpr *LoadEv = - dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; - - // The store and load must share the same stride. - if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return false; + cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0333bf2284e1..7354016c2122 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -481,6 +481,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, return AMemSet; } +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned StoreAlign = SI->getAlignment(); + if (!StoreAlign) + StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + unsigned LoadAlign = LI->getAlignment(); + if (!LoadAlign) + LoadAlign = DL.getABITypeAlignment(LI->getType()); + + return std::min(StoreAlign, LoadAlign); +} bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; @@ -496,12 +507,84 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { const DataLayout &DL = SI->getModule()->getDataLayout(); - // Detect cases where we're performing call slot forwarding, but - // happen to be using a load-store pair to implement it, rather than - // a memcpy. + // Load to store forwarding can be interpreted as memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { + + auto *T = LI->getType(); + if (T->isAggregateType()) { + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MemoryLocation LoadLoc = MemoryLocation::get(LI); + + // We use alias analysis to check if an instruction may store to + // the memory we load from in between the load and the store. If + // such an instruction is found, we try to promote there instead + // of at the store position. + Instruction *P = SI; + for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator(); I != E; ++I) { + if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod)) + continue; + + // We found an instruction that may write to the loaded memory.
+ // We can try to promote at this position instead of the store + // position if nothing aliases the store memory after this point. + P = &*I; + for (; I != E; ++I) { + MemoryLocation StoreLoc = MemoryLocation::get(SI); + if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { + DEBUG(dbgs() << "Alias " << *I << "\n"); + P = nullptr; + break; + } + } + + break; + } + + // If a valid insertion position is found, then we can promote + // the load/store pair to a memcpy. + if (P) { + // If we load from memory that may alias the memory we store to, + // memmove must be used to preserve semantics. If not, memcpy can + // be used. + bool UseMemMove = false; + if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) + UseMemMove = true; + + unsigned Align = findCommonAlignment(DL, SI, LI); + uint64_t Size = DL.getTypeStoreSize(T); + + IRBuilder<> Builder(P); + Instruction *M; + if (UseMemMove) + M = Builder.CreateMemMove(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + else + M = Builder.CreateMemCpy(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + + DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI + << " => " << *M << "\n"); + + MD->removeInstruction(SI); + SI->eraseFromParent(); + MD->removeInstruction(LI); + LI->eraseFromParent(); + ++NumMemCpyInstr; + + // Make sure we do not invalidate the iterator. + BBI = M->getIterator(); + return true; + } + } + + // Detect cases where we're performing call slot forwarding, but + // happen to be using a load-store pair to implement it, rather than + // a memcpy. MemDepResult ldep = MD->getDependency(LI); CallInst *C = nullptr; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) @@ -522,18 +605,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } if (C) { - unsigned storeAlign = SI->getAlignment(); - if (!storeAlign) - storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); - unsigned loadAlign = LI->getAlignment(); - if (!loadAlign) - loadAlign = DL.getABITypeAlignment(LI->getType()); - bool changed = performCallSlotOptzn( LI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), - std::min(storeAlign, loadAlign), C); + findCommonAlignment(DL, SI, LI), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index fb970c747ce1..401a740856e9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -183,6 +183,8 @@ namespace { Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *RemoveFactorFromExpression(Value *V, Value *Factor); void EraseInst(Instruction *I); + void RecursivelyEraseDeadInsts(Instruction *I, + SetVector<AssertingVH<Instruction>> &Insts); void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); }; @@ -1926,6 +1928,22 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, return nullptr; } +// Remove dead instructions, and if any operands are trivially dead, add them to +// Insts so they will be removed as well.
+void Reassociate::RecursivelyEraseDeadInsts(
+    Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
+  assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+  SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
+  ValueRankMap.erase(I);
+  Insts.remove(I);
+  RedoInsts.remove(I);
+  I->eraseFromParent();
+  for (auto Op : Ops)
+    if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+      if (OpInst->use_empty())
+        Insts.insert(OpInst);
+}
+
 /// Zap the given instruction, adding interesting operands to the work list.
 void Reassociate::EraseInst(Instruction *I) {
   assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
@@ -2255,7 +2273,21 @@ bool Reassociate::runOnFunction(Function &F) {
       ++II;
     }

-    // If this produced extra instructions to optimize, handle them now.
+    // Make a copy of all the instructions to be redone so we can remove dead
+    // instructions.
+    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+    // Iterate over all instructions to be reevaluated and remove trivially
+    // dead instructions. If any operand of the trivially dead instruction
+    // becomes dead, mark it for deletion as well. Continue this process until
+    // all trivially dead instructions have been removed.
+    while (!ToRedo.empty()) {
+      Instruction *I = ToRedo.pop_back_val();
+      if (isInstructionTriviallyDead(I))
+        RecursivelyEraseDeadInsts(I, ToRedo);
+    }
+
+    // Now that we have removed dead instructions, we can reoptimize the
+    // remaining instructions.
     while (!RedoInsts.empty()) {
       Instruction *I = RedoInsts.pop_back_val();
       if (isInstructionTriviallyDead(I))
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index db127c3f7b4e..5d253be1aa86 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -428,30 +428,15 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
     // We should have never reached here if this argument isn't a gc value
     return BaseDefiningValueResult(I, true);

-  if (isa<GlobalVariable>(I))
-    // base case
+  if (isa<Constant>(I))
+    // We assume that objects with a constant base (e.g. a global) can't move
+    // and don't need to be reported to the collector because they are always
+    // live. All constants have constant bases. Besides global references, all
+    // kinds of constants (e.g. undef, constant expressions, null pointers) can
+    // be introduced by the inliner or the optimizer, especially on dynamically
+    // dead paths. See e.g. test4 in constants.ll.
     return BaseDefiningValueResult(I, true);

-  // inlining could possibly introduce phi node that contains
-  // undef if callee has multiple returns
-  if (isa<UndefValue>(I))
-    // utterly meaningless, but useful for dealing with
-    // partially optimized code.
-    return BaseDefiningValueResult(I, true);
-
-  // Due to inheritance, this must be _after_ the global variable and undef
-  // checks
-  if (isa<Constant>(I)) {
-    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
-           "order of checks wrong!");
-    // Note: Even for frontends which don't have constant references, we can
-    // see constants appearing after optimizations. A simple example is
-    // specialization of an address computation on null feeding into a merge
-    // point where the actual use of the now-constant input is protected by
-    // another null check. (e.g.
test4 in constants.ll) - return BaseDefiningValueResult(I, true); - } - if (CastInst *CI = dyn_cast<CastInst>(I)) { Value *Def = CI->stripPointerCasts(); // If stripping pointer casts changes the address space there is an @@ -1642,33 +1627,24 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, DenseSet<Value *> &VisitedLiveValues) { for (User *U : GCRelocs) { - if (!isa<IntrinsicInst>(U)) + GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U); + if (!Relocate) continue; - IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U); - - // We only care about relocates - if (RelocatedValue->getIntrinsicID() != - Intrinsic::experimental_gc_relocate) { - continue; - } - - GCRelocateOperands RelocateOperands(RelocatedValue); - Value *OriginalValue = - const_cast<Value *>(RelocateOperands.getDerivedPtr()); + Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr()); assert(AllocaMap.count(OriginalValue)); Value *Alloca = AllocaMap[OriginalValue]; // Emit store into the related alloca // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to // the correct type according to alloca. - assert(RelocatedValue->getNextNode() && + assert(Relocate->getNextNode() && "Should always have one since it's not a terminator"); - IRBuilder<> Builder(RelocatedValue->getNextNode()); + IRBuilder<> Builder(Relocate->getNextNode()); Value *CastedRelocatedValue = - Builder.CreateBitCast(RelocatedValue, + Builder.CreateBitCast(Relocate, cast<AllocaInst>(Alloca)->getAllocatedType(), - suffixed_name_or(RelocatedValue, ".casted", "")); + suffixed_name_or(Relocate, ".casted", "")); StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca); Store->insertAfter(cast<Instruction>(CastedRelocatedValue)); diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 0914699a2e38..42287d3bb2e8 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -74,17 +74,13 @@ namespace llvm { // insertFastDiv - Substitutes the div/rem instruction with code that checks the // value of the operands and uses a shorter-faster div/rem instruction when // possible and the longer-slower div/rem instruction otherwise. 
-static bool insertFastDiv(Function &F, - Function::iterator &I, - BasicBlock::iterator &J, - IntegerType *BypassType, - bool UseDivOp, - bool UseSignedOp, +static bool insertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache) { + Function *F = I->getParent()->getParent(); // Get instruction operands - Instruction *Instr = &*J; - Value *Dividend = Instr->getOperand(0); - Value *Divisor = Instr->getOperand(1); + Value *Dividend = I->getOperand(0); + Value *Divisor = I->getOperand(1); if (isa<ConstantInt>(Divisor) || (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { @@ -94,13 +90,12 @@ static bool insertFastDiv(Function &F, } // Basic Block is split before divide - BasicBlock *MainBB = &*I; - BasicBlock *SuccessorBB = I->splitBasicBlock(J); - ++I; //advance iterator I to successorBB + BasicBlock *MainBB = &*I->getParent(); + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); // Add new basic block for slow divide operation - BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", - MainBB->getParent(), SuccessorBB); + BasicBlock *SlowBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); SlowBB->moveBefore(SuccessorBB); IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); Value *SlowQuotientV; @@ -115,8 +110,8 @@ static bool insertFastDiv(Function &F, SlowBuilder.CreateBr(SuccessorBB); // Add new basic block for fast divide operation - BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", - MainBB->getParent(), SuccessorBB); + BasicBlock *FastBB = + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); FastBB->moveBefore(SlowBB); IRBuilder<> FastBuilder(FastBB, FastBB->begin()); Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, @@ -139,19 +134,19 @@ static bool insertFastDiv(Function &F, // Phi nodes for result of div and rem IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); - PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); QuoPhi->addIncoming(SlowQuotientV, SlowBB); QuoPhi->addIncoming(FastQuotientV, FastBB); - PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); RemPhi->addIncoming(SlowRemainderV, SlowBB); RemPhi->addIncoming(FastRemainderV, FastBB); - // Replace Instr with appropriate phi node + // Replace I with appropriate phi node if (UseDivOp) - Instr->replaceAllUsesWith(QuoPhi); + I->replaceAllUsesWith(QuoPhi); else - Instr->replaceAllUsesWith(RemPhi); - Instr->eraseFromParent(); + I->replaceAllUsesWith(RemPhi); + I->eraseFromParent(); // Combine operands into a single value with OR for value testing below MainBB->getInstList().back().eraseFromParent(); @@ -168,9 +163,6 @@ static bool insertFastDiv(Function &F, Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); - // point iterator J at first instruction of successorBB - J = I->begin(); - // Cache phi nodes to be used later in place of other instances // of div or rem with the same sign, dividend, and divisor DivOpInfo Key(UseSignedOp, Dividend, Divisor); @@ -179,57 +171,54 @@ static bool insertFastDiv(Function &F, return true; } -// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if -// operands and operation are identical. Otherwise call insertFastDiv to perform -// the optimization and cache the resulting dividend and remainder. 
-static bool reuseOrInsertFastDiv(Function &F, - Function::iterator &I, - BasicBlock::iterator &J, - IntegerType *BypassType, - bool UseDivOp, - bool UseSignedOp, +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from +// the current BB if operands and operation are identical. Otherwise calls +// insertFastDiv to perform the optimization and caches the resulting dividend +// and remainder. +static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, + bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = &*J; - DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); if (CacheI == PerBBDivCache.end()) { // If previous instance does not exist, insert fast div - return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, - PerBBDivCache); + return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); } // Replace operation value with previously generated phi node DivPhiNodes &Value = CacheI->second; if (UseDivOp) { // Replace all uses of div instruction with quotient phi node - J->replaceAllUsesWith(Value.Quotient); + I->replaceAllUsesWith(Value.Quotient); } else { // Replace all uses of rem instruction with remainder phi node - J->replaceAllUsesWith(Value.Remainder); + I->replaceAllUsesWith(Value.Remainder); } - // Advance to next operation - ++J; - // Remove redundant operation - Instr->eraseFromParent(); + I->eraseFromParent(); return true; } -// bypassSlowDivision - This optimization identifies DIV instructions that can -// be profitably bypassed and carried out with a shorter, faster divide. -bool llvm::bypassSlowDivision(Function &F, - Function::iterator &I, - const DenseMap<unsigned int, unsigned int> &BypassWidths) { +// bypassSlowDivision - This optimization identifies DIV instructions in a BB +// that can be profitably bypassed and carried out with a shorter, faster +// divide. +bool llvm::bypassSlowDivision( + BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { DivCacheTy DivCache; bool MadeChange = false; - for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + Instruction* Next = &*BB->begin(); + while (Next != nullptr) { + // We may add instructions immediately after I, but we want to skip over + // them. 
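For context on the BypassSlowDivision rework running through this hunk: the pass splits the block, emits a runtime width test, and joins a cheap narrow divide and the original wide divide with phi nodes. Collapsed into straight-line code, the generated control flow computes roughly the following (a plain C++ sketch assuming a 64-bit divide bypassed to 32 bits; `bypassedUDiv` is a hypothetical name):

```cpp
#include <cstdint>
#include <iostream>

// If neither operand uses its upper 32 bits, the narrow divide (the
// pass's "fast" block) produces the same quotient as the full 64-bit
// divide (the "slow" block); the OR-and-shift is the emitted width test.
static uint64_t bypassedUDiv(uint64_t Dividend, uint64_t Divisor) {
  if (((Dividend | Divisor) >> 32) == 0)
    return static_cast<uint32_t>(Dividend) / static_cast<uint32_t>(Divisor);
  return Dividend / Divisor;
}

int main() {
  std::cout << bypassedUDiv(100, 7) << '\n';        // 14, via the fast path
  std::cout << bypassedUDiv(1ULL << 40, 3) << '\n'; // via the slow path
  return 0;
}
```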
+ Instruction* I = Next; + Next = Next->getNextNode(); // Get instruction details - unsigned Opcode = J->getOpcode(); + unsigned Opcode = I->getOpcode(); bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; bool UseSignedOp = Opcode == Instruction::SDiv || @@ -240,11 +229,11 @@ bool llvm::bypassSlowDivision(Function &F, continue; // Skip division on vector types, only optimize integer instructions - if (!J->getType()->isIntegerTy()) + if (!I->getType()->isIntegerTy()) continue; // Get bitwidth of div/rem instruction - IntegerType *T = cast<IntegerType>(J->getType()); + IntegerType *T = cast<IntegerType>(I->getType()); unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed @@ -253,10 +242,9 @@ bool llvm::bypassSlowDivision(Function &F, continue; // Get type for div/rem instruction with bypass bitwidth - IntegerType *BT = IntegerType::get(J->getContext(), BI->second); + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, - UseSignedOp, DivCache); + MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); } return MadeChange; diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index e75163f323df..0e386ac83e9e 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -1305,8 +1305,9 @@ static bool markAliveBlocks(Function &F, } } - // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + TerminatorInst *Terminator = BB->getTerminator(); + if (auto *II = dyn_cast<InvokeInst>(Terminator)) { + // Turn invokes that call 'nounwind' functions into ordinary calls. Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { changeToUnreachable(II, true); @@ -1321,6 +1322,44 @@ static bool markAliveBlocks(Function &F, changeToCall(II); Changed = true; } + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { + // Remove catchpads which cannot be reached. + struct CatchPadDenseMapInfo { + static CatchPadInst *getEmptyKey() { + return DenseMapInfo<CatchPadInst *>::getEmptyKey(); + } + static CatchPadInst *getTombstoneKey() { + return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); + } + static unsigned getHashValue(CatchPadInst *CatchPad) { + return static_cast<unsigned>(hash_combine_range( + CatchPad->value_op_begin(), CatchPad->value_op_end())); + } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } + }; + + // Set of unique CatchPads. 
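The `CatchPadDenseMapInfo` above customizes hashing and equality so that the handler set declared just below deduplicates structurally identical catchpads rather than merely distinct pointers. The same value-based dedup pattern in portable C++, with a toy `Pad` type standing in for `CatchPadInst` (illustrative sketch):

```cpp
#include <functional>
#include <iostream>
#include <unordered_set>

struct Pad { int TypeToken; }; // stand-in for a catchpad's operand list

struct PadHash {
  std::size_t operator()(const Pad *P) const {
    return std::hash<int>()(P->TypeToken); // hash the contents, not the pointer
  }
};
struct PadEqual {
  bool operator()(const Pad *A, const Pad *B) const {
    return A->TypeToken == B->TypeToken; // structural test, like isIdenticalTo
  }
};

int main() {
  Pad A{42}, B{42}, C{7};
  std::unordered_set<const Pad *, PadHash, PadEqual> Handlers;
  // A failed insert signals a structural duplicate, which is the cue
  // markAliveBlocks uses to drop the redundant handler edge.
  std::cout << Handlers.insert(&A).second << '\n'; // 1
  std::cout << Handlers.insert(&B).second << '\n'; // 0: duplicate of A
  std::cout << Handlers.insert(&C).second << '\n'; // 1
  return 0;
}
```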
+ SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4, + CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>> + HandlerSet; + detail::DenseSetEmpty Empty; + for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(), + E = CatchSwitch->handler_end(); + I != E; ++I) { + BasicBlock *HandlerBB = *I; + auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI()); + if (!HandlerSet.insert({CatchPad, Empty}).second) { + CatchSwitch->removeHandler(I); + --I; + --E; + Changed = true; + } + } } Changed |= ConstantFoldTerminator(BB, true); @@ -1514,8 +1553,8 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return true; // Check if the function is specifically marked as a gc leaf function. - // - // TODO: we should be checking the attributes on the call site as well. + if (CS.hasFnAttr("gc-leaf-function")) + return true; if (const Function *F = CS.getCalledFunction()) return F->hasFnAttribute("gc-leaf-function"); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index d0932f834cf5..3bb3fa5a301f 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3448,18 +3449,26 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break; if (BBI->mayHaveSideEffects()) { - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (auto *SI = dyn_cast<StoreInst>(BBI)) { if (SI->isVolatile()) break; - } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + } else if (auto *LI = dyn_cast<LoadInst>(BBI)) { if (LI->isVolatile()) break; - } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { + } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) { if (RMWI->isVolatile()) break; - } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { + } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) { if (CXI->isVolatile()) break; + } else if (isa<CatchPadInst>(BBI)) { + // A catchpad may invoke exception object constructors and such, which + // in some languages can be arbitrary code, so be conservative by + // default. + // For CoreCLR, it just involves a type test, so can be removed. 
+ if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != + EHPersonality::CoreCLR) + break; } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) && !isa<LandingPadInst>(BBI)) { break; @@ -3485,7 +3494,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); IRBuilder<> Builder(TI); - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (auto *BI = dyn_cast<BranchInst>(TI)) { if (BI->isUnconditional()) { if (BI->getSuccessor(0) == BB) { new UnreachableInst(TI->getContext(), TI); @@ -3502,7 +3511,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { Changed = true; } } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == BB) { @@ -3511,18 +3520,49 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - } else if ((isa<InvokeInst>(TI) && - cast<InvokeInst>(TI)->getUnwindDest() == BB) || - isa<CatchSwitchInst>(TI)) { - removeUnwindEdge(TI->getParent()); - Changed = true; + } else if (auto *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + } + } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) { + if (CSI->getUnwindDest() == BB) { + removeUnwindEdge(TI->getParent()); + Changed = true; + continue; + } + + for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), + E = CSI->handler_end(); + I != E; ++I) { + if (*I == BB) { + CSI->removeHandler(I); + --I; + --E; + Changed = true; + } + } + if (CSI->getNumHandlers() == 0) { + BasicBlock *CatchSwitchBB = CSI->getParent(); + if (CSI->hasUnwindDest()) { + // Redirect preds to the unwind dest + CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest()); + } else { + // Rewrite all preds to unwind to caller (or from invoke to call). + SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB)); + for (BasicBlock *EHPred : EHPreds) + removeUnwindEdge(EHPred); + } + // The catchswitch is no longer reachable. + new UnreachableInst(CSI->getContext(), CSI); + CSI->eraseFromParent(); + Changed = true; + } } else if (isa<CleanupReturnInst>(TI)) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } - // TODO: We can remove a catchswitch if all it's catchpads end in - // unreachable. } // If this block is now dead, remove it. diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 81dea6d1b9ae..dc5fee523d4c 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -57,8 +57,7 @@ static bool ignoreCallingConv(LibFunc::Func Func) { Func == LibFunc::llabs || Func == LibFunc::strlen; } -/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. +/// Return true if it only matters that the value is equal or not-equal to zero. static bool isOnlyUsedInZeroEqualityComparison(Value *V) { for (User *U : V->users()) { if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) @@ -72,8 +71,7 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality -/// comparisons with With. 
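The zero-equality analysis whose comment is rewritten just below is what licenses rewrites such as the `memcmp` lowering later in this file's hunk: when every user only compares the result against zero, the ordering information is dead and a wide integer load-and-compare suffices. A portable sketch of that equivalence (the `equal2` helper is hypothetical, for a fixed 2-byte compare):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// If memcmp's result is only tested against zero, a 2-byte compare can
// become a single 16-bit load of each side followed by one comparison.
static bool equal2(const void *A, const void *B) {
  std::uint16_t LHSV, RHSV;
  std::memcpy(&LHSV, A, 2); // portable unaligned load
  std::memcpy(&RHSV, B, 2);
  return LHSV == RHSV;
}

int main() {
  const char X[] = "ab", Y[] = "ab", Z[] = "ac";
  std::cout << (std::memcmp(X, Y, 2) == 0) << ' ' << equal2(X, Y) << '\n'; // 1 1
  std::cout << (std::memcmp(X, Z, 2) == 0) << ' ' << equal2(X, Z) << '\n'; // 0 0
  return 0;
}
```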
+/// Return true if it is only used in equality comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) @@ -249,12 +247,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { !FT->getParamType(2)->isIntegerTy()) return nullptr; - // Extract some information from the instruction + // Extract some information from the instruction. Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); uint64_t Len; - // We don't do anything if length is not constant + // We don't do anything if length is not constant. if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else @@ -272,12 +270,12 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { if (SrcLen == 0 || Len == 0) return Dst; - // We don't optimize this case + // We don't optimize this case. if (Len < SrcLen) return nullptr; // strncat(x, s, c) -> strcat(x, s) - // s is constant so the strcat can be optimized further + // s is constant so the strcat can be optimized further. return emitStrLenMemCpy(Src, Dst, SrcLen, B); } @@ -310,7 +308,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) - return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), + "strchr"); return nullptr; } @@ -490,8 +489,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Type *PT = Callee->getFunctionType()->getParamType(0); Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); - Value *DstEnd = - B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); + Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, + ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. @@ -599,7 +598,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); - return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), + "strpbrk"); } // strpbrk(s, "a") -> strchr(s, 'a') @@ -878,8 +878,10 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Type *RHSPtrTy = IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); - Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + Value *LHSV = + B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); + Value *RHSV = + B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } @@ -992,6 +994,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, Value *V = valueHasFloatPrecision(CI->getArgOperand(0)); if (V == nullptr) return nullptr; + + // Propagate fast-math flags from the existing call to the new call. 
+ IRBuilder<>::FastMathFlagGuard Guard(B); + B.SetFastMathFlags(CI->getFastMathFlags()); // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { @@ -1027,6 +1033,10 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { if (V2 == nullptr) return nullptr; + // Propagate fast-math flags from the existing call to the new call. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.SetFastMathFlags(CI->getFastMathFlags()); + // fmin((double)floatval1, (double)floatval2) // -> (double)fminf(floatval1, floatval2) // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP(). @@ -1117,7 +1127,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } - bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); + bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); // pow(exp(x), y) -> exp(x*y) // pow(exp2(x), y) -> exp2(x * y) @@ -1126,7 +1136,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). - if (unsafeFPMath) { + if (UnsafeFPMath) { if (auto *OpC = dyn_cast<CallInst>(Op1)) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; @@ -1157,7 +1167,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::fabsl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (unsafeFPMath) + if (UnsafeFPMath) return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, Callee->getAttributes()); @@ -1183,7 +1193,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (unsafeFPMath) { + if (UnsafeFPMath) { APFloat V = abs(Op2C->getValueAPF()); // We limit to a max of 7 fmul(s). Thus max exponent is 32. // This transformation applies to integer exponents only. @@ -1291,12 +1301,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { // function, do that first. Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - if ((Name == "fmin" && hasFloatVersion(Name)) || - (Name == "fmax" && hasFloatVersion(Name))) { - Value *Ret = optimizeBinaryDoubleFP(CI, B); - if (Ret) + if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) + if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) return Ret; - } // Make sure this has 2 arguments of FP type which match the result type. FunctionType *FT = Callee->getFunctionType(); @@ -1307,14 +1314,12 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - Function *F = CI->getParent()->getParent(); - if (canUseUnsafeFPMath(F)) { + if (CI->hasUnsafeAlgebra()) { // Unsafe algebra sets all fast-math-flags to true. FMF.setUnsafeAlgebra(); } else { // At a minimum, no-nans-fp-math must be true. 
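The no-NaNs requirement stated in the comment above comes straight from IEEE-754: library `fmax`/`fmin` quiet NaNs by returning the other operand, whereas the compare-and-select the simplifier wants to form does not, so the two only agree when NaNs are excluded. A small demonstration (plain C++):

```cpp
#include <cmath>
#include <iostream>

int main() {
  double NaN = std::nan("");
  // Library fmax ignores a NaN operand.
  std::cout << std::fmax(NaN, 1.0) << '\n';     // prints: 1
  // A bare select does not: every comparison with NaN is false, so the
  // result depends on operand order, unlike fmax.
  std::cout << (NaN > 1.0 ? NaN : 1.0) << '\n'; // prints: 1
  std::cout << (1.0 > NaN ? 1.0 : NaN) << '\n'; // prints: nan
  return 0;
}
```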
-    Attribute Attr = F->getFnAttribute("no-nans-fp-math");
-    if (Attr.getValueAsString() != "true")
+    if (!CI->hasNoNaNs())
       return nullptr;

     // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
     // "Ideally, fmax would be sensitive to the sign of zero, for example
@@ -2169,7 +2174,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
   StringRef FuncName = Callee->getName();
-  IRBuilder<> Builder(CI);
+
+  SmallVector<OperandBundleDef, 2> OpBundles;
+  CI->getOperandBundlesAsDefs(OpBundles);
+  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // Command-line parameter overrides function attribute.
@@ -2419,7 +2427,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
   return false;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
@@ -2433,7 +2442,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
@@ -2447,7 +2457,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
   return nullptr;
 }

-Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();

   if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
@@ -2539,7 +2550,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
   StringRef FuncName = Callee->getName();
-  IRBuilder<> Builder(CI);
+
+  SmallVector<OperandBundleDef, 2> OpBundles;
+  CI->getOperandBundlesAsDefs(OpBundles);
+  IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // First, check that this is a known library function.
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 1add78e01657..2e361d38ed0b 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -218,12 +218,12 @@ static Metadata *mapMetadataOp(Metadata *Op,
 }

 /// Resolve uniquing cycles involving the given metadata.
-static void resolveCycles(Metadata *MD, bool MDMaterialized) {
+static void resolveCycles(Metadata *MD, bool AllowTemps) {
   if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
-    if (!MDMaterialized && N->isTemporary())
+    if (AllowTemps && N->isTemporary())
       return;
     if (!N->isResolved())
-      N->resolveCycles(MDMaterialized);
+      N->resolveCycles(AllowTemps);
   }
 }

@@ -253,7 +253,7 @@ static bool remapOperands(MDNode &Node,
       // Resolve uniquing cycles underneath distinct nodes on the fly so they
       // don't infect later operands.
if (IsDistinct) - resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata)); + resolveCycles(New, Flags & RF_HaveUnmaterializedMetadata); } } @@ -401,7 +401,7 @@ Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, return NewMD; // Resolve cycles involving the entry metadata. - resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata)); + resolveCycles(NewMD, Flags & RF_HaveUnmaterializedMetadata); // Remap the operands of distinct MDNodes. while (!DistinctWorklist.empty()) diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a627dd665179..2c0d317d16bc 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4294,12 +4294,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, - Reductions[Phi])) { - if (Reductions[Phi].hasUnsafeAlgebra()) - Requirements->addUnsafeAlgebraInst( - Reductions[Phi].getUnsafeAlgebraInst()); - AllowedExit.insert(Reductions[Phi].getLoopExitInstr()); + RecurrenceDescriptor RedDes; + if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) { + if (RedDes.hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); + AllowedExit.insert(RedDes.getLoopExitInstr()); + Reductions[Phi] = RedDes; continue; } diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 4177388840be..6e9a9484dc88 100644 --- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -493,7 +493,8 @@ struct MatchableInfo { void initialize(const AsmMatcherInfo &Info, SmallPtrSetImpl<Record*> &SingletonRegisters, - AsmVariantInfo const &Variant); + AsmVariantInfo const &Variant, + bool HasMnemonicFirst); /// validate - Return true if this matchable is a valid thing to match against /// and perform a bunch of validity checking. @@ -502,20 +503,21 @@ struct MatchableInfo { /// findAsmOperand - Find the AsmOperand with the specified name and /// suboperand index. int findAsmOperand(StringRef N, int SubOpIdx) const { - for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) - if (N == AsmOperands[i].SrcOpName && - SubOpIdx == AsmOperands[i].SubOpIdx) - return i; - return -1; + auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(), + [&](const AsmOperand &Op) { + return Op.SrcOpName == N && Op.SubOpIdx == SubOpIdx; + }); + return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1; } /// findAsmOperandNamed - Find the first AsmOperand with the specified name. /// This does not check the suboperand index. int findAsmOperandNamed(StringRef N) const { - for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) - if (N == AsmOperands[i].SrcOpName) - return i; - return -1; + auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(), + [&](const AsmOperand &Op) { + return Op.SrcOpName == N; + }); + return (I != AsmOperands.end()) ? 
I - AsmOperands.begin() : -1; } void buildInstructionResultOperands(); @@ -587,7 +589,7 @@ struct MatchableInfo { HasGT = true; } - return !(HasLT ^ HasGT); + return HasLT == HasGT; } void dump() const; @@ -595,8 +597,7 @@ struct MatchableInfo { private: void tokenizeAsmString(AsmMatcherInfo const &Info, AsmVariantInfo const &Variant); - void addAsmOperand(size_t Start, size_t End, - std::string const &SeparatorCharacters); + void addAsmOperand(StringRef Token, bool IsIsolatedToken = false); }; /// SubtargetFeatureInfo - Helper class for storing information on a subtarget @@ -837,7 +838,8 @@ extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op, void MatchableInfo::initialize(const AsmMatcherInfo &Info, SmallPtrSetImpl<Record*> &SingletonRegisters, - AsmVariantInfo const &Variant) { + AsmVariantInfo const &Variant, + bool HasMnemonicFirst) { AsmVariantID = Variant.AsmVariantNo; AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, @@ -845,6 +847,24 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info, tokenizeAsmString(Info, Variant); + // The first token of the instruction is the mnemonic, which must be a + // simple string, not a $foo variable or a singleton register. + if (AsmOperands.empty()) + PrintFatalError(TheDef->getLoc(), + "Instruction '" + TheDef->getName() + "' has no tokens"); + + assert(!AsmOperands[0].Token.empty()); + if (HasMnemonicFirst) { + Mnemonic = AsmOperands[0].Token; + if (Mnemonic[0] == '$') + PrintFatalError(TheDef->getLoc(), + "Invalid instruction mnemonic '" + Mnemonic + "'!"); + + // Remove the first operand, it is tracked in the mnemonic field. + AsmOperands.erase(AsmOperands.begin()); + } else if (AsmOperands[0].Token[0] != '$') + Mnemonic = AsmOperands[0].Token; + // Compute the require features. for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates")) if (const SubtargetFeatureInfo *Feature = @@ -867,16 +887,8 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info, } /// Append an AsmOperand for the given substring of AsmString. -void MatchableInfo::addAsmOperand(size_t Start, size_t End, - std::string const &Separators) { - StringRef String = AsmString; - // Look for separators before and after to figure out is this token is - // isolated. Accept '$$' as that's how we escape '$'. - bool IsIsolatedToken = - (!Start || Separators.find(String[Start - 1]) != StringRef::npos || - String.substr(Start - 1, 2) == "$$") && - (End >= String.size() || Separators.find(String[End]) != StringRef::npos); - AsmOperands.push_back(AsmOperand(IsIsolatedToken, String.slice(Start, End))); +void MatchableInfo::addAsmOperand(StringRef Token, bool IsIsolatedToken) { + AsmOperands.push_back(AsmOperand(IsIsolatedToken, Token)); } /// tokenizeAsmString - Tokenize a simplified assembly string. 
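The `findAsmOperand`/`findAsmOperandNamed` rewrites above replace manual index loops with the standard find-then-index idiom: `std::find_if` plus iterator arithmetic. In isolation (plain C++ sketch over strings rather than `AsmOperand`s):

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Return the index of the first element matching N, or -1 if none does,
// mirroring the shape of the rewritten findAsmOperandNamed.
static int findNamed(const std::vector<std::string> &Ops, const std::string &N) {
  auto I = std::find_if(Ops.begin(), Ops.end(),
                        [&](const std::string &Op) { return Op == N; });
  return (I != Ops.end()) ? static_cast<int>(I - Ops.begin()) : -1;
}

int main() {
  std::vector<std::string> Ops = {"dst", "src", "imm"};
  std::cout << findNamed(Ops, "src") << '\n'; // 1
  std::cout << findNamed(Ops, "cc") << '\n';  // -1
  return 0;
}
```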
@@ -885,50 +897,58 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, StringRef String = AsmString; size_t Prev = 0; bool InTok = false; - std::string Separators = Variant.TokenizingCharacters + - Variant.SeparatorCharacters; + bool IsIsolatedToken = true; for (size_t i = 0, e = String.size(); i != e; ++i) { - if(Variant.BreakCharacters.find(String[i]) != std::string::npos) { - if(InTok) { - addAsmOperand(Prev, i, Separators); + char Char = String[i]; + if (Variant.BreakCharacters.find(Char) != std::string::npos) { + if (InTok) { + addAsmOperand(String.slice(Prev, i), false); Prev = i; + IsIsolatedToken = false; } InTok = true; continue; } - if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) { - if(InTok) { - addAsmOperand(Prev, i, Separators); + if (Variant.TokenizingCharacters.find(Char) != std::string::npos) { + if (InTok) { + addAsmOperand(String.slice(Prev, i), IsIsolatedToken); InTok = false; + IsIsolatedToken = false; } - addAsmOperand(i, i + 1, Separators); + addAsmOperand(String.slice(i, i + 1), IsIsolatedToken); Prev = i + 1; + IsIsolatedToken = true; continue; } - if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) { - if(InTok) { - addAsmOperand(Prev, i, Separators); + if (Variant.SeparatorCharacters.find(Char) != std::string::npos) { + if (InTok) { + addAsmOperand(String.slice(Prev, i), IsIsolatedToken); InTok = false; } Prev = i + 1; + IsIsolatedToken = true; continue; } - switch (String[i]) { + + switch (Char) { case '\\': if (InTok) { - addAsmOperand(Prev, i, Separators); + addAsmOperand(String.slice(Prev, i), false); InTok = false; + IsIsolatedToken = false; } ++i; assert(i != String.size() && "Invalid quoted character"); - addAsmOperand(i, i + 1, Separators); + addAsmOperand(String.slice(i, i + 1), IsIsolatedToken); Prev = i + 1; + IsIsolatedToken = false; break; case '$': { - if (InTok && Prev != i) { - addAsmOperand(Prev, i, Separators); + if (InTok) { + addAsmOperand(String.slice(Prev, i), false); InTok = false; + IsIsolatedToken = false; } // If this isn't "${", start new identifier looking like "$xxx" @@ -940,26 +960,20 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, size_t EndPos = String.find('}', i); assert(EndPos != StringRef::npos && "Missing brace in operand reference!"); - addAsmOperand(i, EndPos+1, Separators); + addAsmOperand(String.slice(i, EndPos+1), IsIsolatedToken); Prev = EndPos + 1; i = EndPos; + IsIsolatedToken = false; break; } + default: InTok = true; + break; } } if (InTok && Prev != String.size()) - addAsmOperand(Prev, StringRef::npos, Separators); - - // The first token of the instruction is the mnemonic, which must be a - // simple string, not a $foo variable or a singleton register. - if (AsmOperands.empty()) - PrintFatalError(TheDef->getLoc(), - "Instruction '" + TheDef->getName() + "' has no tokens"); - assert(!AsmOperands[0].Token.empty()); - if (AsmOperands[0].Token[0] != '$') - Mnemonic = AsmOperands[0].Token; + addAsmOperand(String.substr(Prev), IsIsolatedToken); } bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const { @@ -1352,8 +1366,7 @@ void AsmMatcherInfo::buildInfo() { // Build information about all of the AssemblerPredicates. std::vector<Record*> AllPredicates = Records.getAllDerivedDefinitions("Predicate"); - for (unsigned i = 0, e = AllPredicates.size(); i != e; ++i) { - Record *Pred = AllPredicates[i]; + for (Record *Pred : AllPredicates) { // Ignore predicates that are not intended for the assembler. 
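Returning to the `tokenizeAsmString` rewrite above: separator characters end the current token and are discarded, while tokenizing characters end the current token and are additionally emitted as single-character tokens of their own. A reduced standalone model of that classification, ignoring the escape and `${...}` cases (hypothetical character sets, plain C++):

```cpp
#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> tokenize(const std::string &S,
                                         const std::string &Tokenizing,
                                         const std::string &Separators) {
  std::vector<std::string> Toks;
  std::string Cur;
  for (char C : S) {
    if (Tokenizing.find(C) != std::string::npos) {
      if (!Cur.empty()) { Toks.push_back(Cur); Cur.clear(); }
      Toks.push_back(std::string(1, C)); // the delimiter is itself a token
    } else if (Separators.find(C) != std::string::npos) {
      if (!Cur.empty()) { Toks.push_back(Cur); Cur.clear(); } // dropped
    } else {
      Cur += C;
    }
  }
  if (!Cur.empty()) Toks.push_back(Cur);
  return Toks;
}

int main() {
  for (const std::string &T : tokenize("ld r0, [r1]", "[],", " "))
    std::cout << '\'' << T << "' ";
  std::cout << '\n'; // 'ld' 'r0' ',' '[' 'r1' ']'
  return 0;
}
```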
if (!Pred->getValueAsBit("AssemblerMatcherPredicate")) continue; @@ -1367,6 +1380,8 @@ void AsmMatcherInfo::buildInfo() { assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!"); } + bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst"); + // Parse the instructions; we need to do this first so that we can gather the // singleton register classes. SmallPtrSet<Record*, 16> SingletonRegisters; @@ -1398,7 +1413,7 @@ void AsmMatcherInfo::buildInfo() { auto II = llvm::make_unique<MatchableInfo>(*CGI); - II->initialize(*this, SingletonRegisters, Variant); + II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); // Ignore instructions which shouldn't be matched and diagnose invalid // instruction definitions with an error. @@ -1426,7 +1441,7 @@ void AsmMatcherInfo::buildInfo() { auto II = llvm::make_unique<MatchableInfo>(std::move(Alias)); - II->initialize(*this, SingletonRegisters, Variant); + II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); // Validate the alias definitions. II->validate(CommentDelimiter, false); @@ -1732,7 +1747,7 @@ static unsigned getConverterOperandID(const std::string &Name, static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, std::vector<std::unique_ptr<MatchableInfo>> &Infos, - raw_ostream &OS) { + bool HasMnemonicFirst, raw_ostream &OS) { SmallSetVector<std::string, 16> OperandConversionKinds; SmallSetVector<std::string, 16> InstructionConversionKinds; std::vector<std::vector<uint8_t> > ConversionTable; @@ -1866,7 +1881,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, // Add the operand entry to the instruction kind conversion row. ConversionRow.push_back(ID); - ConversionRow.push_back(OpInfo.AsmOperandNum); + ConversionRow.push_back(OpInfo.AsmOperandNum + HasMnemonicFirst); if (!IsNewConverter) break; @@ -1988,8 +2003,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, // Output the operand conversion kind enum. OS << "enum OperatorConversionKind {\n"; - for (unsigned i = 0, e = OperandConversionKinds.size(); i != e; ++i) - OS << " " << OperandConversionKinds[i] << ",\n"; + for (const std::string &Converter : OperandConversionKinds) + OS << " " << Converter << ",\n"; OS << " CVT_NUM_CONVERTERS\n"; OS << "};\n\n"; @@ -2156,11 +2171,12 @@ static void emitIsSubclass(CodeGenTarget &Target, OS << " return false;\n"; } } - OS << " }\n"; // If there were case statements emitted into the string stream write the // default. - if (!EmittedSwitch) + if (EmittedSwitch) + OS << " }\n"; + else OS << " return false;\n"; OS << "}\n\n"; @@ -2247,19 +2263,16 @@ static void emitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info, static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) { // Get the set of diagnostic types from all of the operand classes. std::set<StringRef> Types; - for (std::map<Record*, ClassInfo*>::const_iterator - I = Info.AsmOperandClasses.begin(), - E = Info.AsmOperandClasses.end(); I != E; ++I) { - if (!I->second->DiagnosticType.empty()) - Types.insert(I->second->DiagnosticType); + for (const auto &OpClassEntry : Info.AsmOperandClasses) { + if (!OpClassEntry.second->DiagnosticType.empty()) + Types.insert(OpClassEntry.second->DiagnosticType); } if (Types.empty()) return; // Now emit the enum entries. 
- for (std::set<StringRef>::const_iterator I = Types.begin(), E = Types.end(); - I != E; ++I) - OS << " Match_" << *I << ",\n"; + for (StringRef Type : Types) + OS << " Match_" << Type << ",\n"; OS << " END_OPERAND_DIAGNOSTIC_TYPES\n"; } @@ -2367,8 +2380,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info, // iteration order of the map is stable. std::map<std::string, std::vector<Record*> > AliasesFromMnemonic; - for (unsigned i = 0, e = Aliases.size(); i != e; ++i) { - Record *R = Aliases[i]; + for (Record *R : Aliases) { // FIXME: Allow AssemblerVariantName to be a comma separated list. std::string AsmVariantName = R->getValueAsString("AsmVariantName"); if (AsmVariantName != AsmParserVariantName) @@ -2381,10 +2393,8 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info, // Process each alias a "from" mnemonic at a time, building the code executed // by the string remapper. std::vector<StringMatcher::StringPair> Cases; - for (std::map<std::string, std::vector<Record*> >::iterator - I = AliasesFromMnemonic.begin(), E = AliasesFromMnemonic.end(); - I != E; ++I) { - const std::vector<Record*> &ToVec = I->second; + for (const auto &AliasEntry : AliasesFromMnemonic) { + const std::vector<Record*> &ToVec = AliasEntry.second; // Loop through each alias and emit code that handles each case. If there // are two instructions without predicates, emit an error. If there is one, @@ -2409,7 +2419,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info, AliasWithNoPredicate = i; continue; } - if (R->getValueAsString("ToMnemonic") == I->first) + if (R->getValueAsString("ToMnemonic") == AliasEntry.first) PrintFatalError(R->getLoc(), "MnemonicAlias to the same string"); if (!MatchCode.empty()) @@ -2427,7 +2437,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info, MatchCode += "return;"; - Cases.push_back(std::make_pair(I->first, MatchCode)); + Cases.push_back(std::make_pair(AliasEntry.first, MatchCode)); } StringMatcher("Mnemonic", Cases, OS).Emit(Indent); } @@ -2470,12 +2480,10 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info, static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, const AsmMatcherInfo &Info, StringRef ClassName, StringToOffsetTable &StringTable, - unsigned MaxMnemonicIndex) { + unsigned MaxMnemonicIndex, bool HasMnemonicFirst) { unsigned MaxMask = 0; - for (std::vector<OperandMatchEntry>::const_iterator it = - Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end(); - it != ie; ++it) { - MaxMask |= it->OperandMask; + for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) { + MaxMask |= OMI.OperandMask; } // Emit the static custom operand parsing table; @@ -2515,10 +2523,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, << Info.OperandMatchInfo.size() << "] = {\n"; OS << " /* Operand List Mask, Mnemonic, Operand Class, Features */\n"; - for (std::vector<OperandMatchEntry>::const_iterator it = - Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end(); - it != ie; ++it) { - const OperandMatchEntry &OMI = *it; + for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) { const MatchableInfo &II = *OMI.MI; OS << " { "; @@ -2589,19 +2594,25 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n"; OS << " // Get the next operand index.\n"; - OS << " unsigned NextOpNum = 
Operands.size();\n"; + OS << " unsigned NextOpNum = Operands.size()" + << (HasMnemonicFirst ? " - 1" : "") << ";\n"; // Emit code to search the table. OS << " // Search the table.\n"; - OS << " std::pair<const OperandMatchEntry*, const OperandMatchEntry*>"; - OS << " MnemonicRange\n"; - OS << " (OperandMatchTable, OperandMatchTable+"; - OS << Info.OperandMatchInfo.size() << ");\n"; - OS << " if(!Mnemonic.empty())\n"; - OS << " MnemonicRange = std::equal_range(OperandMatchTable,"; - OS << " OperandMatchTable+" - << Info.OperandMatchInfo.size() << ", Mnemonic,\n" - << " LessOpcodeOperand());\n\n"; + if (HasMnemonicFirst) { + OS << " auto MnemonicRange =\n"; + OS << " std::equal_range(std::begin(OperandMatchTable), " + "std::end(OperandMatchTable),\n"; + OS << " Mnemonic, LessOpcodeOperand());\n\n"; + } else { + OS << " auto MnemonicRange = std::make_pair(std::begin(OperandMatchTable)," + " std::end(OperandMatchTable));\n"; + OS << " if (!Mnemonic.empty())\n"; + OS << " MnemonicRange =\n"; + OS << " std::equal_range(std::begin(OperandMatchTable), " + "std::end(OperandMatchTable),\n"; + OS << " Mnemonic, LessOpcodeOperand());\n\n"; + } OS << " if (MnemonicRange.first == MnemonicRange.second)\n"; OS << " return MatchOperand_NoMatch;\n\n"; @@ -2686,6 +2697,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { // Compute the information on the custom operand parsing. Info.buildOperandMatchInfo(); + bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst"); + // Write the output. // Information for the class declaration. @@ -2700,7 +2713,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << "&Operands);\n"; OS << " void convertToMapAndConstraints(unsigned Kind,\n "; OS << " const OperandVector &Operands) override;\n"; - OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n"; + if (HasMnemonicFirst) + OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n"; OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n" << " MCInst &Inst,\n" << " uint64_t &ErrorInfo," @@ -2761,7 +2775,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { // Generate the convertToMCInst function to convert operands into an MCInst. // Also, generate the convertToMapAndConstraints function for MS-style inline // assembly. The latter doesn't actually generate a MCInst. - emitConvertFuncs(Target, ClassName, Info.Matchables, OS); + emitConvertFuncs(Target, ClassName, Info.Matchables, HasMnemonicFirst, OS); // Emit the enumeration for classes which participate in matching. emitMatchClassEnumeration(Target, Info.Classes, OS); @@ -2883,24 +2897,26 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } // A method to determine if a mnemonic is in the list. 
- OS << "bool " << Target.getName() << ClassName << "::\n" - << "mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {\n"; - OS << " // Find the appropriate table for this asm variant.\n"; - OS << " const MatchEntry *Start, *End;\n"; - OS << " switch (VariantID) {\n"; - OS << " default: llvm_unreachable(\"invalid variant!\");\n"; - for (unsigned VC = 0; VC != VariantCount; ++VC) { - Record *AsmVariant = Target.getAsmParserVariant(VC); - int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); - OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC - << "); End = std::end(MatchTable" << VC << "); break;\n"; + if (HasMnemonicFirst) { + OS << "bool " << Target.getName() << ClassName << "::\n" + << "mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {\n"; + OS << " // Find the appropriate table for this asm variant.\n"; + OS << " const MatchEntry *Start, *End;\n"; + OS << " switch (VariantID) {\n"; + OS << " default: llvm_unreachable(\"invalid variant!\");\n"; + for (unsigned VC = 0; VC != VariantCount; ++VC) { + Record *AsmVariant = Target.getAsmParserVariant(VC); + int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); + OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC + << "); End = std::end(MatchTable" << VC << "); break;\n"; + } + OS << " }\n"; + OS << " // Search the table.\n"; + OS << " auto MnemonicRange = "; + OS << "std::equal_range(Start, End, Mnemonic, LessOpcode());\n"; + OS << " return MnemonicRange.first != MnemonicRange.second;\n"; + OS << "}\n\n"; } - OS << " }\n"; - OS << " // Search the table.\n"; - OS << " std::pair<const MatchEntry*, const MatchEntry*> MnemonicRange =\n"; - OS << " std::equal_range(Start, End, Mnemonic, LessOpcode());\n"; - OS << " return MnemonicRange.first != MnemonicRange.second;\n"; - OS << "}\n\n"; // Finally, build the match function. OS << "unsigned " << Target.getName() << ClassName << "::\n" @@ -2909,8 +2925,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << " bool matchingInlineAsm, unsigned VariantID) {\n"; OS << " // Eliminate obvious mismatches.\n"; - OS << " if (Operands.size() > " << MaxNumOperands << ") {\n"; - OS << " ErrorInfo = " << MaxNumOperands << ";\n"; + OS << " if (Operands.size() > " + << (MaxNumOperands + HasMnemonicFirst) << ") {\n"; + OS << " ErrorInfo = " + << (MaxNumOperands + HasMnemonicFirst) << ";\n"; OS << " return Match_InvalidOperand;\n"; OS << " }\n\n"; @@ -2919,10 +2937,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n"; OS << " // Get the instruction mnemonic, which is the first token.\n"; - OS << " StringRef Mnemonic;\n"; - OS << " if (Operands[0]->isToken())\n"; - OS << " Mnemonic = ((" << Target.getName() - << "Operand&)*Operands[0]).getToken();\n\n"; + if (HasMnemonicFirst) { + OS << " StringRef Mnemonic = ((" << Target.getName() + << "Operand&)*Operands[0]).getToken();\n\n"; + } else { + OS << " StringRef Mnemonic;\n"; + OS << " if (Operands[0]->isToken())\n"; + OS << " Mnemonic = ((" << Target.getName() + << "Operand&)*Operands[0]).getToken();\n\n"; + } if (HasMnemonicAliases) { OS << " // Process all MnemonicAliases to remap the mnemonic.\n"; @@ -2951,12 +2974,18 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << "); End = std::end(MatchTable" << VC << "); break;\n"; } OS << " }\n"; + OS << " // Search the table.\n"; - OS << " std::pair<const MatchEntry*, const MatchEntry*> " - "MnemonicRange(Start, End);\n"; - OS << " unsigned SIndex = Mnemonic.empty() ? 
0 : 1;\n"; - OS << " if (!Mnemonic.empty())\n"; - OS << " MnemonicRange = std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n"; + if (HasMnemonicFirst) { + OS << " auto MnemonicRange = " + "std::equal_range(Start, End, Mnemonic, LessOpcode());\n\n"; + } else { + OS << " auto MnemonicRange = std::make_pair(Start, End);\n"; + OS << " unsigned SIndex = Mnemonic.empty() ? 0 : 1;\n"; + OS << " if (!Mnemonic.empty())\n"; + OS << " MnemonicRange = " + "std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n"; + } OS << " // Return a more specific error code if no mnemonics match.\n"; OS << " if (MnemonicRange.first == MnemonicRange.second)\n"; @@ -2966,16 +2995,25 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << "*ie = MnemonicRange.second;\n"; OS << " it != ie; ++it) {\n"; + if (HasMnemonicFirst) { + OS << " // equal_range guarantees that instruction mnemonic matches.\n"; + OS << " assert(Mnemonic == it->getMnemonic());\n"; + } + // Emit check that the subclasses match. OS << " bool OperandsValid = true;\n"; - OS << " for (unsigned i = SIndex; i != " << MaxNumOperands << "; ++i) {\n"; + OS << " for (unsigned i = " << (HasMnemonicFirst ? "0" : "SIndex") + << "; i != " << MaxNumOperands << "; ++i) {\n"; OS << " auto Formal = static_cast<MatchClassKind>(it->Classes[i]);\n"; - OS << " if (i >= Operands.size()) {\n"; + OS << " if (i" << (HasMnemonicFirst ? "+1" : "") + << " >= Operands.size()) {\n"; OS << " OperandsValid = (Formal == " <<"InvalidMatchClass);\n"; - OS << " if (!OperandsValid) ErrorInfo = i;\n"; + OS << " if (!OperandsValid) ErrorInfo = i" + << (HasMnemonicFirst ? "+1" : "") << ";\n"; OS << " break;\n"; OS << " }\n"; - OS << " MCParsedAsmOperand &Actual = *Operands[i];\n"; + OS << " MCParsedAsmOperand &Actual = *Operands[i" + << (HasMnemonicFirst ? "+1" : "") << "];\n"; OS << " unsigned Diag = validateOperandClass(Actual, Formal);\n"; OS << " if (Diag == Match_Success)\n"; OS << " continue;\n"; @@ -2991,8 +3029,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " // If we already had a match that only failed due to a\n"; OS << " // target predicate, that diagnostic is preferred.\n"; OS << " if (!HadMatchOtherThanPredicate &&\n"; - OS << " (it == MnemonicRange.first || ErrorInfo <= i)) {\n"; - OS << " ErrorInfo = i;\n"; + OS << " (it == MnemonicRange.first || ErrorInfo <= i" + << (HasMnemonicFirst ? "+1" : "") << ")) {\n"; + OS << " ErrorInfo = i" << (HasMnemonicFirst ? "+1" : "") << ";\n"; OS << " // InvalidOperand is the default. 
Prefer specificity.\n"; OS << " if (Diag != Match_InvalidOperand)\n"; OS << " RetCode = Diag;\n"; @@ -3067,7 +3106,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (!Info.OperandMatchInfo.empty()) emitCustomOperandParsing(OS, Target, Info, ClassName, StringTable, - MaxMnemonicIndex); + MaxMnemonicIndex, HasMnemonicFirst); OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n"; } diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp index 6246d811123d..d056de003e18 100644 --- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -185,16 +185,12 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) { const std::vector<Record*> &ImpliesList = Feature->getValueAsListOfDefs("Implies"); - if (ImpliesList.empty()) { - OS << "{ }"; - } else { - OS << "{ "; - for (unsigned j = 0, M = ImpliesList.size(); j < M;) { - OS << Target << "::" << ImpliesList[j]->getName(); - if (++j < M) OS << ", "; - } - OS << " }"; + OS << "{"; + for (unsigned j = 0, M = ImpliesList.size(); j < M;) { + OS << " " << Target << "::" << ImpliesList[j]->getName(); + if (++j < M) OS << ","; } + OS << " }"; OS << " }"; ++NumFeatures; @@ -240,16 +236,12 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) { << "\"" << Name << "\", " << "\"Select the " << Name << " processor\", "; - if (FeatureList.empty()) { - OS << "{ }"; - } else { - OS << "{ "; - for (unsigned j = 0, M = FeatureList.size(); j < M;) { - OS << Target << "::" << FeatureList[j]->getName(); - if (++j < M) OS << ", "; - } - OS << " }"; + OS << "{"; + for (unsigned j = 0, M = FeatureList.size(); j < M;) { + OS << " " << Target << "::" << FeatureList[j]->getName(); + if (++j < M) OS << ","; } + OS << " }"; // The { } is for the "implies" section of this data structure. OS << ", { } }"; diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp index c16a5583eb36..bcc594d69a1d 100644 --- a/contrib/llvm/utils/TableGen/TableGen.cpp +++ b/contrib/llvm/utils/TableGen/TableGen.cpp @@ -13,6 +13,7 @@ #include "TableGenBackends.h" // Declares all backends. #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/TableGen/Error.h" @@ -182,6 +183,8 @@ int main(int argc, char **argv) { PrettyStackTraceProgram X(argc, argv); cl::ParseCommandLineOptions(argc, argv); + llvm_shutdown_obj Y; + return TableGenMain(argv[0], &LLVMTableGenMain); } |