Diffstat (limited to 'contrib'): 398 files changed, 5946 insertions, 3808 deletions
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h index 7e5171c3f3a4..b495e25dd5e5 100644 --- a/contrib/llvm/include/llvm/ADT/DenseSet.h +++ b/contrib/llvm/include/llvm/ADT/DenseSet.h @@ -17,7 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/type_traits.h" -#include <algorithm> +#include <algorithm> #include <cstddef> #include <initializer_list> #include <iterator> diff --git a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index fa81539a9d6f..6344e84b58eb 100644 --- a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -43,6 +43,7 @@ class LoopInfo; class PHINode; class SelectInst; class TargetLibraryInfo; +class PhiValues; class Value; /// This is the AA result object for the basic, local, and stateless alias @@ -60,19 +61,22 @@ class BasicAAResult : public AAResultBase<BasicAAResult> { AssumptionCache &AC; DominatorTree *DT; LoopInfo *LI; + PhiValues *PV; public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) - : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI) {} + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, + PhiValues *PV = nullptr) + : AAResultBase(), DL(DL), F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), PV(PV) + {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT), LI(Arg.LI) {} + DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} + AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), PV(Arg.PV) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 0f3f2be9aeb4..d27b3e42bbeb 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -682,7 +682,7 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, SmallVectorImpl<unsigned> &SortedIndices); /// Returns true if the memory operations \p A and \p B are consecutive. -/// This is a simple API that does not depend on the analysis pass. +/// This is a simple API that does not depend on the analysis pass. bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType = true); @@ -734,7 +734,7 @@ private: /// accesses of a loop. /// /// It runs the analysis for a loop on demand. This can be initiated by -/// querying the loop access info via AM.getResult<LoopAccessAnalysis>. +/// querying the loop access info via AM.getResult<LoopAccessAnalysis>. /// getResult return a LoopAccessInfo object. See this class for the /// specifics of what information is provided. 
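The BasicAAResult hunk above threads an optional PhiValues analysis through the constructor. A minimal sketch of an updated construction site, assuming the caller already has the other analysis results in hand (the helper name is illustrative):

    #include "llvm/Analysis/BasicAliasAnalysis.h"
    #include "llvm/Analysis/PhiValues.h"

    using namespace llvm;

    // Sketch only: passing nullptr for PV preserves the old behavior;
    // a non-null PhiValues lets BasicAA reuse precomputed phi inputs.
    BasicAAResult makeBasicAA(const DataLayout &DL, const Function &F,
                              const TargetLibraryInfo &TLI, AssumptionCache &AC,
                              DominatorTree *DT, LoopInfo *LI, PhiValues *PV) {
      return BasicAAResult(DL, F, TLI, AC, DT, LI, PV);
    }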
class LoopAccessAnalysis diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index 1c6ec98dfedc..1c40cffc7f67 100644 --- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -44,6 +44,7 @@ class Instruction; class LoadInst; class PHITransAddr; class TargetLibraryInfo; +class PhiValues; class Value; /// A memory dependence query can return one of three different answers. @@ -360,13 +361,14 @@ private: AssumptionCache &AC; const TargetLibraryInfo &TLI; DominatorTree &DT; + PhiValues &PV; PredIteratorCache PredCache; public: MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, const TargetLibraryInfo &TLI, - DominatorTree &DT) - : AA(AA), AC(AC), TLI(TLI), DT(DT) {} + DominatorTree &DT, PhiValues &PV) + : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {} /// Handle invalidation in the new PM. bool invalidate(Function &F, const PreservedAnalyses &PA, diff --git a/contrib/llvm/include/llvm/Analysis/MustExecute.h b/contrib/llvm/include/llvm/Analysis/MustExecute.h index 8daf156567cd..97ad76d451ca 100644 --- a/contrib/llvm/include/llvm/Analysis/MustExecute.h +++ b/contrib/llvm/include/llvm/Analysis/MustExecute.h @@ -10,7 +10,7 @@ /// Contains a collection of routines for determining if a given instruction is /// guaranteed to execute if a given point in control flow is reached. The most /// common example is an instruction within a loop being provably executed if we -/// branch to the header of it's containing loop. +/// branch to the header of it's containing loop. /// //===----------------------------------------------------------------------===// @@ -58,7 +58,7 @@ void computeLoopSafetyInfo(LoopSafetyInfo *, Loop *); bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); - + } #endif diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index e14e2bd44034..d80ae1d6845d 100644 --- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -326,7 +326,7 @@ public: bool haveFastSqrt(Type *Ty) { return false; } bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } - + unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h index e6a219a8045b..c1a91a8e5981 100644 --- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h +++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h @@ -464,7 +464,7 @@ class Value; /// This is equivelent to saying that all instructions within the basic block /// are guaranteed to transfer execution to their successor within the basic /// block. This has the same assumptions w.r.t. undefined behavior as the - /// instruction variant of this function. + /// instruction variant of this function. 
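Unlike BasicAAResult, MemoryDependenceResults takes the new PhiValues parameter by reference, so every caller must now supply it. A hedged sketch of the updated construction (the function wrapper is an assumption, not LLVM code):

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/Analysis/PhiValues.h"

    using namespace llvm;

    // Sketch: the PhiValues argument is now mandatory.
    void buildMemDep(AliasAnalysis &AA, AssumptionCache &AC,
                     const TargetLibraryInfo &TLI, DominatorTree &DT,
                     PhiValues &PV) {
      MemoryDependenceResults MDR(AA, AC, TLI, DT, PV);
      (void)MDR; // ... run queries against MDR here ...
    }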
bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB); /// Return true if this function can prove that the instruction I diff --git a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def index 57e259615d0c..944c5dd1c157 100644 --- a/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/contrib/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -856,6 +856,7 @@ HANDLE_DW_UT(0x06, split_type) // TODO: Add Mach-O and COFF names. // Official DWARF sections. HANDLE_DWARF_SECTION(DebugAbbrev, ".debug_abbrev", "debug-abbrev") +HANDLE_DWARF_SECTION(DebugAddr, ".debug_addr", "debug-addr") HANDLE_DWARF_SECTION(DebugAranges, ".debug_aranges", "debug-aranges") HANDLE_DWARF_SECTION(DebugInfo, ".debug_info", "debug-info") HANDLE_DWARF_SECTION(DebugTypes, ".debug_types", "debug-types") diff --git a/contrib/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm/include/llvm/BinaryFormat/ELF.h index 0f3f1939ce68..2e778779117b 100644 --- a/contrib/llvm/include/llvm/BinaryFormat/ELF.h +++ b/contrib/llvm/include/llvm/BinaryFormat/ELF.h @@ -413,8 +413,10 @@ enum { // ARM Specific e_flags enum : unsigned { - EF_ARM_SOFT_FLOAT = 0x00000200U, - EF_ARM_VFP_FLOAT = 0x00000400U, + EF_ARM_SOFT_FLOAT = 0x00000200U, // Legacy pre EABI_VER5 + EF_ARM_ABI_FLOAT_SOFT = 0x00000200U, // EABI_VER5 + EF_ARM_VFP_FLOAT = 0x00000400U, // Legacy pre EABI_VER5 + EF_ARM_ABI_FLOAT_HARD = 0x00000400U, // EABI_VER5 EF_ARM_EABI_UNKNOWN = 0x00000000U, EF_ARM_EABI_VER1 = 0x01000000U, EF_ARM_EABI_VER2 = 0x02000000U, diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h index 91604fd2df87..f835bacfb548 100644 --- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h +++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h @@ -104,12 +104,12 @@ public: const std::string &getName() const { return Name; } /// By default, write barriers are replaced with simple store - /// instructions. If true, you must provide a custom pass to lower + /// instructions. If true, you must provide a custom pass to lower /// calls to \@llvm.gcwrite. bool customWriteBarrier() const { return CustomWriteBarriers; } /// By default, read barriers are replaced with simple load - /// instructions. If true, you must provide a custom pass to lower + /// instructions. If true, you must provide a custom pass to lower /// calls to \@llvm.gcread. bool customReadBarrier() const { return CustomReadBarriers; } @@ -146,7 +146,7 @@ public: } /// By default, roots are left for the code generator so it can generate a - /// stack map. If true, you must provide a custom pass to lower + /// stack map. If true, you must provide a custom pass to lower /// calls to \@llvm.gcroot. 
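The new EF_ARM_ABI_FLOAT_SOFT/HARD names alias the legacy pre-EABI_VER5 values, so the same bits can be decoded under either spelling. A small sketch of reading the float ABI from e_flags with the new names (error handling elided):

    #include "llvm/BinaryFormat/ELF.h"

    // Sketch: the numeric values are unchanged; only the EABI_VER5
    // spelling of the flags is new.
    const char *armFloatABI(unsigned EFlags) {
      if (EFlags & llvm::ELF::EF_ARM_ABI_FLOAT_HARD)
        return "hard";
      if (EFlags & llvm::ELF::EF_ARM_ABI_FLOAT_SOFT)
        return "soft";
      return "unknown";
    }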
bool customRoots() const { return CustomRoots; } diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 713d72eb4c9b..a8c26082f221 100644 --- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -786,7 +786,7 @@ public: /// setAction ({G_ADD, 0, LLT::scalar(32)}, Legal); /// setLegalizeScalarToDifferentSizeStrategy( /// G_ADD, 0, widenToLargerTypesAndNarrowToLargest); - /// will end up defining getAction({G_ADD, 0, T}) to return the following + /// will end up defining getAction({G_ADD, 0, T}) to return the following /// actions for different scalar types T: /// LLT::scalar(1)..LLT::scalar(31): {WidenScalar, 0, LLT::scalar(32)} /// LLT::scalar(32): {Legal, 0, LLT::scalar(32)} @@ -814,7 +814,7 @@ public: VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] = S; } - /// A SizeChangeStrategy for the common case where legalization for a + /// A SizeChangeStrategy for the common case where legalization for a /// particular operation consists of only supporting a specific set of type /// sizes. E.g. /// setAction ({G_DIV, 0, LLT::scalar(32)}, Legal); diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 983a4e680d5c..ac1673de5f3f 100644 --- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -942,6 +942,16 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, unsigned Val, MachineMemOperand &MMO); + + /// Build and insert \p Res = G_BLOCK_ADDR \p BA + /// + /// G_BLOCK_ADDR computes the address of a basic block. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register of a pointer type. + /// + /// \return The newly created instruction. + MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA); }; /// A CRTP class that contains methods for building instructions that can diff --git a/contrib/llvm/include/llvm/CodeGen/MachORelocation.h b/contrib/llvm/include/llvm/CodeGen/MachORelocation.h index 8c9b7a84e5b8..cbb49695af75 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachORelocation.h +++ b/contrib/llvm/include/llvm/CodeGen/MachORelocation.h @@ -27,15 +27,15 @@ namespace llvm { uint32_t r_symbolnum; // symbol index if r_extern == 1 else section index bool r_pcrel; // was relocated pc-relative already uint8_t r_length; // length = 2 ^ r_length - bool r_extern; // + bool r_extern; // uint8_t r_type; // if not 0, machine-specific relocation type. bool r_scattered; // 1 = scattered, 0 = non-scattered int32_t r_value; // the value the item to be relocated is referring // to. 
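A hedged sketch of the new MachineIRBuilder::buildBlockAddress helper in use; the register creation and the 64-bit address-space-0 pointer type around it are assumptions about the caller:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Support/LowLevelTypeImpl.h"

    using namespace llvm;

    // Sketch: emit G_BLOCK_ADDR for a blockaddress constant. Per the
    // documented preconditions, setBasicBlock/setMI must already have
    // been called and Res must be a generic vreg of pointer type.
    void emitBlockAddr(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       const BlockAddress *BA) {
      unsigned Res = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
      MIRBuilder.buildBlockAddress(Res, BA);
    }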
- public: + public: uint32_t getPackedFields() const { if (r_scattered) - return (1 << 31) | (r_pcrel << 30) | ((r_length & 3) << 28) | + return (1 << 31) | (r_pcrel << 30) | ((r_length & 3) << 28) | ((r_type & 15) << 24) | (r_address & 0x00FFFFFF); else return (r_symbolnum << 8) | (r_pcrel << 7) | ((r_length & 3) << 5) | @@ -45,8 +45,8 @@ namespace llvm { uint32_t getRawAddress() const { return r_address; } MachORelocation(uint32_t addr, uint32_t index, bool pcrel, uint8_t len, - bool ext, uint8_t type, bool scattered = false, - int32_t value = 0) : + bool ext, uint8_t type, bool scattered = false, + int32_t value = 0) : r_address(addr), r_symbolnum(index), r_pcrel(pcrel), r_length(len), r_extern(ext), r_type(type), r_scattered(scattered), r_value(value) {} }; diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 6be304fa368b..554e89019b76 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -105,7 +105,7 @@ class MachineModuleInfo : public ImmutablePass { /// basic block's address of label. MMIAddrLabelMap *AddrLabelSymbols; - // TODO: Ideally, what we'd like is to have a switch that allows emitting + // TODO: Ideally, what we'd like is to have a switch that allows emitting // synchronous (precise at call-sites only) CFA into .eh_frame. However, // even under this switch, we'd like .debug_frame to be precise when using // -g. At this moment, there's no way to specify that some CFI directives diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h index 4249a99a891b..95bfc24b57ff 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { @@ -74,6 +75,13 @@ public: /// cost model information. LiveRegUnits LRU; + /// Contains the accumulated register liveness information for the + /// instructions in this \p Candidate. + /// + /// This is optionally used by the target to determine which registers have + /// been used across the sequence. + LiveRegUnits UsedInSequence; + /// Return the number of instructions in this Candidate. unsigned getLength() const { return Len; } @@ -137,6 +145,12 @@ public: // outlining candidate. std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(), [this](MachineInstr &MI) { LRU.stepBackward(MI); }); + + // Walk over the sequence itself and figure out which registers were used + // in the sequence. + UsedInSequence.init(TRI); + std::for_each(front(), std::next(back()), + [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); }); } }; diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h index 5e7837834ec8..56adc2e2fbfa 100644 --- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -252,7 +252,7 @@ class TargetRegisterInfo; MachineInstr *Instr = nullptr; ///< Alternatively, a MachineInstr. public: - SUnit *OrigNode = nullptr; ///< If not this, the node from which this node + SUnit *OrigNode = nullptr; ///< If not this, the node from which this node /// was cloned. 
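For the scattered case in getPackedFields above, the word packs 1|pcrel|length|type|address from the top bit down. A worked example with illustrative field values, mirroring that bit arithmetic:

    #include <cassert>
    #include <cstdint>

    // Sketch: reproduce the scattered-relocation packing for one
    // concrete set of values.
    int main() {
      uint32_t addr = 0x001234; // must fit in the low 24 bits
      bool pcrel = true;
      uint8_t len = 2;          // length = 2^2 = 4 bytes
      uint8_t type = 1;
      uint32_t packed = (1u << 31) | (uint32_t(pcrel) << 30) |
                        ((len & 3u) << 28) | ((type & 15u) << 24) |
                        (addr & 0x00FFFFFFu);
      assert(packed == 0xE1001234u);
      return 0;
    }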
(SD scheduling only) const MCSchedClassDesc *SchedClass = diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h index 3c9850265737..e584a4136e4f 100644 --- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h +++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h @@ -156,7 +156,7 @@ class StatepointOpers { // TODO:: we should change the STATEPOINT representation so that CC and // Flags should be part of meta operands, with args and deopt operands, and // gc operands all prefixed by their length and a type code. This would be - // much more consistent. + // much more consistent. public: // These values are aboolute offsets into the operands of the statepoint // instruction. diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h index d5ff71cf9ac2..40540bd6e1ff 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h @@ -718,7 +718,7 @@ public: /// always broken down into scalars in some contexts. This occurs even if the /// vector type is legal. virtual unsigned getVectorTypeBreakdownForCallingConv( - LLVMContext &Context, EVT VT, EVT &IntermediateVT, + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, RegisterVT); @@ -1174,7 +1174,7 @@ public: /// are legal for some operations and not for other operations. /// For MIPS all vector types must be passed through the integer register set. virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, - EVT VT) const { + CallingConv::ID CC, EVT VT) const { return getRegisterType(Context, VT); } @@ -1182,6 +1182,7 @@ public: /// this occurs when a vector type is used, as vector are passed through the /// integer register set. virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { return getNumRegisters(Context, VT); } @@ -3489,10 +3490,10 @@ public: // SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode *> *Created) const; + SmallVectorImpl<SDNode *> &Created) const; SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode *> *Created) const; + SmallVectorImpl<SDNode *> &Created) const; /// Targets may override this function to provide custom SDIV lowering for /// power-of-2 denominators. If the target returns an empty SDValue, LLVM @@ -3500,7 +3501,7 @@ public: /// operations. virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const; + SmallVectorImpl<SDNode *> &Created) const; /// Indicate whether this target prefers to combine FDIVs with the same /// divisor. If the transform should never be done, return zero. If the @@ -3690,7 +3691,7 @@ private: /// Given an LLVM IR type and return type attributes, compute the return value /// EVTs and flags, and optionally also the offsets, if the return value is /// being lowered to memory. 
-void GetReturnInfo(Type *ReturnType, AttributeList attr, +void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl<ISD::OutputArg> &Outs, const TargetLowering &TLI, const DataLayout &DL); diff --git a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h index 5918c524d11c..8f5c9cb8c3fa 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -16,7 +16,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" -#include <cassert> +#include <cassert> #include <string> namespace llvm { diff --git a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 538a5845466c..55a8ba630a59 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -456,7 +456,7 @@ public: /// stack frame offset. The first register is closest to the incoming stack /// pointer if stack grows down, and vice versa. /// Notice: This function does not take into account disabled CSRs. - /// In most cases you will want to use instead the function + /// In most cases you will want to use instead the function /// getCalleeSavedRegs that is implemented in MachineRegisterInfo. virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; @@ -518,7 +518,7 @@ public: /// guaranteed to be restored before any uses. This is useful for targets that /// have call sequences where a GOT register may be updated by the caller /// prior to a call and is guaranteed to be restored (also by the caller) - /// after the call. + /// after the call. virtual bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { return false; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def index 41c538076798..b5f1cc0198dc 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def @@ -143,7 +143,6 @@ CV_SYMBOL(S_MANSLOT , 0x1120) CV_SYMBOL(S_MANMANYREG , 0x1121) CV_SYMBOL(S_MANREGREL , 0x1122) CV_SYMBOL(S_MANMANYREG2 , 0x1123) -CV_SYMBOL(S_UNAMESPACE , 0x1124) CV_SYMBOL(S_DATAREF , 0x1126) CV_SYMBOL(S_ANNOTATIONREF , 0x1128) CV_SYMBOL(S_TOKENREF , 0x1129) @@ -255,6 +254,7 @@ SYMBOL_RECORD_ALIAS(S_GMANDATA , 0x111d, ManagedGlobalData, DataSym) SYMBOL_RECORD(S_LTHREAD32 , 0x1112, ThreadLocalDataSym) SYMBOL_RECORD_ALIAS(S_GTHREAD32 , 0x1113, GlobalTLS, ThreadLocalDataSym) +SYMBOL_RECORD(S_UNAMESPACE , 0x1124, UsingNamespaceSym) #undef CV_SYMBOL #undef SYMBOL_RECORD diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index cf267f23967b..93306824012e 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -942,6 +942,19 @@ public: uint32_t RecordOffset; }; +// S_UNAMESPACE +class UsingNamespaceSym : public SymbolRecord { +public: + explicit UsingNamespaceSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} + explicit UsingNamespaceSym(uint32_t RecordOffset) + : SymbolRecord(SymbolRecordKind::RegRelativeSym), + RecordOffset(RecordOffset) {} + + StringRef Name; + + uint32_t RecordOffset; +}; + // S_ANNOTATION using CVSymbol = CVRecord<SymbolKind>; diff --git 
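GetReturnInfo, like the getRegisterTypeForCallingConv/getNumRegistersForCallingConv hooks above, now receives the calling convention explicitly so targets can pick per-CC register types. A sketch of an updated call site; the wrapper function is an assumption:

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: the function's calling convention is now passed through.
    void computeReturns(const Function &F, const TargetLowering &TLI,
                        SmallVectorImpl<ISD::OutputArg> &Outs) {
      GetReturnInfo(F.getCallingConv(), F.getReturnType(), F.getAttributes(),
                    Outs, TLI, F.getParent()->getDataLayout());
    }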
a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h index f89eb34fdd77..bbdd5e0d9c3f 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h +++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h @@ -154,6 +154,8 @@ enum DIDumpType : unsigned { struct DIDumpOptions { unsigned DumpType = DIDT_All; unsigned RecurseDepth = -1U; + uint16_t Version = 0; // DWARF version to assume when extracting. + uint8_t AddrSize = 4; // Address byte size to assume when extracting. bool ShowAddresses = true; bool ShowChildren = false; bool ShowParents = false; diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index fe7430c9f04c..f5419fe02421 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -323,6 +323,10 @@ public: /// have initialized the relevant target descriptions. Error loadRegisterInfo(const object::ObjectFile &Obj); + /// Get address size from CUs. + /// TODO: refactor compile_units() to make this const. + uint8_t getCUAddrSize(); + private: /// Return the compile unit which contains instruction with provided /// address. diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h index 10e146b70ec7..1ed087520b30 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -51,6 +51,8 @@ public: /// reflect the absolute address of this pointer. Optional<uint64_t> getEncodedPointer(uint32_t *Offset, uint8_t Encoding, uint64_t AbsPosOffset = 0) const; + + size_t size() const { return Section == nullptr ? 0 : Section->Data.size(); } }; } // end namespace llvm diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h new file mode 100644 index 000000000000..ffbd1b06d1e2 --- /dev/null +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h @@ -0,0 +1,98 @@ +//===- DWARFDebugAddr.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGADDR_H +#define LLVM_DEBUGINFO_DWARFDEBUGADDR_H + +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <cstdint> +#include <map> +#include <vector> + +namespace llvm { + +class Error; +class raw_ostream; + +/// A class representing an address table as specified in DWARF v5. +/// The table consists of a header followed by an array of address values from +/// .debug_addr section. +class DWARFDebugAddrTable { +public: + struct Header { + /// The total length of the entries for this table, not including the length + /// field itself. + uint32_t Length = 0; + /// The DWARF version number. + uint16_t Version = 5; + /// The size in bytes of an address on the target architecture. For + /// segmented addressing, this is the size of the offset portion of the + /// address. + uint8_t AddrSize; + /// The size in bytes of a segment selector on the target architecture. 
+ /// If the target system uses a flat address space, this value is 0. + uint8_t SegSize = 0; + }; + +private: + dwarf::DwarfFormat Format; + uint32_t HeaderOffset; + Header HeaderData; + uint32_t DataSize = 0; + std::vector<uint64_t> Addrs; + +public: + void clear(); + + /// Extract an entire table, including all addresses. + Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr, + uint16_t Version, uint8_t AddrSize, + std::function<void(Error)> WarnCallback); + + uint32_t getHeaderOffset() const { return HeaderOffset; } + uint8_t getAddrSize() const { return HeaderData.AddrSize; } + void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; + + /// Return the address based on a given index. + Expected<uint64_t> getAddrEntry(uint32_t Index) const; + + /// Return the size of the table header including the length + /// but not including the addresses. + uint8_t getHeaderSize() const { + switch (Format) { + case dwarf::DwarfFormat::DWARF32: + return 8; // 4 + 2 + 1 + 1 + case dwarf::DwarfFormat::DWARF64: + return 16; // 12 + 2 + 1 + 1 + } + llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64)"); + } + + /// Returns the length of this table, including the length field, or 0 if the + /// length has not been determined (e.g. because the table has not yet been + /// parsed, or there was a problem in parsing). + uint32_t getLength() const; + + /// Verify that the given length is valid for this table. + bool hasValidLength() const { return getLength() != 0; } + + /// Invalidate Length field to stop further processing. + void invalidateLength() { HeaderData.Length = 0; } + + /// Returns the length of the array of addresses. + uint32_t getDataSize() const; +}; + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_DWARFDEBUGADDR_H diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 6e6b57cbcbd4..c77034f6348f 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -46,7 +46,7 @@ class DWARFDie { public: DWARFDie() = default; - DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {} + DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry *D) : U(Unit), Die(D) {} bool isValid() const { return U && Die; } explicit operator bool() const { return isValid(); } @@ -82,9 +82,7 @@ public: } /// Returns true for a valid DIE that terminates a sibling chain. - bool isNULL() const { - return getAbbreviationDeclarationPtr() == nullptr; - } + bool isNULL() const { return getAbbreviationDeclarationPtr() == nullptr; } /// Returns true if DIE represents a subprogram (not inlined). bool isSubprogramDIE() const; @@ -129,7 +127,6 @@ public: void dump(raw_ostream &OS, unsigned indent = 0, DIDumpOptions DumpOpts = DIDumpOptions()) const; - /// Convenience zero-argument overload for debugging. LLVM_DUMP_METHOD void dump() const; @@ -275,12 +272,16 @@ public: iterator begin() const; iterator end() const; + + std::reverse_iterator<iterator> rbegin() const; + std::reverse_iterator<iterator> rend() const; + iterator_range<iterator> children() const; }; -class DWARFDie::attribute_iterator : - public iterator_facade_base<attribute_iterator, std::forward_iterator_tag, - const DWARFAttribute> { +class DWARFDie::attribute_iterator + : public iterator_facade_base<attribute_iterator, std::forward_iterator_tag, + const DWARFAttribute> { /// The DWARF DIE we are extracting attributes from. 
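A hedged usage sketch for the new DWARFDebugAddrTable: parse one table from a .debug_addr extractor and fetch its first entry. The version and address size passed in are assumptions a real caller would take from the CU (see getCUAddrSize above):

    #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"

    using namespace llvm;

    // Sketch: extraction errors are forwarded to the caller; non-fatal
    // problems are reported through the warning callback.
    Expected<uint64_t> firstAddr(DWARFDataExtractor Data) {
      DWARFDebugAddrTable Table;
      uint32_t Offset = 0;
      if (Error E = Table.extract(Data, &Offset, /*Version=*/5,
                                  /*AddrSize=*/8,
                                  [](Error E) { consumeError(std::move(E)); }))
        return std::move(E);
      return Table.getAddrEntry(0);
    }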
DWARFDie Die; /// The value vended to clients via the operator*() or operator->(). @@ -288,6 +289,9 @@ class DWARFDie::attribute_iterator : /// The attribute index within the abbreviation declaration in Die. uint32_t Index; + friend bool operator==(const attribute_iterator &LHS, + const attribute_iterator &RHS); + /// Update the attribute index and attempt to read the attribute value. If the /// attribute is able to be read, update AttrValue and the Index member /// variable. If the attribute value is not able to be read, an appropriate @@ -303,12 +307,21 @@ public: attribute_iterator &operator--(); explicit operator bool() const { return AttrValue.isValid(); } const DWARFAttribute &operator*() const { return AttrValue; } - bool operator==(const attribute_iterator &X) const { return Index == X.Index; } }; +inline bool operator==(const DWARFDie::attribute_iterator &LHS, + const DWARFDie::attribute_iterator &RHS) { + return LHS.Index == RHS.Index; +} + +inline bool operator!=(const DWARFDie::attribute_iterator &LHS, + const DWARFDie::attribute_iterator &RHS) { + return !(LHS == RHS); +} + inline bool operator==(const DWARFDie &LHS, const DWARFDie &RHS) { return LHS.getDebugInfoEntry() == RHS.getDebugInfoEntry() && - LHS.getDwarfUnit() == RHS.getDwarfUnit(); + LHS.getDwarfUnit() == RHS.getDwarfUnit(); } inline bool operator!=(const DWARFDie &LHS, const DWARFDie &RHS) { @@ -323,11 +336,15 @@ class DWARFDie::iterator : public iterator_facade_base<iterator, std::bidirectional_iterator_tag, const DWARFDie> { DWARFDie Die; + + friend std::reverse_iterator<llvm::DWARFDie::iterator>; + friend bool operator==(const DWARFDie::iterator &LHS, + const DWARFDie::iterator &RHS); + public: iterator() = default; - explicit iterator(DWARFDie D) : Die(D) { - } + explicit iterator(DWARFDie D) : Die(D) {} iterator &operator++() { Die = Die.getSibling(); @@ -339,11 +356,19 @@ public: return *this; } - explicit operator bool() const { return Die.isValid(); } const DWARFDie &operator*() const { return Die; } - bool operator==(const iterator &X) const { return Die == X.Die; } }; +inline bool operator==(const DWARFDie::iterator &LHS, + const DWARFDie::iterator &RHS) { + return LHS.Die == RHS.Die; +} + +inline bool operator!=(const DWARFDie::iterator &LHS, + const DWARFDie::iterator &RHS) { + return !(LHS == RHS); +} + // These inline functions must follow the DWARFDie::iterator definition above // as they use functions from that class. 
inline DWARFDie::iterator DWARFDie::begin() const { @@ -360,4 +385,80 @@ inline iterator_range<DWARFDie::iterator> DWARFDie::children() const { } // end namespace llvm +namespace std { + +template <> +class reverse_iterator<llvm::DWARFDie::iterator> + : public llvm::iterator_facade_base< + reverse_iterator<llvm::DWARFDie::iterator>, + bidirectional_iterator_tag, const llvm::DWARFDie> { + +private: + llvm::DWARFDie Die; + bool AtEnd; + +public: + reverse_iterator(llvm::DWARFDie::iterator It) + : Die(It.Die), AtEnd(!It.Die.getPreviousSibling()) { + if (!AtEnd) + Die = Die.getPreviousSibling(); + } + + reverse_iterator<llvm::DWARFDie::iterator> &operator++() { + assert(!AtEnd && "Incrementing rend"); + llvm::DWARFDie D = Die.getPreviousSibling(); + if (D) + Die = D; + else + AtEnd = true; + return *this; + } + + reverse_iterator<llvm::DWARFDie::iterator> &operator--() { + if (AtEnd) { + AtEnd = false; + return *this; + } + Die = Die.getSibling(); + assert(!Die.isNULL() && "Decrementing rbegin"); + return *this; + } + + const llvm::DWARFDie &operator*() const { + assert(Die.isValid()); + return Die; + } + + // FIXME: We should be able to specify the equals operator as a friend, but + // that causes the compiler to think the operator overload is ambiguous + // with the friend declaration and the actual definition as candidates. + bool equals(const reverse_iterator<llvm::DWARFDie::iterator> &RHS) const { + return Die == RHS.Die && AtEnd == RHS.AtEnd; + } +}; + +} // namespace std + +namespace llvm { + +inline bool operator==(const std::reverse_iterator<DWARFDie::iterator> &LHS, + const std::reverse_iterator<DWARFDie::iterator> &RHS) { + return LHS.equals(RHS); +} + +inline bool operator!=(const std::reverse_iterator<DWARFDie::iterator> &LHS, + const std::reverse_iterator<DWARFDie::iterator> &RHS) { + return !(LHS == RHS); +} + +inline std::reverse_iterator<DWARFDie::iterator> DWARFDie::rbegin() const { + return llvm::make_reverse_iterator(end()); +} + +inline std::reverse_iterator<DWARFDie::iterator> DWARFDie::rend() const { + return llvm::make_reverse_iterator(begin()); +} + +} // end namespace llvm + #endif // LLVM_DEBUGINFO_DWARFDIE_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 569c50602f3a..1e5f6ced597a 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -14,7 +14,10 @@ #include "llvm/Support/thread.h" #include <map> #include <mutex> +#include <set> #include <sstream> +#include <string> +#include <vector> namespace llvm { namespace orc { @@ -205,6 +208,42 @@ std::mutex RPCTypeName<std::vector<T>>::NameMutex; template <typename T> std::string RPCTypeName<std::vector<T>>::Name; +template <typename T> class RPCTypeName<std::set<T>> { +public: + static const char *getName() { + std::lock_guard<std::mutex> Lock(NameMutex); + if (Name.empty()) + raw_string_ostream(Name) + << "std::set<" << RPCTypeName<T>::getName() << ">"; + return Name.data(); + } + +private: + static std::mutex NameMutex; + static std::string Name; +}; + +template <typename T> std::mutex RPCTypeName<std::set<T>>::NameMutex; +template <typename T> std::string RPCTypeName<std::set<T>>::Name; + +template <typename K, typename V> class RPCTypeName<std::map<K, V>> { +public: + static const char *getName() { + std::lock_guard<std::mutex> Lock(NameMutex); + if (Name.empty()) + raw_string_ostream(Name) + << "std::map<" << 
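With rbegin()/rend() (backed by the std::reverse_iterator specialization above), a DIE's children can now be walked last-to-first. A minimal sketch:

    #include "llvm/DebugInfo/DWARF/DWARFDie.h"

    using namespace llvm;

    // Sketch: visit children in reverse sibling order.
    void visitChildrenReversed(DWARFDie Die) {
      for (auto It = Die.rbegin(), End = Die.rend(); It != End; ++It) {
        const DWARFDie &Child = *It;
        (void)Child; // ... inspect Child here ...
      }
    }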
RPCTypeNameSequence<K, V>() << ">"; + return Name.data(); + } + +private: + static std::mutex NameMutex; + static std::string Name; +}; + +template <typename K, typename V> +std::mutex RPCTypeName<std::map<K, V>>::NameMutex; +template <typename K, typename V> std::string RPCTypeName<std::map<K, V>>::Name; /// The SerializationTraits<ChannelT, T> class describes how to serialize and /// deserialize an instance of type T to/from an abstract channel of type @@ -527,15 +566,20 @@ public: }; /// SerializationTraits default specialization for std::pair. -template <typename ChannelT, typename T1, typename T2> -class SerializationTraits<ChannelT, std::pair<T1, T2>> { +template <typename ChannelT, typename T1, typename T2, typename T3, typename T4> +class SerializationTraits<ChannelT, std::pair<T1, T2>, std::pair<T3, T4>> { public: - static Error serialize(ChannelT &C, const std::pair<T1, T2> &V) { - return serializeSeq(C, V.first, V.second); + static Error serialize(ChannelT &C, const std::pair<T3, T4> &V) { + if (auto Err = SerializationTraits<ChannelT, T1, T3>::serialize(C, V.first)) + return Err; + return SerializationTraits<ChannelT, T2, T4>::serialize(C, V.second); } - static Error deserialize(ChannelT &C, std::pair<T1, T2> &V) { - return deserializeSeq(C, V.first, V.second); + static Error deserialize(ChannelT &C, std::pair<T3, T4> &V) { + if (auto Err = + SerializationTraits<ChannelT, T1, T3>::deserialize(C, V.first)) + return Err; + return SerializationTraits<ChannelT, T2, T4>::deserialize(C, V.second); } }; @@ -589,6 +633,9 @@ public: /// Deserialize a std::vector<T> to a std::vector<T>. static Error deserialize(ChannelT &C, std::vector<T> &V) { + assert(V.empty() && + "Expected default-constructed vector to deserialize into"); + uint64_t Count = 0; if (auto Err = deserializeSeq(C, Count)) return Err; @@ -602,6 +649,92 @@ public: } }; +template <typename ChannelT, typename T, typename T2> +class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> { +public: + /// Serialize a std::set<T> from std::set<T2>. + static Error serialize(ChannelT &C, const std::set<T2> &S) { + if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size()))) + return Err; + + for (const auto &E : S) + if (auto Err = SerializationTraits<ChannelT, T, T2>::serialize(C, E)) + return Err; + + return Error::success(); + } + + /// Deserialize a std::set<T> to a std::set<T>. + static Error deserialize(ChannelT &C, std::set<T2> &S) { + assert(S.empty() && "Expected default-constructed set to deserialize into"); + + uint64_t Count = 0; + if (auto Err = deserializeSeq(C, Count)) + return Err; + + while (Count-- != 0) { + T2 Val; + if (auto Err = SerializationTraits<ChannelT, T, T2>::deserialize(C, Val)) + return Err; + + auto Added = S.insert(Val).second; + if (!Added) + return make_error<StringError>("Duplicate element in deserialized set", + orcError(OrcErrorCode::UnknownORCError)); + } + + return Error::success(); + } +}; + +template <typename ChannelT, typename K, typename V, typename K2, typename V2> +class SerializationTraits<ChannelT, std::map<K, V>, std::map<K2, V2>> { +public: + /// Serialize a std::map<K, V> from std::map<K2, V2>. 
+ static Error serialize(ChannelT &C, const std::map<K2, V2> &M) { + if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size()))) + return Err; + + for (const auto &E : M) { + if (auto Err = + SerializationTraits<ChannelT, K, K2>::serialize(C, E.first)) + return Err; + if (auto Err = + SerializationTraits<ChannelT, V, V2>::serialize(C, E.second)) + return Err; + } + + return Error::success(); + } + + /// Deserialize a std::map<K, V> to a std::map<K, V>. + static Error deserialize(ChannelT &C, std::map<K2, V2> &M) { + assert(M.empty() && "Expected default-constructed map to deserialize into"); + + uint64_t Count = 0; + if (auto Err = deserializeSeq(C, Count)) + return Err; + + while (Count-- != 0) { + std::pair<K2, V2> Val; + if (auto Err = + SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first)) + return Err; + + if (auto Err = + SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second)) + return Err; + + auto Added = M.insert(Val).second; + if (!Added) + return make_error<StringError>("Duplicate element in deserialized map", + orcError(OrcErrorCode::UnknownORCError)); + } + + return Error::success(); + } +}; + } // end namespace rpc } // end namespace orc } // end namespace llvm diff --git a/contrib/llvm/include/llvm/IR/Attributes.td b/contrib/llvm/include/llvm/IR/Attributes.td index 1019f867aab0..39978c41ac72 100644 --- a/contrib/llvm/include/llvm/IR/Attributes.td +++ b/contrib/llvm/include/llvm/IR/Attributes.td @@ -236,3 +236,4 @@ def : MergeRule<"adjustCallerSSPLevel">; def : MergeRule<"adjustCallerStackProbes">; def : MergeRule<"adjustCallerStackProbeSize">; def : MergeRule<"adjustMinLegalVectorWidth">; +def : MergeRule<"adjustNullPointerValidAttr">; diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h index a3bf25056ee5..643c2a0761d1 100644 --- a/contrib/llvm/include/llvm/IR/Instruction.h +++ b/contrib/llvm/include/llvm/IR/Instruction.h @@ -547,7 +547,7 @@ public: /// may have side effects cannot be removed without semantically changing the /// generated program. bool isSafeToRemove() const; - + /// Return true if the instruction is a variety of EH-block. bool isEHPad() const { switch (getOpcode()) { diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h index a2cb84a071f2..9be8bd1a07bc 100644 --- a/contrib/llvm/include/llvm/IR/Instructions.h +++ b/contrib/llvm/include/llvm/IR/Instructions.h @@ -4016,7 +4016,7 @@ public: void setDoesNotThrow() { addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); } - + /// Return the function called, or null if this is an /// indirect function invocation. 
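The std::map specialization above writes a 64-bit count followed by key/value pairs, and rejects duplicate keys when reading back. A sketch of round-tripping a map over a generic channel, assuming the element types already have SerializationTraits for that ChannelT:

    #include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
    #include <map>
    #include <string>

    using namespace llvm::orc::rpc;

    // Sketch: Out must be default-constructed (empty) per the assert in
    // the deserializer.
    template <typename ChannelT>
    llvm::Error roundTrip(ChannelT &C, const std::map<std::string, int32_t> &In,
                          std::map<std::string, int32_t> &Out) {
      using MapT = std::map<std::string, int32_t>;
      if (auto Err = SerializationTraits<ChannelT, MapT>::serialize(C, In))
        return Err;
      return SerializationTraits<ChannelT, MapT>::deserialize(C, Out);
    }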
/// diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td index 64455573ff19..0cec754dd649 100644 --- a/contrib/llvm/include/llvm/IR/Intrinsics.td +++ b/contrib/llvm/include/llvm/IR/Intrinsics.td @@ -541,7 +541,7 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8555db01645f..9f361410b9b8 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1191,7 +1191,7 @@ def int_amdgcn_ds_bpermute : // Deep learning intrinsics. //===----------------------------------------------------------------------===// -// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c) +// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_fdot2 : GCCBuiltin<"__builtin_amdgcn_fdot2">, @@ -1200,12 +1200,13 @@ def int_amdgcn_fdot2 : [ llvm_v2f16_ty, // %a llvm_v2f16_ty, // %b - llvm_float_ty // %c + llvm_float_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c) +// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_sdot2 : GCCBuiltin<"__builtin_amdgcn_sdot2">, @@ -1214,12 +1215,13 @@ def int_amdgcn_sdot2 : [ llvm_v2i16_ty, // %a llvm_v2i16_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c) +// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %c def int_amdgcn_udot2 : GCCBuiltin<"__builtin_amdgcn_udot2">, @@ -1228,12 +1230,13 @@ def int_amdgcn_udot2 : [ llvm_v2i16_ty, // %a llvm_v2i16_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c) +// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_sdot4 : GCCBuiltin<"__builtin_amdgcn_sdot4">, @@ -1242,12 +1245,13 @@ def int_amdgcn_sdot4 : [ llvm_i32_ty, // %a llvm_i32_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c) +// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c def int_amdgcn_udot4 : GCCBuiltin<"__builtin_amdgcn_udot4">, @@ -1256,12 +1260,13 @@ def int_amdgcn_udot4 : [ llvm_i32_ty, // %a llvm_i32_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c) +// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def 
int_amdgcn_sdot8 : @@ -1271,12 +1276,13 @@ def int_amdgcn_sdot8 : [ llvm_i32_ty, // %a llvm_i32_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; -// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c) +// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp) // %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c def int_amdgcn_udot8 : @@ -1286,7 +1292,8 @@ def int_amdgcn_udot8 : [ llvm_i32_ty, // %a llvm_i32_ty, // %b - llvm_i32_ty // %c + llvm_i32_ty, // %c + llvm_i1_ty // %clamp ], [IntrNoMem, IntrSpeculatable] >; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsARM.td b/contrib/llvm/include/llvm/IR/IntrinsicsARM.td index f25d2f1dbb5d..4e11f9c29dd0 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsARM.td @@ -275,7 +275,7 @@ def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>; def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>; -def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">, +def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>; def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td index c4e753af25ca..3433aaa402eb 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1,10 +1,10 @@ //===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines all of the PowerPC-specific intrinsics. @@ -122,21 +122,21 @@ class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix> /// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8 /// vectors and returns one. These intrinsics have no side effects. -class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix> +class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix> : PowerPC_Vec_Intrinsic<GCCIntSuffix, [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; /// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16 /// vectors and returns one. These intrinsics have no side effects. -class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix> +class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix> : PowerPC_Vec_Intrinsic<GCCIntSuffix, [llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; /// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32 /// vectors and returns one. These intrinsics have no side effects. -class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix> +class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix> : PowerPC_Vec_Intrinsic<GCCIntSuffix, [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -267,7 +267,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
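The whole llvm.amdgcn.{s,u}dot{2,4,8} family gains a trailing i1 %clamp operand in these hunks. A hedged IRBuilder sketch of emitting sdot4 with clamp enabled; the wrapper and value names are assumptions:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: build a call to llvm.amdgcn.sdot4 with the new clamp bit.
    Value *emitSDot4(IRBuilder<> &B, Module *M, Value *A, Value *Bv, Value *C) {
      Function *SDot4 = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_sdot4);
      Value *Clamp = B.getTrue(); // i1 %clamp = 1
      return B.CreateCall(SDot4, {A, Bv, C, Clamp});
    }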
def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - + def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -283,7 +283,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; - + def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; @@ -355,7 +355,7 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; - + def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], [IntrNoMem]>; @@ -474,10 +474,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, @@ -544,7 +544,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Other multiplies. def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; // Packs. @@ -626,21 +626,21 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Add Extended Quadword def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">, - Intrinsic<[llvm_v1i128_ty], + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">, - Intrinsic<[llvm_v1i128_ty], + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; // Sub Extended Quadword def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">, - Intrinsic<[llvm_v1i128_ty], + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">, - Intrinsic<[llvm_v1i128_ty], + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; } @@ -657,7 +657,7 @@ def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">; // Right Shifts. 
def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">; def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">; - + def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">; def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">; def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">; @@ -679,10 +679,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>; def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h index 90036c6ce248..f6752f2817ba 100644 --- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h @@ -285,7 +285,7 @@ private: SpecificBumpPtrAllocator<AUFoldingSetNode> AUFoldingSetNodeAllocator; // Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does - // not own the storage associated with either key or value.. + // not own the storage associated with either key or value.. DenseMap<Pass *, AnalysisUsage*> AnUsageMap; /// Collection of PassInfo objects found via analysis IDs and in this top diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h index c8e905b21a30..8908e1b0d090 100644 --- a/contrib/llvm/include/llvm/IR/Statepoint.h +++ b/contrib/llvm/include/llvm/IR/Statepoint.h @@ -325,7 +325,7 @@ public: explicit Statepoint(CallSite CS) : Base(CS) {} }; -/// Common base class for representing values projected from a statepoint. +/// Common base class for representing values projected from a statepoint. /// Currently, the only projections available are gc.result and gc.relocate. class GCProjectionInst : public IntrinsicInst { public: diff --git a/contrib/llvm/include/llvm/IR/User.h b/contrib/llvm/include/llvm/IR/User.h index d6a603ce845d..aea31467f2fa 100644 --- a/contrib/llvm/include/llvm/IR/User.h +++ b/contrib/llvm/include/llvm/IR/User.h @@ -101,10 +101,10 @@ public: void operator delete(void *Usr); /// Placement delete - required by std, called if the ctor throws. void operator delete(void *Usr, unsigned) { - // Note: If a subclass manipulates the information which is required to calculate the - // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has + // Note: If a subclass manipulates the information which is required to calculate the + // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has // to restore the changed information to the original value, since the dtor of that class - // is not called if the ctor fails. + // is not called if the ctor fails. User::operator delete(Usr); #ifndef LLVM_ENABLE_EXCEPTIONS @@ -113,10 +113,10 @@ public: } /// Placement delete - required by std, called if the ctor throws. void operator delete(void *Usr, unsigned, bool) { - // Note: If a subclass manipulates the information which is required to calculate the - // Usr memory pointer, e.g. 
NumUserOperands, the operator delete of that subclass has + // Note: If a subclass manipulates the information which is required to calculate the + // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has // to restore the changed information to the original value, since the dtor of that class - // is not called if the ctor fails. + // is not called if the ctor fails. User::operator delete(Usr); #ifndef LLVM_ENABLE_EXCEPTIONS diff --git a/contrib/llvm/include/llvm/LinkAllIR.h b/contrib/llvm/include/llvm/LinkAllIR.h index 9a9f3d3a677f..4f4af7187be4 100644 --- a/contrib/llvm/include/llvm/LinkAllIR.h +++ b/contrib/llvm/include/llvm/LinkAllIR.h @@ -44,7 +44,7 @@ namespace { llvm::LLVMContext Context; (void)new llvm::Module("", Context); (void)new llvm::UnreachableInst(Context); - (void) llvm::createVerifierPass(); + (void) llvm::createVerifierPass(); } } ForceVMCoreLinking; } diff --git a/contrib/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm/include/llvm/MC/MCDwarf.h index 785f42d2f9d7..2bfaf19cf2c6 100644 --- a/contrib/llvm/include/llvm/MC/MCDwarf.h +++ b/contrib/llvm/include/llvm/MC/MCDwarf.h @@ -362,6 +362,13 @@ public: static void Encode(MCContext &Context, MCDwarfLineTableParams Params, int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS); + /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas using + /// fixed length operands. + static bool FixedEncode(MCContext &Context, + MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta, + raw_ostream &OS, uint32_t *Offset, uint32_t *Size); + /// Utility function to emit the encoding to a streamer. static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, int64_t LineDelta, uint64_t AddrDelta); diff --git a/contrib/llvm/include/llvm/MC/MCFragment.h b/contrib/llvm/include/llvm/MC/MCFragment.h index 47b35175fec8..c999c9fc4f17 100644 --- a/contrib/llvm/include/llvm/MC/MCFragment.h +++ b/contrib/llvm/include/llvm/MC/MCFragment.h @@ -149,6 +149,7 @@ public: case MCFragment::FT_Relaxable: case MCFragment::FT_CompactEncodedInst: case MCFragment::FT_Data: + case MCFragment::FT_Dwarf: return true; } } @@ -232,7 +233,7 @@ public: static bool classof(const MCFragment *F) { MCFragment::FragmentType Kind = F->getKind(); return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data || - Kind == MCFragment::FT_CVDefRange; + Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf;; } }; @@ -514,7 +515,7 @@ public: } }; -class MCDwarfLineAddrFragment : public MCFragment { +class MCDwarfLineAddrFragment : public MCEncodedFragmentWithFixups<8, 1> { /// LineDelta - the value of the difference between the two line numbers /// between two .loc dwarf directives. int64_t LineDelta; @@ -523,15 +524,11 @@ class MCDwarfLineAddrFragment : public MCFragment { /// make up the address delta between two .loc dwarf directives. 
const MCExpr *AddrDelta; - SmallString<8> Contents; - public: MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta, MCSection *Sec = nullptr) - : MCFragment(FT_Dwarf, false, Sec), LineDelta(LineDelta), - AddrDelta(&AddrDelta) { - Contents.push_back(0); - } + : MCEncodedFragmentWithFixups<8, 1>(FT_Dwarf, false, Sec), + LineDelta(LineDelta), AddrDelta(&AddrDelta) {} /// \name Accessors /// @{ @@ -540,9 +537,6 @@ public: const MCExpr &getAddrDelta() const { return *AddrDelta; } - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - /// @} static bool classof(const MCFragment *F) { diff --git a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h index 484f03b4d854..e1673208d875 100644 --- a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -64,7 +64,7 @@ public: /// Returns true if at least one of the register writes performed by /// \param Inst implicitly clears the upper portion of all super-registers. - /// + /// /// Example: on X86-64, a write to EAX implicitly clears the upper half of /// RAX. Also (still on x86) an XMM write perfomed by an AVX 128-bit /// instruction implicitly clears the upper portion of the correspondent @@ -87,6 +87,19 @@ public: const MCInst &Inst, APInt &Writes) const; + /// Returns true if \param Inst is a dependency breaking instruction for the + /// given subtarget. + /// + /// The value computed by a dependency breaking instruction is not dependent + /// on the inputs. An example of dependency breaking instruction on X86 is + /// `XOR %eax, %eax`. + /// TODO: In future, we could implement an alternative approach where this + /// method returns `true` if the input instruction is not dependent on + /// some/all of its input operands. An APInt mask could then be used to + /// identify independent operands. + virtual bool isDependencyBreaking(const MCSubtargetInfo &STI, + const MCInst &Inst) const; + /// Given a branch instruction try to get the address the branch /// targets. Return true on success, and the address in Target. virtual bool diff --git a/contrib/llvm/include/llvm/MC/MCParser/AsmCond.h b/contrib/llvm/include/llvm/MC/MCParser/AsmCond.h index 8e7bfc521556..a6e0fbd7f337 100644 --- a/contrib/llvm/include/llvm/MC/MCParser/AsmCond.h +++ b/contrib/llvm/include/llvm/MC/MCParser/AsmCond.h @@ -15,7 +15,7 @@ namespace llvm { /// AsmCond - Class to support conditional assembly /// /// The conditional assembly feature (.if, .else, .elseif and .endif) is -/// implemented with AsmCond that tells us what we are in the middle of +/// implemented with AsmCond that tells us what we are in the middle of /// processing. Ignore can be either true or false. When true we are ignoring /// the block of code in the middle of a conditional. diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h index 0a5d80c6d778..e4d0dc03b87c 100644 --- a/contrib/llvm/include/llvm/MC/MCStreamer.h +++ b/contrib/llvm/include/llvm/MC/MCStreamer.h @@ -297,8 +297,8 @@ public: /// If the comment includes embedded \n's, they will each get the comment /// prefix as appropriate. The added comment should not end with a \n. /// By default, each comment is terminated with an end of line, i.e. the - /// EOL param is set to true by default. If one prefers not to end the - /// comment with a new line then the EOL param should be passed + /// EOL param is set to true by default. 
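
The new isDependencyBreaking hook above carries only its signature in this header; a target supplies the real logic. The following is a hypothetical sketch of such an override. DemoInstrAnalysis and the XorOpcode constant are illustrative stand-ins (a real target would test a concrete opcode such as an X86 register-register XOR), not the in-tree implementation.

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCSubtargetInfo.h"

namespace {
class DemoInstrAnalysis : public llvm::MCInstrAnalysis {
public:
  explicit DemoInstrAnalysis(const llvm::MCInstrInfo *Info)
      : MCInstrAnalysis(Info) {}

  bool isDependencyBreaking(const llvm::MCSubtargetInfo &STI,
                            const llvm::MCInst &Inst) const override {
    // `XOR reg, reg` with identical sources always produces zero, so the
    // result does not depend on the prior value of the register.
    const unsigned XorOpcode = 1234; // placeholder for a target opcode
    return Inst.getOpcode() == XorOpcode && Inst.getNumOperands() >= 3 &&
           Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg();
  }
};
} // namespace
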
If one prefers not to end the + /// comment with a new line then the EOL param should be passed /// with a false value. virtual void AddComment(const Twine &T, bool EOL = true) {} diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h index 531b3d249035..159c1765ab86 100644 --- a/contrib/llvm/include/llvm/Object/MachO.h +++ b/contrib/llvm/include/llvm/Object/MachO.h @@ -333,7 +333,7 @@ public: relocation_iterator locrel_begin() const; relocation_iterator locrel_end() const; - + void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; diff --git a/contrib/llvm/include/llvm/PassAnalysisSupport.h b/contrib/llvm/include/llvm/PassAnalysisSupport.h index 118718747659..a075eb557472 100644 --- a/contrib/llvm/include/llvm/PassAnalysisSupport.h +++ b/contrib/llvm/include/llvm/PassAnalysisSupport.h @@ -231,7 +231,7 @@ AnalysisType &Pass::getAnalysisID(AnalysisID PI) const { // should be a small number, we just do a linear search over a (dense) // vector. Pass *ResultPass = Resolver->findImplPass(PI); - assert(ResultPass && + assert(ResultPass && "getAnalysis*() called on an analysis that was not " "'required' by pass!"); diff --git a/contrib/llvm/include/llvm/PassRegistry.h b/contrib/llvm/include/llvm/PassRegistry.h index 93edc12bdc7b..57462138c5ae 100644 --- a/contrib/llvm/include/llvm/PassRegistry.h +++ b/contrib/llvm/include/llvm/PassRegistry.h @@ -9,7 +9,7 @@ // // This file defines PassRegistry, a class that is used in the initialization // and registration of passes. At application startup, passes are registered -// with the PassRegistry, which is later provided to the PassManager for +// with the PassRegistry, which is later provided to the PassManager for // dependency resolution and similar tasks. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 1ca56dcaf9c5..ecb284d30de0 100644 --- a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -207,7 +207,7 @@ struct CounterMappingRegion { /// A CodeRegion associates some code with a counter CodeRegion, - /// An ExpansionRegion represents a file expansion region that associates + /// An ExpansionRegion represents a file expansion region that associates /// a source range with the expansion of a virtual source file, such as /// for a macro instantiation or #include file. 
ExpansionRegion, diff --git a/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h b/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h index 6c83e447cb24..b8a03765a7c0 100644 --- a/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h +++ b/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h @@ -213,6 +213,8 @@ enum { // Tag_ABI_VFP_args, (=28), uleb128 BaseAAPCS = 0, HardFPAAPCS = 1, + ToolChainFPPCS = 2, + CompatibleFPAAPCS = 3, // Tag_FP_HP_extension, (=36), uleb128 AllowHPFP = 1, // Allow use of Half Precision FP diff --git a/contrib/llvm/include/llvm/Support/DataExtractor.h b/contrib/llvm/include/llvm/Support/DataExtractor.h index 3a6ada6c77df..2b1639856e79 100644 --- a/contrib/llvm/include/llvm/Support/DataExtractor.h +++ b/contrib/llvm/include/llvm/Support/DataExtractor.h @@ -15,7 +15,7 @@ namespace llvm { -/// An auxiliary type to facilitate extraction of 3-byte entities. +/// An auxiliary type to facilitate extraction of 3-byte entities. struct Uint24 { uint8_t Bytes[3]; Uint24(uint8_t U) { diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h index 115abc23e2c6..c716e4a4d300 100644 --- a/contrib/llvm/include/llvm/Support/GenericDomTree.h +++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h @@ -530,11 +530,10 @@ protected: /// CFG about its children and inverse children. This implies that deletions /// of CFG edges must not delete the CFG nodes before calling this function. /// - /// Batch updates should be generally faster when performing longer sequences - /// of updates than calling insertEdge/deleteEdge manually multiple times, as - /// it can reorder the updates and remove redundant ones internally. - /// The batch updater is also able to detect sequences of zero and exactly one - /// update -- it's optimized to do less work in these cases. + /// The applyUpdates function can reorder the updates and remove redundant + /// ones internally. The batch updater is also able to detect sequences of + /// zero and exactly one update -- it's optimized to do less work in these + /// cases. /// /// Note that for postdominators it automatically takes care of applying /// updates on reverse edges internally (so there's no need to swap the @@ -854,10 +853,15 @@ protected: assert(isReachableFromEntry(B)); assert(isReachableFromEntry(A)); + const unsigned ALevel = A->getLevel(); const DomTreeNodeBase<NodeT> *IDom; - while ((IDom = B->getIDom()) != nullptr && IDom != A && IDom != B) + + // Don't walk nodes above A's subtree. When we reach A's level, we must + // either find A or be in some other subtree not dominated by A. + while ((IDom = B->getIDom()) != nullptr && IDom->getLevel() >= ALevel) B = IDom; // Walk up the tree - return IDom != nullptr; + + return B == A; } /// Wipe this tree's state without releasing any resources. diff --git a/contrib/llvm/include/llvm/Support/MemoryBuffer.h b/contrib/llvm/include/llvm/Support/MemoryBuffer.h index 535579ecff53..8933295d4ea4 100644 --- a/contrib/llvm/include/llvm/Support/MemoryBuffer.h +++ b/contrib/llvm/include/llvm/Support/MemoryBuffer.h @@ -43,7 +43,6 @@ class MemoryBuffer { const char *BufferStart; // Start of the buffer. const char *BufferEnd; // End of the buffer. 
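
The rewritten dominance walk above is worth seeing in isolation. A minimal sketch with a hand-rolled tree node (TreeNode is an assumption, not the LLVM type): climbing immediate dominators stops as soon as the next step would rise above A's level, after which a single pointer comparison answers the query, instead of walking upward until A or the root happens to appear.

#include <cassert>

struct TreeNode {
  TreeNode *IDom = nullptr; // immediate dominator, null for the root
  unsigned Level = 0;       // depth in the dominator tree, root is 0
};

static bool dominatedBy(const TreeNode *A, const TreeNode *B) {
  // Walk B toward the root, but never above A's level.
  const unsigned ALevel = A->Level;
  while (B->IDom != nullptr && B->IDom->Level >= ALevel)
    B = B->IDom;
  return B == A;
}

int main() {
  TreeNode Root, Mid, Leaf;
  Mid.IDom = &Root; Mid.Level = 1;
  Leaf.IDom = &Mid; Leaf.Level = 2;
  assert(dominatedBy(&Mid, &Leaf));  // Mid dominates Leaf
  assert(!dominatedBy(&Leaf, &Mid)); // the reverse does not hold
  assert(dominatedBy(&Root, &Leaf));
}

The early cut-off matters on deep trees: the old loop could walk from B all the way to the root even when A was nowhere on that path.
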
- protected: MemoryBuffer() = default; @@ -148,9 +147,6 @@ public: virtual BufferKind getBufferKind() const = 0; MemoryBufferRef getMemBufferRef() const; - -private: - virtual void anchor(); }; /// This class is an extension of MemoryBuffer, which allows copy-on-write diff --git a/contrib/llvm/include/llvm/Support/SmallVectorMemoryBuffer.h b/contrib/llvm/include/llvm/Support/SmallVectorMemoryBuffer.h index f43c2fb8f826..c4a600e7f37d 100644 --- a/contrib/llvm/include/llvm/Support/SmallVectorMemoryBuffer.h +++ b/contrib/llvm/include/llvm/Support/SmallVectorMemoryBuffer.h @@ -49,6 +49,9 @@ public: init(this->SV.begin(), this->SV.end(), false); } + // Key function. + ~SmallVectorMemoryBuffer() override; + StringRef getBufferIdentifier() const override { return BufferName; } BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; } @@ -56,7 +59,6 @@ public: private: SmallVector<char, 0> SV; std::string BufferName; - void anchor() override; }; } // namespace llvm diff --git a/contrib/llvm/include/llvm/Support/TargetOpcodes.def b/contrib/llvm/include/llvm/Support/TargetOpcodes.def index 21f5c7e709b8..63491a5f01d2 100644 --- a/contrib/llvm/include/llvm/Support/TargetOpcodes.def +++ b/contrib/llvm/include/llvm/Support/TargetOpcodes.def @@ -470,12 +470,15 @@ HANDLE_TARGET_OPCODE(G_BSWAP) /// Generic AddressSpaceCast. HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST) +/// Generic block address +HANDLE_TARGET_OPCODE(G_BLOCK_ADDR) + // TODO: Add more generic opcodes as we move along. /// Marker for the end of the generic opcode. /// This is used to check if an opcode is in the range of the /// generic opcodes. -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_ADDRSPACE_CAST) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BLOCK_ADDR) /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific post-isel opcode values start here. diff --git a/contrib/llvm/include/llvm/Support/xxhash.h b/contrib/llvm/include/llvm/Support/xxhash.h index f7ca460188a2..6fd67ff9ce1c 100644 --- a/contrib/llvm/include/llvm/Support/xxhash.h +++ b/contrib/llvm/include/llvm/Support/xxhash.h @@ -38,10 +38,12 @@ #ifndef LLVM_SUPPORT_XXHASH_H #define LLVM_SUPPORT_XXHASH_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" namespace llvm { uint64_t xxHash64(llvm::StringRef Data); +uint64_t xxHash64(llvm::ArrayRef<uint8_t> Data); } #endif diff --git a/contrib/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm/include/llvm/Target/GenericOpcodes.td index d72746a0838a..79cc1e4d9eee 100644 --- a/contrib/llvm/include/llvm/Target/GenericOpcodes.td +++ b/contrib/llvm/include/llvm/Target/GenericOpcodes.td @@ -131,6 +131,13 @@ def G_ADDRSPACE_CAST : GenericInstruction { let InOperandList = (ins type1:$src); let hasSideEffects = 0; } + +def G_BLOCK_ADDR : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins unknown:$ba); + let hasSideEffects = 0; +} + //------------------------------------------------------------------------------ // Binary ops. 
//------------------------------------------------------------------------------ diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.td b/contrib/llvm/include/llvm/Target/TargetCallingConv.td index 3d8639dfe1da..95d2b4226294 100644 --- a/contrib/llvm/include/llvm/Target/TargetCallingConv.td +++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.td @@ -1,10 +1,10 @@ //===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the target-independent interfaces with which targets diff --git a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td index d38279b0d65e..8d57cae02d22 100644 --- a/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/contrib/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -13,7 +13,7 @@ // an instruction. Each MCInstPredicate class has a well-known semantic, and it // is used by a PredicateExpander to generate code for MachineInstr and/or // MCInst. -// +// // MCInstPredicate definitions can be used to construct MCSchedPredicate // definitions. An MCSchedPredicate can be used in place of a SchedPredicate // when defining SchedReadVariant and SchedWriteVariant used by a processor @@ -63,7 +63,7 @@ // // New MCInstPredicate classes must be added to this file. For each new class // XYZ, an "expandXYZ" method must be added to the PredicateExpander. -// +// //===----------------------------------------------------------------------===// // Forward declarations. diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h index 068f81776a03..d00e950222a0 100644 --- a/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h +++ b/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h @@ -82,7 +82,7 @@ private: bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock); // If true, this pass is a nop unless the target architecture has branch - // divergence. + // divergence. const bool OnlyIfDivergentTarget = false; TargetTransformInfo *TTI = nullptr; diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index fab8334d4c66..0e5254acb0d3 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -74,7 +74,7 @@ class Value; /// vararg functions can be extracted. This is safe, if all vararg handling /// code is extracted, including vastart. If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, - /// however code extractor won't validate whether extraction is legal. + /// however code extractor won't validate whether extraction is legal. 
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, diff --git a/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index 7698a068717a..35ba0950343c 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -18,7 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueMap.h" #include "llvm/Support/AtomicOrdering.h" diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h index e0caf7741ff3..5f6488e08b5a 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h @@ -134,7 +134,7 @@ public: private: void loadAndParseMapFiles(); - SymbolRewriter::RewriteDescriptorList Descriptors; + SymbolRewriter::RewriteDescriptorList Descriptors; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 8aee81b1f1d8..8f903fa4f1e8 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -142,7 +142,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, Alias = SetMayAlias; AST.TotalMayAliasSetSize += size(); } else { - // First entry of must alias must have maximum size! + // First entry of must alias must have maximum size! P->updateSizeAndAAInfo(Size, AAInfo); } assert(Result != NoAlias && "Cannot be part of must set!"); @@ -251,9 +251,9 @@ void AliasSetTracker::clear() { for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); I != E; ++I) I->second->eraseFromList(); - + PointerMap.clear(); - + // The alias sets should all be clear now. AliasSets.clear(); } @@ -269,7 +269,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr, for (iterator I = begin(), E = end(); I != E;) { iterator Cur = I++; if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; - + if (!FoundSet) { // If this is the first alias set ptr can go into. FoundSet = &*Cur; // Remember it. } else { // Otherwise, we must merge the sets. @@ -336,13 +336,13 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, // Return the set! return *Entry.getAliasSet(*this)->getForwardedTarget(*this); } - + if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) { // Add it to the alias set it aliases. AS->addPointer(*this, Entry, Size, AAInfo); return *AS; } - + // Otherwise create a new alias set to hold the loaded pointer. AliasSets.push_back(new AliasSet()); AliasSets.back().addPointer(*this, Entry, Size, AAInfo); @@ -526,10 +526,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { AS->SetSize--; TotalMayAliasSetSize--; } - + // Stop using the alias set. 
AS->dropRef(*this); - + PointerMap.erase(I); } diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 96326347b712..1a24ae3dba15 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" @@ -93,7 +94,8 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, // depend on them. if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) || (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) || - (LI && Inv.invalidate<LoopAnalysis>(Fn, PA))) + (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)) || + (PV && Inv.invalidate<PhiValuesAnalysis>(Fn, PA))) return true; // Otherwise this analysis result remains valid. @@ -1527,34 +1529,70 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, return Alias; } - SmallPtrSet<Value *, 4> UniqueSrc; SmallVector<Value *, 4> V1Srcs; bool isRecursive = false; - for (Value *PV1 : PN->incoming_values()) { - if (isa<PHINode>(PV1)) - // If any of the source itself is a PHI, return MayAlias conservatively - // to avoid compile time explosion. The worst possible case is if both - // sides are PHI nodes. In which case, this is O(m x n) time where 'm' - // and 'n' are the number of PHI sources. + if (PV) { + // If we have PhiValues then use it to get the underlying phi values. + const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN); + // If we have more phi values than the search depth then return MayAlias + // conservatively to avoid compile time explosion. The worst possible case + // is if both sides are PHI nodes. In which case, this is O(m x n) time + // where 'm' and 'n' are the number of PHI sources. + if (PhiValueSet.size() > MaxLookupSearchDepth) return MayAlias; - - if (EnableRecPhiAnalysis) - if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { - // Check whether the incoming value is a GEP that advances the pointer - // result of this PHI node (e.g. in a loop). If this is the case, we - // would recurse and always get a MayAlias. Handle this case specially - // below. - if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && - isa<ConstantInt>(PV1GEP->idx_begin())) { - isRecursive = true; - continue; + // Add the values to V1Srcs + for (Value *PV1 : PhiValueSet) { + if (EnableRecPhiAnalysis) { + if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below. + if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && + isa<ConstantInt>(PV1GEP->idx_begin())) { + isRecursive = true; + continue; + } } } - - if (UniqueSrc.insert(PV1).second) V1Srcs.push_back(PV1); + } + } else { + // If we don't have PhiInfo then just look at the operands of the phi itself + // FIXME: Remove this once we can guarantee that we have PhiInfo always + SmallPtrSet<Value *, 4> UniqueSrc; + for (Value *PV1 : PN->incoming_values()) { + if (isa<PHINode>(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. 
The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + + if (EnableRecPhiAnalysis) + if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below. + if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && + isa<ConstantInt>(PV1GEP->idx_begin())) { + isRecursive = true; + continue; + } + } + + if (UniqueSrc.insert(PV1).second) + V1Srcs.push_back(PV1); + } } + // If V1Srcs is empty then that means that the phi has no underlying non-phi + // value. This should only be possible in blocks unreachable from the entry + // block, but return MayAlias just in case. + if (V1Srcs.empty()) + return MayAlias; + // If this PHI node is recursive, set the size of the accessed memory to // unknown to represent all the possible values the GEP could advance the // pointer to. @@ -1879,7 +1917,8 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), &AM.getResult<DominatorTreeAnalysis>(F), - AM.getCachedResult<LoopAnalysis>(F)); + AM.getCachedResult<LoopAnalysis>(F), + AM.getCachedResult<PhiValuesAnalysis>(F)); } BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { @@ -1891,12 +1930,12 @@ char BasicAAWrapperPass::ID = 0; void BasicAAWrapperPass::anchor() {} INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", true, true) + "Basic Alias Analysis (stateless AA impl)", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", true, true) + "Basic Alias Analysis (stateless AA impl)", false, true) FunctionPass *llvm::createBasicAAWrapperPass() { return new BasicAAWrapperPass(); @@ -1907,10 +1946,12 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); auto &DTWP = getAnalysis<DominatorTreeWrapperPass>(); auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>(); Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), ACT.getAssumptionCache(F), &DTWP.getDomTree(), - LIWP ? &LIWP->getLoopInfo() : nullptr)); + LIWP ? &LIWP->getLoopInfo() : nullptr, + PVWP ? 
&PVWP->getResult() : nullptr)); return false; } @@ -1920,6 +1961,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addUsedIfAvailable<PhiValuesWrapperPass>(); } BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index fc25cef8ddca..5b170dfa7903 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -124,7 +124,7 @@ namespace { } char CFGPrinterLegacyPass::ID = 0; -INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", +INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", false, true) PreservedAnalyses CFGPrinterPass::run(Function &F, diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index 7d5d2d2e4496..cbdf5f63c557 100644 --- a/contrib/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -166,7 +166,7 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << "Call graph node for function: '" << F->getName() << "'"; else OS << "Call graph node <<null function>>"; - + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; for (const auto &I : *this) { diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index f2211edba216..4c33c420b65d 100644 --- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "cgscc-passmgr" -static cl::opt<unsigned> +static cl::opt<unsigned> MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); @@ -97,13 +97,13 @@ public: } PassManagerType getPassManagerType() const override { - return PMT_CallGraphPassManager; + return PMT_CallGraphPassManager; } - + private: bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool &DevirtualizedCall); - + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, CallGraph &CG, bool &CallGraphUpToDate, bool &DevirtualizedCall); @@ -142,21 +142,21 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, if (EmitICRemark) emitInstrCountChangedRemark(P, M, InstrCount); } - + // After the CGSCCPass is done, when assertions are enabled, use // RefreshCallGraph to verify that the callgraph was correctly updated. #ifndef NDEBUG if (Changed) RefreshCallGraph(CurSCC, CG, true); #endif - + return Changed; } - + assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); FPPassManager *FPP = (FPPassManager*)P; - + // Run pass P on all functions in the current SCC. for (CallGraphNode *CGN : CurSCC) { if (Function *F = CGN->getFunction()) { @@ -168,7 +168,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, F->getContext().yield(); } } - + // The function pass(es) modified the IR, they may have clobbered the // callgraph. if (Changed && CallGraphUpToDate) { @@ -199,7 +199,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool MadeChange = false; bool DevirtualizedCall = false; - + // Scan all functions in the SCC. 
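
Stepping back to the BasicAA change above: PhiValues is an ordinary function analysis, so other passes can issue the same query BasicAA now uses. A minimal legacy-PM sketch (the pass itself is hypothetical; the PhiValues calls are the ones visible in this diff):

#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

namespace {
struct PhiValuesDemo : public llvm::FunctionPass {
  static char ID;
  PhiValuesDemo() : FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::PhiValuesWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    llvm::PhiValues &PV =
        getAnalysis<llvm::PhiValuesWrapperPass>().getResult();
    for (llvm::BasicBlock &BB : F)
      for (llvm::PHINode &PN : BB.phis())
        // One flattened set per phi, with phi-of-phi chains resolved.
        llvm::errs() << PN.getName() << ": "
                     << PV.getValuesForPhi(&PN).size()
                     << " underlying values\n";
    return false;
  }
};
} // namespace

char PhiValuesDemo::ID = 0;
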
unsigned FunctionNo = 0; for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); @@ -207,14 +207,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; - + // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. // Keep track of the number of direct and indirect calls that were // invalidated and removed. unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; - + // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call @@ -226,7 +226,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallSites.count(I->first) || // If the call edge is not from a call or invoke, or it is a - // instrinsic call, then the function pass RAUW'd a call with + // instrinsic call, then the function pass RAUW'd a call with // another value. This can happen when constant folding happens // of well known functions etc. !CallSite(I->first) || @@ -236,18 +236,18 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallSite(I->first).getCalledFunction()->getIntrinsicID()))) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - + // If this was an indirect call site, count it. if (!I->second->getFunction()) ++NumIndirectRemoved; - else + else ++NumDirectRemoved; - + // Just remove the edge from the set of callees, keep track of whether // I points to the last element of the vector. bool WasLast = I + 1 == E; CGN->removeCallEdge(I); - + // If I pointed to the last element of the vector, we have to bail out: // iterator checking rejects comparisons of the resultant pointer with // end. @@ -256,10 +256,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, E = CGN->end(); continue; } - + assert(!CallSites.count(I->first) && "Call site occurs in node multiple times"); - + CallSite CS(I->first); if (CS) { Function *Callee = CS.getCalledFunction(); @@ -269,7 +269,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, } ++I; } - + // Loop over all of the instructions in the function, getting the callsites. // Keep track of the number of direct/indirect calls added. unsigned NumDirectAdded = 0, NumIndirectAdded = 0; @@ -280,7 +280,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (!CS) continue; Function *Callee = CS.getCalledFunction(); if (Callee && Callee->isIntrinsic()) continue; - + // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = @@ -290,11 +290,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // Remove from CallSites since we have now seen it. CallSites.erase(ExistingIt); - + // Verify that the callee is right. if (ExistingNode->getFunction() == CS.getCalledFunction()) continue; - + // If we are in checking mode, we are not allowed to actually mutate // the callgraph. If this is a case where we can infer that the // callgraph is less precise than it could be (e.g. 
an indirect call @@ -303,10 +303,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (CheckingMode && CS.getCalledFunction() && ExistingNode->getFunction() == nullptr) continue; - + assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - + // If not, we either went from a direct call to indirect, indirect to // direct, or direct to different direct. CallGraphNode *CalleeNode; @@ -328,7 +328,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, MadeChange = true; continue; } - + assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); @@ -341,11 +341,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CalleeNode = CG.getCallsExternalNode(); ++NumIndirectAdded; } - + CGN->addCalledFunction(CS, CalleeNode); MadeChange = true; } - + // We scanned the old callgraph node, removing invalidated call sites and // then added back newly found call sites. One thing that can happen is // that an old indirect call site was deleted and replaced with a new direct @@ -359,13 +359,13 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (NumIndirectRemoved > NumIndirectAdded && NumDirectRemoved < NumDirectAdded) DevirtualizedCall = true; - + // After scanning this function, if we still have entries in callsites, then // they are dangling pointers. WeakTrackingVH should save us for this, so // abort if // this happens. assert(CallSites.empty() && "Dangling pointers found in call sites map"); - + // Periodically do an explicit clear to remove tombstones when processing // large scc's. if ((FunctionNo & 15) == 15) @@ -392,7 +392,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool &DevirtualizedCall) { bool Changed = false; - + // Keep track of whether the callgraph is known to be up-to-date or not. // The CGSSC pass manager runs two types of passes: // CallGraphSCC Passes and other random function passes. Because other @@ -406,7 +406,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, for (unsigned PassNo = 0, e = getNumContainedPasses(); PassNo != e; ++PassNo) { Pass *P = getContainedPass(PassNo); - + // If we're in -debug-pass=Executions mode, construct the SCC node list, // otherwise avoid constructing this string as it is expensive. if (isPassDebuggingExecutionsOrMore()) { @@ -423,23 +423,23 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); } dumpRequiredSet(P); - + initializeAnalysisImpl(P); - + // Actually run this pass on the current SCC. Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate, DevirtualizedCall); - + if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); dumpPreservedSet(P); - - verifyPreservedAnalysis(P); + + verifyPreservedAnalysis(P); removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, "", ON_CG_MSG); } - + // If the callgraph was left out of date (because the last pass run was a // functionpass), refresh it before we move on to the next SCC. 
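
The refresh logic being walked through here exists to police a contract: a CallGraphSCCPass that adds or removes call sites must mirror the change on the CallGraphNodes, or the post-run verification asserts. A read-only pass sidesteps the issue entirely, as in this rough sketch (CountSCC is hypothetical):

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Function.h"

namespace {
struct CountSCC : public llvm::CallGraphSCCPass {
  static char ID;
  CountSCC() : CallGraphSCCPass(ID) {}

  bool runOnSCC(llvm::CallGraphSCC &SCC) override {
    unsigned NumDefined = 0;
    for (llvm::CallGraphNode *CGN : SCC)
      if (llvm::Function *F = CGN->getFunction())
        if (!F->isDeclaration())
          ++NumDefined; // read-only walk, nothing to sync afterwards
    (void)NumDefined;
    return false; // no IR change, so the call graph stays up to date
  }
};
} // namespace

char CountSCC::ID = 0;
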
if (!CallGraphUpToDate) @@ -452,7 +452,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool Changed = doInitialization(CG); - + // Walk the callgraph in bottom-up SCC order. scc_iterator<CallGraph*> CGI = scc_begin(&CG); @@ -485,7 +485,7 @@ bool CGPassManager::runOnModule(Module &M) { DevirtualizedCall = false; Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); } while (Iteration++ < MaxIterations && DevirtualizedCall); - + if (DevirtualizedCall) LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration @@ -500,7 +500,7 @@ bool CGPassManager::runOnModule(Module &M) { /// Initialize CG bool CGPassManager::doInitialization(CallGraph &CG) { bool Changed = false; - for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); @@ -515,7 +515,7 @@ bool CGPassManager::doInitialization(CallGraph &CG) { /// Finalize CG bool CGPassManager::doFinalization(CallGraph &CG) { bool Changed = false; - for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); @@ -541,7 +541,7 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { Nodes[i] = New; break; } - + // Update the active scc_iterator so that it doesn't contain dangling // pointers to the old CallGraphNode. scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; @@ -555,18 +555,18 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { /// Assign pass manager to manage this pass. void CallGraphSCCPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { - // Find CGPassManager + // Find CGPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) PMS.pop(); assert(!PMS.empty() && "Unable to handle Call Graph Pass"); CGPassManager *CGP; - + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) CGP = (CGPassManager*)PMS.top(); else { - // Create new Call Graph SCC Pass Manager if it does not exist. + // Create new Call Graph SCC Pass Manager if it does not exist. assert(!PMS.empty() && "Unable to create Call Graph Pass Manager"); PMDataManager *PMD = PMS.top(); @@ -608,7 +608,7 @@ namespace { class PrintCallGraphPass : public CallGraphSCCPass { std::string Banner; raw_ostream &OS; // raw_ostream to print on. - + public: static char ID; @@ -640,10 +640,10 @@ namespace { } return false; } - + StringRef getPassName() const override { return "Print CallGraph IR"; } }; - + } // end anonymous namespace. char PrintCallGraphPass::ID = 0; diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp index 58c5bccff65d..e7637cd88327 100644 --- a/contrib/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -272,7 +272,7 @@ void DemandedBits::performAnalysis() { // Analysis already completed for this function. 
return; Analyzed = true; - + Visited.clear(); AliveBits.clear(); @@ -367,7 +367,7 @@ void DemandedBits::performAnalysis() { APInt DemandedBits::getDemandedBits(Instruction *I) { performAnalysis(); - + const DataLayout &DL = I->getModule()->getDataLayout(); auto Found = AliveBits.find(I); if (Found != AliveBits.end()) diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index 197aee9dacb7..2c503609d96b 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -409,7 +409,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { if (Constant *C = GV->getInitializer()) if (!C->isNullValue()) return false; - + // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. for (User *U : GV->users()) { @@ -464,7 +464,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { return true; } -void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) { +void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). unsigned SCCID = 0; @@ -633,7 +633,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, Inputs.push_back(V); do { const Value *Input = Inputs.pop_back_val(); - + if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) || isa<InvokeInst>(Input)) // Arguments to functions or returns from functions are inherently @@ -654,7 +654,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, if (auto *LI = dyn_cast<LoadInst>(Input)) { Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL)); continue; - } + } if (auto *SI = dyn_cast<SelectInst>(Input)) { const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL); const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL); @@ -672,7 +672,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, } continue; } - + return false; } while (!Inputs.empty()); @@ -754,7 +754,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, // non-addr-taken globals. continue; } - + // Recurse through a limited number of selects, loads and PHIs. 
This is an // arbitrary depth of 4, lower numbers could be used to fix compile time // issues if needed, but this is generally expected to be only be important diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 519d6d67be51..7fc7c15a0c25 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -65,6 +65,48 @@ static Value *SimplifyCastInst(unsigned, Value *, Type *, static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &, unsigned); +static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal, + Value *FalseVal) { + BinaryOperator::BinaryOps BinOpCode; + if (auto *BO = dyn_cast<BinaryOperator>(Cond)) + BinOpCode = BO->getOpcode(); + else + return nullptr; + + CmpInst::Predicate ExpectedPred, Pred1, Pred2; + if (BinOpCode == BinaryOperator::Or) { + ExpectedPred = ICmpInst::ICMP_NE; + } else if (BinOpCode == BinaryOperator::And) { + ExpectedPred = ICmpInst::ICMP_EQ; + } else + return nullptr; + + // %A = icmp eq %TV, %FV + // %B = icmp eq %X, %Y (and one of these is a select operand) + // %C = and %A, %B + // %D = select %C, %TV, %FV + // --> + // %FV + + // %A = icmp ne %TV, %FV + // %B = icmp ne %X, %Y (and one of these is a select operand) + // %C = or %A, %B + // %D = select %C, %TV, %FV + // --> + // %TV + Value *X, *Y; + if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal), + m_Specific(FalseVal)), + m_ICmp(Pred2, m_Value(X), m_Value(Y)))) || + Pred1 != Pred2 || Pred1 != ExpectedPred) + return nullptr; + + if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal) + return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal; + + return nullptr; +} + /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. static Constant *getFalse(Type *Ty) { @@ -1283,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) return X; + // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A. + // We can return X as we do in the above case since OR alters no bits in X. + // SimplifyDemandedBits in InstCombine can do more general optimization for + // bit manipulation. This pattern aims to provide opportunities for other + // optimizers by supporting a simple but common case in InstSimplify. 
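
Before the implementation continues below, the fold just described can be checked with plain integer arithmetic. A concrete instance with hand-picked values: when Y occupies no more than A low bits, the OR cannot disturb any bit of X that survives the shift round trip.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t X = 0x00ABCDE0; // high byte clear, so X << A is lossless (nuw)
  const unsigned A = 8;          // shift amount
  const uint32_t Y = 0x5A;       // effective width 7 bits, <= A
  assert((((X << A) | Y) >> A) == X);

  const uint32_t WideY = 0x1FF;  // 9 bits wide, > A: the fold must not fire
  assert((((X << A) | WideY) >> A) != X);
}
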
+ Value *Y; + const APInt *ShRAmt, *ShLAmt; + if (match(Op1, m_APInt(ShRAmt)) && + match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) && + *ShRAmt == *ShLAmt) { + const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + const unsigned Width = Op0->getType()->getScalarSizeInBits(); + const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); + if (EffWidthY <= ShRAmt->getZExtValue()) + return X; + } + return nullptr; } @@ -3752,6 +3811,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; + if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal)) + return V; + return nullptr; } @@ -4604,149 +4666,131 @@ static bool maskIsAllZeroOrUndef(Value *Mask) { return true; } -template <typename IterTy> -static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const SimplifyQuery &Q, unsigned MaxRecurse) { +static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, + const SimplifyQuery &Q) { + // Idempotent functions return the same result when called repeatedly. Intrinsic::ID IID = F->getIntrinsicID(); - unsigned NumOperands = std::distance(ArgBegin, ArgEnd); - - // Unary Ops - if (NumOperands == 1) { - // Perform idempotent optimizations - if (IsIdempotent(IID)) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) { - if (II->getIntrinsicID() == IID) - return II; - } - } + if (IsIdempotent(IID)) + if (auto *II = dyn_cast<IntrinsicInst>(Op0)) + if (II->getIntrinsicID() == IID) + return II; - Value *IIOperand = *ArgBegin; - Value *X; - switch (IID) { - case Intrinsic::fabs: { - if (SignBitMustBeZero(IIOperand, Q.TLI)) - return IIOperand; - return nullptr; - } - case Intrinsic::bswap: { - // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::bitreverse: { - // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::exp: { - // exp(log(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::exp2: { - // exp2(log2(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::log: { - // log(exp(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::log2: { - // log2(exp2(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) { - return X; - } - return nullptr; - } - default: - return nullptr; - } + Value *X; + switch (IID) { + case Intrinsic::fabs: + if (SignBitMustBeZero(Op0, Q.TLI)) return Op0; + break; + case Intrinsic::bswap: + // bswap(bswap(x)) -> x + if (match(Op0, m_BSwap(m_Value(X)))) return X; + break; + case Intrinsic::bitreverse: + // bitreverse(bitreverse(x)) -> x + if (match(Op0, m_BitReverse(m_Value(X)))) return X; + break; + case Intrinsic::exp: + // exp(log(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X; + break; + case Intrinsic::exp2: + // exp2(log2(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X; + break; + case Intrinsic::log: + // log(exp(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + 
match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X; + break; + case Intrinsic::log2: + // log2(exp2(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X; + break; + default: + break; } - // Binary Ops - if (NumOperands == 2) { - Value *LHS = *ArgBegin; - Value *RHS = *(ArgBegin + 1); - Type *ReturnType = F->getReturnType(); + return nullptr; +} - switch (IID) { - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: { - // X - X -> { 0, false } - if (LHS == RHS) - return Constant::getNullValue(ReturnType); +static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, + const SimplifyQuery &Q) { + Intrinsic::ID IID = F->getIntrinsicID(); + Type *ReturnType = F->getReturnType(); + switch (IID) { + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + // X - X -> { 0, false } + if (Op0 == Op1) + return Constant::getNullValue(ReturnType); + // X - undef -> undef + // undef - X -> undef + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) + return UndefValue::get(ReturnType); + break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X + undef -> undef + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) + return UndefValue::get(ReturnType); + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + // 0 * X -> { 0, false } + // X * 0 -> { 0, false } + if (match(Op0, m_Zero()) || match(Op1, m_Zero())) + return Constant::getNullValue(ReturnType); + // undef * X -> { 0, false } + // X * undef -> { 0, false } + if (match(Op0, m_Undef()) || match(Op1, m_Undef())) + return Constant::getNullValue(ReturnType); + break; + case Intrinsic::load_relative: + if (auto *C0 = dyn_cast<Constant>(Op0)) + if (auto *C1 = dyn_cast<Constant>(Op1)) + return SimplifyRelativeLoad(C0, C1, Q.DL); + break; + case Intrinsic::powi: + if (auto *Power = dyn_cast<ConstantInt>(Op1)) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ConstantFP::get(Op0->getType(), 1.0); + // powi(x, 1) -> x + if (Power->isOne()) + return Op0; + } + break; + case Intrinsic::maxnum: + case Intrinsic::minnum: + // If one argument is NaN, return the other argument. + if (match(Op0, m_NaN())) return Op1; + if (match(Op1, m_NaN())) return Op0; + break; + default: + break; + } - // X - undef -> undef - // undef - X -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return UndefValue::get(ReturnType); + return nullptr; +} - return nullptr; - } - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: { - // X + undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return UndefValue::get(ReturnType); +template <typename IterTy> +static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, + const SimplifyQuery &Q) { + // Intrinsics with no operands have some kind of side effect. Don't simplify. 
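
The X - X case above has a direct counterpart in ordinary C++: the Clang/GCC overflow builtins lower to the same with-overflow intrinsics, and subtracting a value from itself yields a zero result and a false overflow flag for every input, which is exactly the { 0, false } the simplifier returns.

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (int32_t X : {INT32_MIN, -7, 0, 42, INT32_MAX}) {
    int32_t Res;
    bool Overflow = __builtin_sub_overflow(X, X, &Res);
    assert(Res == 0 && !Overflow);
  }
}
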
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd); + if (NumOperands == 0) + return nullptr; - return nullptr; - } - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: { - // 0 * X -> { 0, false } - // X * 0 -> { 0, false } - if (match(LHS, m_Zero()) || match(RHS, m_Zero())) - return Constant::getNullValue(ReturnType); - - // undef * X -> { 0, false } - // X * undef -> { 0, false } - if (match(LHS, m_Undef()) || match(RHS, m_Undef())) - return Constant::getNullValue(ReturnType); + Intrinsic::ID IID = F->getIntrinsicID(); + if (NumOperands == 1) + return simplifyUnaryIntrinsic(F, ArgBegin[0], Q); - return nullptr; - } - case Intrinsic::load_relative: { - Constant *C0 = dyn_cast<Constant>(LHS); - Constant *C1 = dyn_cast<Constant>(RHS); - if (C0 && C1) - return SimplifyRelativeLoad(C0, C1, Q.DL); - return nullptr; - } - case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return ConstantFP::get(LHS->getType(), 1.0); - // powi(x, 1) -> x - if (Power->isOne()) - return LHS; - } - return nullptr; - case Intrinsic::maxnum: - case Intrinsic::minnum: - // If one argument is NaN, return the other argument. - if (match(LHS, m_NaN())) - return RHS; - if (match(RHS, m_NaN())) - return LHS; - return nullptr; - default: - return nullptr; - } - } + if (NumOperands == 2) + return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q); - // Simplify calls to llvm.masked.load.* + // Handle intrinsics with 3 or more arguments. switch (IID) { case Intrinsic::masked_load: { Value *MaskArg = ArgBegin[2]; @@ -4756,6 +4800,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return PassthruArg; return nullptr; } + case Intrinsic::fshl: + case Intrinsic::fshr: { + Value *ShAmtArg = ArgBegin[2]; + const APInt *ShAmtC; + if (match(ShAmtArg, m_APInt(ShAmtC))) { + // If there's effectively no shift, return the 1st arg or 2nd arg. + // TODO: For vectors, we could check each element of a non-splat constant. + APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); + if (ShAmtC->urem(BitWidth).isNullValue()) + return ArgBegin[IID == Intrinsic::fshl ? 0 : 1]; + } + return nullptr; + } default: return nullptr; } @@ -4780,7 +4837,7 @@ static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, return nullptr; if (F->isIntrinsic()) - if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) + if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q)) return Ret; if (!canConstantFoldCallTo(CS, F)) diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 435b6f205199..ee0148e0d795 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -725,7 +725,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, // frequently arranged such that dominating ones come first and we quickly // find a path to function entry. TODO: We should consider explicitly // canonicalizing to make this true rather than relying on this happy - // accident. + // accident. 
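
A reference model makes the funnel-shift fold above concrete. fshl concatenates its first two operands and shifts left by the amount taken modulo the bit width; when that amount is congruent to zero, the first operand falls out unchanged (and symmetrically the second operand for fshr). A sketch assuming 32-bit operands:

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Sh) {
  Sh %= 32;    // the intrinsic applies the shift modulo the bit width
  if (Sh == 0)
    return Hi; // the case InstSimplify now folds to the first operand
  return (Hi << Sh) | (Lo >> (32 - Sh));
}

int main() {
  assert(fshl32(0xDEADBEEF, 0x12345678, 0) == 0xDEADBEEF);
  assert(fshl32(0xDEADBEEF, 0x12345678, 64) == 0xDEADBEEF); // 64 % 32 == 0
  assert(fshl32(0x000000FF, 0xF0000000, 4) == 0x00000FFF);
}
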
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { ValueLatticeElement EdgeResult; if (!getEdgeValue(Val, *PI, BB, EdgeResult)) diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index c6175bf9bee9..a24d66011b8d 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -176,8 +176,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// Calculate Start and End points of memory access. /// Let's assume A is the first access and B is a memory access on N-th loop -/// iteration. Then B is calculated as: -/// B = A + Step*N . +/// iteration. Then B is calculated as: +/// B = A + Step*N . /// Step value may be positive or negative. /// N is a calculated back-edge taken count: /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 @@ -1317,7 +1317,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, return false; } -/// Given a non-constant (unknown) dependence-distance \p Dist between two +/// Given a non-constant (unknown) dependence-distance \p Dist between two /// memory accesses, that have the same stride whose absolute value is given /// in \p Stride, and that have the same type size \p TypeByteSize, /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is @@ -1336,19 +1336,19 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, // If we can prove that // (**) |Dist| > BackedgeTakenCount * Step - // where Step is the absolute stride of the memory accesses in bytes, + // where Step is the absolute stride of the memory accesses in bytes, // then there is no dependence. // - // Ratioanle: - // We basically want to check if the absolute distance (|Dist/Step|) - // is >= the loop iteration count (or > BackedgeTakenCount). - // This is equivalent to the Strong SIV Test (Practical Dependence Testing, - // Section 4.2.1); Note, that for vectorization it is sufficient to prove + // Ratioanle: + // We basically want to check if the absolute distance (|Dist/Step|) + // is >= the loop iteration count (or > BackedgeTakenCount). + // This is equivalent to the Strong SIV Test (Practical Dependence Testing, + // Section 4.2.1); Note, that for vectorization it is sufficient to prove // that the dependence distance is >= VF; This is checked elsewhere. - // But in some cases we can prune unknown dependence distances early, and - // even before selecting the VF, and without a runtime test, by comparing - // the distance against the loop iteration count. Since the vectorized code - // will be executed only if LoopCount >= VF, proving distance >= LoopCount + // But in some cases we can prune unknown dependence distances early, and + // even before selecting the VF, and without a runtime test, by comparing + // the distance against the loop iteration count. Since the vectorized code + // will be executed only if LoopCount >= VF, proving distance >= LoopCount // also guarantees that distance >= VF. 
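
Plugging numbers into test (**) above may help. Take a stride of one 4-byte element and a backedge-taken count of 99 (a 100-iteration loop): the furthest apart two same-stride accesses can get inside the loop is 99 * 4 = 396 bytes, so any distance provably larger than that can never be realized and there is no dependence. A throwaway check, with every value assumed for illustration:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t TypeByteSize = 4, Stride = 1;
  const uint64_t ByteStride = Stride * TypeByteSize; // step in bytes
  const uint64_t BackedgeTakenCount = 99;            // trip count 100
  const uint64_t AbsDist = 512;                      // |Dist| in bytes

  // (**) |Dist| > BackedgeTakenCount * Step  =>  no dependence.
  assert(AbsDist > BackedgeTakenCount * ByteStride); // 512 > 396
}
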
// const uint64_t ByteStride = Stride * TypeByteSize; @@ -1360,8 +1360,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); - // The dependence distance can be positive/negative, so we sign extend Dist; - // The multiplication of the absolute stride in bytes and the + // The dependence distance can be positive/negative, so we sign extend Dist; + // The multiplication of the absolute stride in bytes and the // backdgeTakenCount is non-negative, so we zero extend Product. if (DistTypeSize > ProductTypeSize) CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); @@ -2212,24 +2212,24 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { "versioning:"); LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); - // Avoid adding the "Stride == 1" predicate when we know that + // Avoid adding the "Stride == 1" predicate when we know that // Stride >= Trip-Count. Such a predicate will effectively optimize a single // or zero iteration loop, as Trip-Count <= Stride == 1. - // + // // TODO: We are currently not making a very informed decision on when it is // beneficial to apply stride versioning. It might make more sense that the - // users of this analysis (such as the vectorizer) will trigger it, based on - // their specific cost considerations; For example, in cases where stride + // users of this analysis (such as the vectorizer) will trigger it, based on + // their specific cost considerations; For example, in cases where stride // versioning does not help resolving memory accesses/dependences, the - // vectorizer should evaluate the cost of the runtime test, and the benefit - // of various possible stride specializations, considering the alternatives - // of using gather/scatters (if available). - + // vectorizer should evaluate the cost of the runtime test, and the benefit + // of various possible stride specializations, considering the alternatives + // of using gather/scatters (if available). + const SCEV *StrideExpr = PSE->getSCEV(Stride); - const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); + const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); // Match the types so we can compare the stride and the BETakenCount. - // The Stride can be positive/negative, so we sign extend Stride; + // The Stride can be positive/negative, so we sign extend Stride; // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. 
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); @@ -2243,7 +2243,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); // Since TripCount == BackEdgeTakenCount + 1, checking: - // "Stride >= TripCount" is equivalent to checking: + // "Stride >= TripCount" is equivalent to checking: // Stride - BETakenCount > 0 if (SE->isKnownPositive(StrideMinusBETaken)) { LLVM_DEBUG( diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index 5c0cbb26484c..5a6bbd7b2ac6 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -118,7 +118,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { } else { SmallVector<NonLocalDepResult, 4> NLDI; assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || - isa<VAArgInst>(Inst)) && "Unknown memory instruction!"); + isa<VAArgInst>(Inst)) && "Unknown memory instruction!"); MDA.getNonLocalPointerDependency(Inst, NLDI); DepSet &InstDeps = Deps[Inst]; diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 7eeefd54f007..feae53c54ecb 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -1513,6 +1514,8 @@ void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); // Flush load info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); + // Invalidate phis that use the pointer. + PV.invalidateValue(Ptr); } void MemoryDependenceResults::invalidateCachedPredecessors() { @@ -1671,6 +1674,9 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { } } + // Invalidate phis that use the removed instruction. 
+ PV.invalidateValue(RemInst); + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); LLVM_DEBUG(verifyRemoved(RemInst)); } @@ -1730,7 +1736,8 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - return MemoryDependenceResults(AA, AC, TLI, DT); + auto &PV = AM.getResult<PhiValuesAnalysis>(F); + return MemoryDependenceResults(AA, AC, TLI, DT, PV); } char MemoryDependenceWrapperPass::ID = 0; @@ -1741,6 +1748,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass) INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep", "Memory Dependence Analysis", false, true) @@ -1758,6 +1766,7 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PhiValuesWrapperPass>(); AU.addRequiredTransitive<AAResultsWrapperPass>(); AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } @@ -1773,7 +1782,8 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P // Check whether the analyses we depend on became invalid for any reason. if (Inv.invalidate<AAManager>(F, PA) || Inv.invalidate<AssumptionAnalysis>(F, PA) || - Inv.invalidate<DominatorTreeAnalysis>(F, PA)) + Inv.invalidate<DominatorTreeAnalysis>(F, PA) || + Inv.invalidate<PhiValuesAnalysis>(F, PA)) return true; // Otherwise this analysis result remains valid. 
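The invalidate() hunk above follows the usual new-pass-manager idiom for an analysis result that caches pointers into other analyses. A minimal sketch of that idiom, with MyAnalysis and MyResult as placeholder names rather than anything from the patch:

    bool MyResult::invalidate(Function &F, const PreservedAnalyses &PA,
                              FunctionAnalysisManager::Invalidator &Inv) {
      // Invalid if this result itself was not marked preserved...
      auto PAC = PA.getChecker<MyAnalysis>();
      if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
        return true;
      // ...or if an analysis it depends on (here PhiValues) was invalidated.
      return Inv.invalidate<PhiValuesAnalysis>(F, PA);
    }

This is why the patch chains PhiValuesAnalysis into the Inv.invalidate<> checks: MemoryDependenceResults now holds a PhiValues reference and must be recomputed whenever that analysis is.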
@@ -1789,6 +1799,7 @@ bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - MemDep.emplace(AA, AC, TLI, DT); + auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult(); + MemDep.emplace(AA, AC, TLI, DT, PV); return false; } diff --git a/contrib/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm/lib/Analysis/MustExecute.cpp index fc4049874622..8e85366b4618 100644 --- a/contrib/llvm/lib/Analysis/MustExecute.cpp +++ b/contrib/llvm/lib/Analysis/MustExecute.cpp @@ -235,7 +235,7 @@ public: } - void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { if (!MustExec.count(&V)) return; @@ -245,7 +245,7 @@ public: OS << " ; (mustexec in " << NumLoops << " loops: "; else OS << " ; (mustexec in: "; - + bool first = true; for (const Loop *L : Loops) { if (!first) @@ -264,6 +264,6 @@ bool MustExecutePrinter::runOnFunction(Function &F) { MustExecuteAnnotatedWriter Writer(F, DT, LI); F.print(dbgs(), &Writer); - + return false; } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index aa95ace93014..0e715b8814ff 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4839,7 +4839,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) // for each of StartVal and Accum - auto getExtendedExpr = [&](const SCEV *Expr, + auto getExtendedExpr = [&](const SCEV *Expr, bool CreateSignExtend) -> const SCEV * { assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); @@ -4935,11 +4935,11 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { return Rewrite; } -// FIXME: This utility is currently required because the Rewriter currently -// does not rewrite this expression: -// {0, +, (sext ix (trunc iy to ix) to iy)} +// FIXME: This utility is currently required because the Rewriter currently +// does not rewrite this expression: +// {0, +, (sext ix (trunc iy to ix) to iy)} // into {0, +, %step}, -// even when the following Equal predicate exists: +// even when the following Equal predicate exists: // "%step == (sext ix (trunc iy to ix) to iy)". 
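A concrete instance of the FIXME above, in the same SCEV notation (taking iy = i64 and ix = i32): given the predicate

    %step == (sext i32 (trunc i64 %step to i32) to i64)

the two recurrences

    {0,+,(sext i32 (trunc i64 %step to i32) to i64)}   and   {0,+,%step}

generate identical sequences, but the Rewriter will not fold the first into the second. Recognizing such pairs as equal under the accumulated predicates is precisely the job of areAddRecsEqualWithPreds below.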
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 9de2f789c89c..7233a86e5daf 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -721,7 +721,7 @@ struct ReductionData { static Optional<ReductionData> getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) - return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast<SelectInst>(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || @@ -730,8 +730,8 @@ static Optional<ReductionData> getReductionData(Instruction *I) { m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); - return ReductionData(RK_MinMax, CI->getOpcode(), L, R); - } + return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); @@ -851,11 +851,11 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. - // + // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) - // + // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, @@ -916,7 +916,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. - // + // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf @@ -927,7 +927,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; - SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. @@ -1093,7 +1093,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::InsertElement: { const InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = -1; + unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return getVectorInstrCost(I->getOpcode(), @@ -1104,7 +1104,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { // TODO: Identify and add costs for insert/extract subvector, etc. 
if (Shuffle->changesLength()) return -1; - + if (Shuffle->isIdentity()) return 0; diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 04a7b73c22bf..0ef39163bda3 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -71,7 +71,7 @@ #include <cassert> #include <cstdint> #include <iterator> -#include <utility> +#include <utility> using namespace llvm; using namespace llvm::PatternMatch; @@ -3828,7 +3828,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, // If either of the values is known to be non-negative, adding them can only // overflow if the second is also non-negative, so we can assume that. - // Two non-negative numbers will only overflow if there is a carry to the + // Two non-negative numbers will only overflow if there is a carry to the // sign bit, so we can check if even when the values are as big as possible // there is no overflow to the sign bit. if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { @@ -3855,7 +3855,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, } // If we reached here it means that we know nothing about the sign bits. - // In this case we can't know if there will be an overflow, since by + // In this case we can't know if there will be an overflow, since by // changing the sign bits any two values can be made to overflow. return false; } @@ -3905,7 +3905,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // operands. bool LHSOrRHSKnownNonNegative = (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); - bool LHSOrRHSKnownNegative = + bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); @@ -4454,7 +4454,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; - + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4630,7 +4630,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; } } - + if (isKnownNegation(TrueVal, FalseVal)) { // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can // match against either LHS or sext(LHS). 
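Returning to the reduction matchers touched in TargetTransformInfo.cpp above: the pairwise tree shown in the (X0, X1, X2, X3) comment reduces a 4-lane vector in log2(4) = 2 shuffle-and-add steps. A scalar model of the same tree, purely illustrative:

    // Scalar model of the <4 x float> pairwise reduction tree matched above.
    static float pairwiseReduce(float X0, float X1, float X2, float X3) {
      float L = X0 + X1; // lane 0 of (X0+X1, X2+X3, undef, undef)
      float R = X2 + X3; // lane 1 of the same shuffle/add step
      return L + R;      // lane 0 of the final step, read by extractelement
    }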
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index 599b59bf61e8..7cf74dd16f5a 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -842,7 +842,7 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) { } /// parseIndirectSymbol: -/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier +/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier /// OptionalVisibility OptionalDLLStorageClass /// OptionalThreadLocal OptionalUnnamedAddr // 'alias|ifunc' IndirectSymbol @@ -3935,7 +3935,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, EmissionKindField &Result Lex.Lex(); return false; } - + template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfAttEncodingField &Result) { diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index be75df0820d9..87b47dc354b5 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3809,7 +3809,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { continue; // The mapping from OriginalId to GUID may return a GUID // that corresponds to a static variable. Filter it out here. - // This can happen when + // This can happen when // 1) There is a call to a library function which does not have // a CallValidId; // 2) There is a static variable with the OriginalGUID identical diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h index 181da83dc88b..d93716287981 100644 --- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h @@ -46,7 +46,7 @@ public: MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) = 0; - + /// Update liveness information to account for the current /// instruction, which will not be scheduled. virtual void Observe(MachineInstr &MI, unsigned Count, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 4a226527cb5b..c8305ad9c547 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -24,8 +24,26 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { return IterBool.first->second.Number; } + +void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { + static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); + Asm.OutStreamer->SwitchSection(Section); + + uint64_t Length = sizeof(uint16_t) // version + + sizeof(uint8_t) // address_size + + sizeof(uint8_t) // segment_selector_size + + AddrSize * Pool.size(); // entries + Asm.emitInt32(Length); // TODO: Support DWARF64 format. + Asm.emitInt16(Asm.getDwarfVersion()); + Asm.emitInt8(AddrSize); + Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size. +} + // Emit addresses into the section given. 
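On the AddressPool::emitHeader hunk just above: it emits the DWARF v5 .debug_addr unit header, and the DWARF unit_length field excludes its own four bytes, which is why Length sums only version, address_size, segment_selector_size, and the entries. With an 8-byte address size and, say, three pool entries (an illustrative figure, not from the patch), Length = 2 + 1 + 1 + 8*3 = 28, followed by version 5, address_size 8, and a zero segment selector size.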
void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { + if (Asm.getDwarfVersion() >= 5) + emitHeader(Asm, AddrSection); + if (Pool.empty()) return; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 5350006bf744..d5008fab5563 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -50,6 +50,9 @@ public: bool hasBeenUsed() const { return HasBeenUsed; } void resetUsedFlag() { HasBeenUsed = false; } + +private: + void emitHeader(AsmPrinter &Asm, MCSection *Section); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8761fae9dd22..500e7a00196f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -364,7 +364,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else UseSectionsAsReferences = DwarfSectionsAsReferences == Enable; - GenerateTypeUnits = GenerateDwarfTypeUnits; + // Don't generate type units for unsupported object file formats. + GenerateTypeUnits = + A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits; TheAccelTableKind = computeAccelTableKind( DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple()); @@ -886,8 +888,7 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - // Emit DWO addresses. - AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); + emitDebugAddr(); } // Emit info into the dwarf accelerator table sections. @@ -2136,7 +2137,7 @@ void DwarfDebug::emitDebugRanges() { return; } - if (getDwarfVersion() >= 5 && NoRangesPresent()) + if (NoRangesPresent()) return; // Start the dwarf ranges section. @@ -2297,6 +2298,12 @@ void DwarfDebug::emitDebugStrDWO() { OffSec, /* UseRelativeOffsets = */ false); } +// Emit DWO addresses. +void DwarfDebug::emitDebugAddr() { + assert(useSplitDwarf() && "No split dwarf?"); + AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); +} + MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0c7be5d27dfe..abf2e43b1312 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -447,6 +447,9 @@ class DwarfDebug : public DebugHandlerBase { /// Emit the debug str dwo section. void emitDebugStrDWO(); + /// Emit DWO addresses. + void emitDebugAddr(); + /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 952b0d99a95a..0637d952eba4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -112,7 +112,7 @@ protected: uint64_t OffsetInBits = 0; unsigned DwarfVersion; - /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister. + /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister. 
unsigned SubRegisterSizeInBits = 0; unsigned SubRegisterOffsetInBits = 0; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index c90bd568162d..049f349b009a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -95,6 +95,6 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { } } else { ScopeVars.Locals.push_back(Var); - } + } return true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 43b835b2c4aa..600f4a78fda0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1182,7 +1182,7 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); if (!M->getISysRoot().empty()) addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); - + return &MDie; } @@ -1691,7 +1691,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { } void DwarfTypeUnit::emitHeader(bool UseOffsets) { - DwarfUnit::emitCommonHeader(UseOffsets, + DwarfUnit::emitCommonHeader(UseOffsets, DD->useSplitDwarf() ? dwarf::DW_UT_split_type : dwarf::DW_UT_type); Asm->OutStreamer->AddComment("Type Signature"); diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index f2615edaece2..e28fc6fb9d4f 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -362,19 +362,19 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, /// Convert an atomic load of a non-integral type to an integer load of the /// equivalent bitwidth. See the function comment on -/// convertAtomicStoreToIntegerType for background. +/// convertAtomicStoreToIntegerType for background. LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); IRBuilder<> Builder(LI); - + Value *Addr = LI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); - + auto *NewLI = Builder.CreateLoad(NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); @@ -452,7 +452,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), M->getDataLayout()); Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); - + Value *Addr = SI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); @@ -920,14 +920,14 @@ Value *AtomicExpand::insertRMWLLSCLoop( /// the equivalent bitwidth. We used to not support pointer cmpxchg in the /// IR. As a migration step, we convert back to what used to be the standard /// way to represent a pointer cmpxchg so that we can update backends one by -/// one. +/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { auto *M = CI->getModule(); Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); IRBuilder<> Builder(CI); - + Value *Addr = CI->getPointerOperand(); Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); @@ -935,8 +935,8 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); - - + + auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp index abac555d6602..3a9b20aa661d 100644 --- a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp +++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file contains the boilerplate required to define our various built in -// gc lowering strategies. +// gc lowering strategies. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 840e5ede6444..5a5960b16130 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -530,7 +530,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // Kill instructions can define registers but are really nops, and there // might be a real definition earlier that needs to be paired with uses // dominated by this kill. - + // FIXME: It may be possible to remove the isKill() restriction once PR18663 // has been properly fixed. There can be value in processing kills as seen // in the AggressiveAntiDepBreaker class. diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index 456fa799e8e1..fe3d29657942 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -159,7 +159,7 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { auto NMI = GCStrategyMap.find(Name); if (NMI != GCStrategyMap.end()) return NMI->getValue(); - + for (auto& Entry : GCRegistry::entries()) { if (Name == Entry.getName()) { std::unique_ptr<GCStrategy> S = Entry.instantiate(); @@ -171,11 +171,11 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { } if (GCRegistry::begin() == GCRegistry::end()) { - // In normal operation, the registry should not be empty. There should + // In normal operation, the registry should not be empty. There should // be the builtin GCs if nothing else. The most likely scenario here is - // that we got here without running the initializers used by the Registry + // that we got here without running the initializers used by the Registry // itself and its registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() + + const std::string error = ("unsupported GC: " + Name).str() + " (did you remember to link and initialize the CodeGen library?)"; report_fatal_error(error); } else diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index bafb7a05536d..80da50562d32 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" @@ -33,6 +34,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -1503,6 +1505,8 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); } EntryBuilder.buildMerge(Reg, Ops); + } else if (auto *BA = dyn_cast<BlockAddress>(&C)) { + EntryBuilder.buildBlockAddress(Reg, BA); } else return false; @@ -1611,19 +1615,20 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { ArgIt++; } - // And translate the function! - for (const BasicBlock &BB : F) { - MachineBasicBlock &MBB = getMBB(BB); + // Need to visit defs before uses when translating instructions. + ReversePostOrderTraversal<const Function *> RPOT(&F); + for (const BasicBlock *BB : RPOT) { + MachineBasicBlock &MBB = getMBB(*BB); // Set the insertion point of all the following translations to // the end of this basic block. CurBuilder.setMBB(MBB); - for (const Instruction &Inst : BB) { + for (const Instruction &Inst : *BB) { if (translate(Inst)) continue; OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - Inst.getDebugLoc(), &BB); + Inst.getDebugLoc(), BB); R << "unable to translate instruction: " << ore::NV("Opcode", &Inst); if (ORE->allowExtraAnalysis("gisel-irtranslator")) { diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 9df931eb81b3..3271b54aa830 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -809,6 +809,15 @@ MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, MMO); } +MachineInstrBuilder +MachineIRBuilderBase::buildBlockAddress(unsigned Res, const BlockAddress *BA) { +#ifndef NDEBUG + assert(getMRI()->getType(Res).isPointer() && "invalid res type"); +#endif + + return buildInstr(TargetOpcode::G_BLOCK_ADDR).addDef(Res).addBlockAddress(BA); +} + void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend) { #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index ca56f4e0c4f1..9f7f5e392a9a 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -56,7 +56,7 @@ // - it makes linker optimizations less useful (order files, LOHs, ...) // - it forces usage of indexed addressing (which isn't necessarily "free") // - it can increase register pressure when the uses are disparate enough. 
-// +// // We use heuristics to discover the best global grouping we can (cf cl::opts). // // ===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index eb4099964242..707113bd973b 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -113,22 +113,22 @@ void IntrinsicLowering::AddPrototypes(Module &M) { case Intrinsic::memcpy: M.getOrInsertFunction("memcpy", Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), DL.getIntPtrType(Context)); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), DL.getIntPtrType(Context)); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), - Type::getInt8PtrTy(Context), - Type::getInt32Ty(M.getContext()), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), DL.getIntPtrType(Context)); break; case Intrinsic::sqrt: @@ -210,13 +210,13 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { "bswap.5"); Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.4"); - Value* Tmp3 = Builder.CreateLShr(V, + Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), "bswap.3"); - Value* Tmp2 = Builder.CreateLShr(V, + Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), "bswap.2"); - Value* Tmp1 = Builder.CreateLShr(V, + Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, @@ -274,7 +274,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { for (unsigned n = 0; n < WordSize; ++n) { Value *PartValue = V; - for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); + for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); i <<= 1, ++ct) { Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); @@ -381,7 +381,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::siglongjmp: { // Insert the call to abort - ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), Type::getVoidTy(Context)); break; } @@ -392,7 +392,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::bswap: CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); break; - + case Intrinsic::ctlz: CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); break; @@ -420,7 +420,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; } - + case Intrinsic::get_dynamic_area_offset: errs() << "WARNING: this target does not support the custom llvm.get." "dynamic.area.offset. 
It is being lowered to a constant 0\n"; @@ -473,7 +473,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::assume: case Intrinsic::var_annotation: break; // Strip out these intrinsics - + case Intrinsic::memcpy: { Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index fea83e92de8f..417bd9d5aebe 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -340,7 +340,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, /// address the spill location in a target independent way. int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg) { - assert(MI.hasOneMemOperand() && + assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); auto MMOI = MI.memoperands_begin(); const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); @@ -472,7 +472,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, int FI; const MachineMemOperand *MMO; - // TODO: Handle multiple stores folded into one. + // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 054cc97f8374..639cd80768fc 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -314,10 +314,10 @@ public: MMI.deleteMachineFunctionFor(F); return true; } - + StringRef getPassName() const override { return "Free MachineFunction"; - } + } }; } // end anonymous namespace diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp index 28e4e2c6c87a..a712afec0959 100644 --- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp @@ -620,10 +620,8 @@ struct InstructionMapper { /// queried for candidates. /// /// \param MBB The \p MachineBasicBlock to be translated into integers. - /// \param TRI \p TargetRegisterInfo for the module. - /// \param TII \p TargetInstrInfo for the module. + /// \param TII \p TargetInstrInfo for the function. void convertToUnsignedVec(MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI, const TargetInstrInfo &TII) { unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); @@ -729,7 +727,6 @@ struct MachineOutliner : public ModulePass { /// its leaf children to find the locations of its substring. /// /// \param ST A suffix tree to query. - /// \param TII TargetInstrInfo for the target. /// \param Mapper Contains outlining mapping information. /// \param[out] CandidateList Filled with candidates representing each /// beneficial substring. @@ -738,7 +735,7 @@ struct MachineOutliner : public ModulePass { /// /// \returns The length of the longest candidate found. unsigned - findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, + findCandidates(SuffixTree &ST, InstructionMapper &Mapper, std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList); @@ -770,14 +767,12 @@ struct MachineOutliner : public ModulePass { /// \param[out] FunctionList Filled with functions corresponding to each type /// of \p Candidate. /// \param ST The suffix tree for the module. - /// \param TII TargetInstrInfo for the module. /// /// \returns The length of the longest candidate found. 
0 if there are none. unsigned buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper, - const TargetInstrInfo &TII); + SuffixTree &ST, InstructionMapper &Mapper); /// Helper function for pruneOverlaps. /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. @@ -795,11 +790,9 @@ struct MachineOutliner : public ModulePass { /// \param[in,out] FunctionList A list of functions to be outlined. /// \param Mapper Contains instruction mapping info for outlining. /// \param MaxCandidateLen The length of the longest candidate. - /// \param TII TargetInstrInfo for the module. void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper, unsigned MaxCandidateLen, - const TargetInstrInfo &TII); + InstructionMapper &Mapper, unsigned MaxCandidateLen); /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. @@ -892,7 +885,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { } unsigned MachineOutliner::findCandidates( - SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, + SuffixTree &ST, InstructionMapper &Mapper, std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList) { CandidateList.clear(); @@ -945,7 +938,7 @@ unsigned MachineOutliner::findCandidates( // AA (where each "A" is an instruction). // // We might have some portion of the module that looks like this: - // AAAAAA (6 A's) + // AAAAAA (6 A's) // // In this case, there are 5 different copies of "AA" in this range, but // at most 3 can be outlined. If only outlining 3 of these is going to @@ -979,8 +972,16 @@ unsigned MachineOutliner::findCandidates( // We've found something we might want to outline. // Create an OutlinedFunction to store it and check if it'd be beneficial // to outline. + if (CandidatesForRepeatedSeq.empty()) + continue; + + // Arbitrarily choose a TII from the first candidate. + // FIXME: Should getOutliningCandidateInfo move to TargetMachine? + const TargetInstrInfo *TII = + CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo(); + OutlinedFunction OF = - TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq); + TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq); // If we deleted every candidate, then there's nothing to outline. if (OF.Candidates.empty()) @@ -1036,7 +1037,7 @@ void MachineOutliner::prune(Candidate &C, void MachineOutliner::pruneOverlaps( std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, - unsigned MaxCandidateLen, const TargetInstrInfo &TII) { + unsigned MaxCandidateLen) { // Return true if this candidate became unbeneficial for outlining in a // previous step. @@ -1127,13 +1128,13 @@ void MachineOutliner::pruneOverlaps( unsigned MachineOutliner::buildCandidateList( std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST, - InstructionMapper &Mapper, const TargetInstrInfo &TII) { + InstructionMapper &Mapper) { std::vector<unsigned> CandidateSequence; // Current outlining candidate. unsigned MaxCandidateLen = 0; // Length of the longest candidate. 
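The "AAAAAA" comment above is the crux of the candidate counting: a run of identical instructions yields many overlapping suffix-tree hits but far fewer outlinable occurrences, since outlined candidates must be disjoint. An illustrative counting helper (not patch code):

    // A run of RunLen identical instructions contains RunLen - SeqLen + 1
    // overlapping copies of a length-SeqLen sequence, but only
    // RunLen / SeqLen disjoint copies can actually be outlined.
    static unsigned maxDisjointCopies(unsigned RunLen, unsigned SeqLen) {
      return RunLen / SeqLen; // "AAAAAA": RunLen 6, SeqLen 2 -> 3, not 5
    }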
MaxCandidateLen = - findCandidates(ST, TII, Mapper, CandidateList, FunctionList); + findCandidates(ST, Mapper, CandidateList, FunctionList); // Sort the candidates in descending order. This will simplify the outlining // process when we have to remove the candidates from the mapping by @@ -1339,10 +1340,6 @@ bool MachineOutliner::runOnModule(Module &M) { return false; MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); - const TargetSubtargetInfo &STI = - MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget(); - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const TargetInstrInfo *TII = STI.getInstrInfo(); // If the user passed -enable-machine-outliner=always or // -enable-machine-outliner, the pass will run on all functions in the module. @@ -1382,6 +1379,8 @@ bool MachineOutliner::runOnModule(Module &M) { if (!MF) continue; + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) continue; @@ -1405,7 +1404,7 @@ bool MachineOutliner::runOnModule(Module &M) { continue; // MBB is suitable for outlining. Map it to a list of unsigneds. - Mapper.convertToUnsignedVec(MBB, *TRI, *TII); + Mapper.convertToUnsignedVec(MBB, *TII); } } @@ -1416,10 +1415,10 @@ bool MachineOutliner::runOnModule(Module &M) { // Find all of the outlining candidates. unsigned MaxCandidateLen = - buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); + buildCandidateList(CandidateList, FunctionList, ST, Mapper); // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); + pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen); // Outline each of the candidates and return true if something was outlined. bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 6095bdd06b69..f632a9bd457f 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -383,7 +383,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { assert(FromReg != ToReg && "Cannot replace a reg with itself"); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); - + // TODO: This could be more efficient by bulk changing the operands.
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 773661965f18..542491eabbf2 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -254,14 +254,14 @@ public: private: MachineInstr *PHI; unsigned idx; - + public: explicit PHI_iterator(MachineInstr *P) // begin iterator : PHI(P), idx(1) {} PHI_iterator(MachineInstr *P, bool) // end iterator : PHI(P), idx(PHI->getNumOperands()) {} - PHI_iterator &operator++() { idx += 2; return *this; } + PHI_iterator &operator++() { idx += 2; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 354f46e9e625..1fd40f757351 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -509,7 +509,7 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, } ToSplit.insert(std::make_pair(FromBB, ToBB)); - + return true; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index b444cd31eba2..79ca6adf95c4 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -655,7 +655,7 @@ static bool getDataDeps(const MachineInstr &UseMI, // Debug values should not be included in any calculations. if (UseMI.isDebugInstr()) return false; - + bool HasPhysRegs = false; for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(), E = UseMI.operands_end(); I != E; ++I) { @@ -1167,7 +1167,7 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { computeInstrDepths(MBB); if (!TBI.HasValidInstrHeights) computeInstrHeights(MBB); - + return Trace(*this, TBI); } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index d644e41abc5b..318776136e24 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1077,8 +1077,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { auto VerifyStackMapConstant = [&](unsigned Offset) { if (!MI->getOperand(Offset).isImm() || - MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || - !MI->getOperand(Offset + 1).isImm()) + MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || + !MI->getOperand(Offset + 1).isImm()) report("stack map constant to STATEPOINT not well formed!", MI); }; const unsigned VarStart = StatepointOpers(MI).getVarIdx(); diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index a878c34f9aa4..3660586c1358 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -594,7 +594,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator ReloadAfter = RestoreAfter ? 
std::next(MBBI) : MBBI; MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); - LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + if (ReloadBefore != MBB.end()) + LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); Scavenged.Restore = &*std::prev(SpillBefore); LiveUnits.removeReg(Reg); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7a99687757f8..a8c4b85df321 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -72,7 +72,6 @@ #include <string> #include <tuple> #include <utility> -#include <vector> using namespace llvm; @@ -483,9 +482,6 @@ namespace { /// returns false. bool findBetterNeighborChains(StoreSDNode *St); - /// Match "(X shl/srl V1) & V2" where V2 may not be present. - bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); - /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -2671,6 +2667,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold (A-(B-C)) -> A+(C-B) + if (N1.getOpcode() == ISD::SUB && N1.hasOneUse()) + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), + N1.getOperand(0))); + // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && @@ -2740,6 +2742,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // Prefer an add for more folding potential and possibly better codegen: + // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) + if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { + SDValue ShAmt = N1.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); + return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); + } + } + return SDValue(); } @@ -4205,8 +4218,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Allow one node which will masked along with any loads found. if (NodeToMask) return false; - - // Also ensure that the node to be masked only produces one data result. + + // Also ensure that the node to be masked only produces one data result. NodeToMask = Op.getNode(); if (NodeToMask->getNumValues() > 1) { bool HasValue = false; @@ -5148,25 +5161,140 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } -/// Match "(X shl/srl V1) & V2" where V2 may not be present. -bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { - if (Op.getOpcode() == ISD::AND) { - if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { - Mask = Op.getOperand(1); - Op = Op.getOperand(0); - } else { - return false; - } +static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND && + DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + Mask = Op.getOperand(1); + return Op.getOperand(0); } + return Op; +} +/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
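Before the rotate matching resumes below, the two new visitSUB folds above are easy to sanity-check with small values. A sketch, under the usual caveats that signed overflow is ignored and >> on a negative int is assumed to be an arithmetic shift:

    #include <cassert>
    #include <cstdint>

    static void checkVisitSubFolds(int32_t A, int32_t B, int32_t C,
                                   int32_t X) {
      // fold (A-(B-C)) -> A+(C-B), e.g. 10-(7-3) == 10+(3-7) == 6.
      assert(A - (B - C) == A + (C - B));
      // sub N0, (lshr X, 31) -> add N0, (ashr X, 31): lshr yields 0 or 1,
      // ashr yields 0 or -1, so subtracting one equals adding the other.
      assert((int32_t)((uint32_t)X >> 31) == -(X >> 31));
    }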
+static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, + SDValue &Mask) { + Op = stripConstantMask(DAG, Op, Mask); if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { Shift = Op; return true; } - return false; } +/// Helper function for visitOR to extract the needed side of a rotate idiom +/// from a shl/srl/mul/udiv. This is meant to handle cases where +/// InstCombine merged some outside op with one of the shifts from +/// the rotate pattern. +/// \returns An empty \c SDValue if the needed shift couldn't be extracted. +/// Otherwise, returns an expansion of \p ExtractFrom based on the following +/// patterns: +/// +/// (or (mul v c0) (shrl (mul v c1) c2)): +/// expands (mul v c0) -> (shl (mul v c1) c3) +/// +/// (or (udiv v c0) (shl (udiv v c1) c2)): +/// expands (udiv v c0) -> (shrl (udiv v c1) c3) +/// +/// (or (shl v c0) (shrl (shl v c1) c2)): +/// expands (shl v c0) -> (shl (shl v c1) c3) +/// +/// (or (shrl v c0) (shl (shrl v c1) c2)): +/// expands (shrl v c0) -> (shrl (shrl v c1) c3) +/// +/// Such that in all cases, c3+c2==bitwidth(op v c1). +static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, + SDValue ExtractFrom, SDValue &Mask, + const SDLoc &DL) { + assert(OppShift && ExtractFrom && "Empty SDValue"); + assert( + (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) && + "Existing shift must be valid as a rotate half"); + + ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + // Preconditions: + // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) + // + // Find opcode of the needed shift to be extracted from (op0 v c0). + unsigned Opcode = ISD::DELETED_NODE; + bool IsMulOrDiv = false; + // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift + // opcode or its arithmetic (mul or udiv) variant. + auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) { + IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant; + if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift) + return false; + Opcode = NeededShift; + return true; + }; + // op0 must be either the needed shift opcode or the mul/udiv equivalent + // that the needed shift can be extracted from. + if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) && + (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV))) + return SDValue(); + + // op0 must be the same opcode on both sides, have the same LHS argument, + // and produce the same value type. + SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || + OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || + ShiftedVT != ExtractFrom.getValueType()) + return SDValue(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. + ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); + // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. + ConstantSDNode *ExtractFromCst = + isConstOrConstSplat(ExtractFrom.getOperand(1)); + // TODO: We should be able to handle non-uniform constant vectors for these values + // Check that we have constant values. 
+ if (!OppShiftCst || !OppShiftCst->getAPIntValue() || + !OppLHSCst || !OppLHSCst->getAPIntValue() || + !ExtractFromCst || !ExtractFromCst->getAPIntValue()) + return SDValue(); + + // Compute the shift amount we need to extract to complete the rotate. + const unsigned VTWidth = ShiftedVT.getScalarSizeInBits(); + APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue(); + if (NeededShiftAmt.isNegative()) + return SDValue(); + // Normalize the bitwidth of the two mul/udiv/shift constant operands. + APInt ExtractFromAmt = ExtractFromCst->getAPIntValue(); + APInt OppLHSAmt = OppLHSCst->getAPIntValue(); + zeroExtendToMatch(ExtractFromAmt, OppLHSAmt); + + // Now try extract the needed shift from the ExtractFrom op and see if the + // result matches up with the existing shift's LHS op. + if (IsMulOrDiv) { + // Op to extract from is a mul or udiv by a constant. + // Check: + // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0 + // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0 + const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(), + NeededShiftAmt.getZExtValue()); + APInt ResultAmt; + APInt Rem; + APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem); + if (Rem != 0 || ResultAmt != OppLHSAmt) + return SDValue(); + } else { + // Op to extract from is a shift by a constant. + // Check: + // c2 - (bitwidth(op0 v c0) - c1) == c0 + if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc( + ExtractFromAmt.getBitWidth())) + return SDValue(); + } + + // Return the expanded shift op that should allow a rotate to be formed. + EVT ShiftVT = OppShift.getOperand(1).getValueType(); + EVT ResVT = ExtractFrom.getValueType(); + SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT); + return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode); +} + // Return true if we can prove that, whenever Neg and Pos are both in the // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: @@ -5333,13 +5461,40 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. - if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) - return nullptr; // Not part of a rotate. + matchRotateHalf(DAG, LHS, LHSShift, LHSMask); SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. - if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) - return nullptr; // Not part of a rotate. + matchRotateHalf(DAG, RHS, RHSShift, RHSMask); + + // If neither side matched a rotate half, bail + if (!LHSShift && !RHSShift) + return nullptr; + + // InstCombine may have combined a constant shl, srl, mul, or udiv with one + // side of the rotate, so try to handle that here. In all cases we need to + // pass the matched shift from the opposite side to compute the opcode and + // needed shift amount to extract. We still want to do this if both sides + // matched a rotate half because one half may be a potential overshift that + // can be broken down (ie if InstCombine merged two shl or srl ops into a + // single one). + + // Have LHS side of the rotate, try to extract the needed shift from the RHS. + if (LHSShift) + if (SDValue NewRHSShift = + extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL)) + RHSShift = NewRHSShift; + // Have RHS side of the rotate, try to extract the needed shift from the LHS. 
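A worked instance of the mul case handled above, with illustrative i32 constants (v is any i32 value): take (or (mul v 24) (srl (mul v 3) 29)). The existing rotate half is the srl by c2 = 29, so the needed opposite shift amount is c3 = 32 - 29 = 3. The divisibility check then asks whether c0 = 24 equals c1 * 2^c3 = 3 * 8 with remainder zero, which it does, so (mul v 24) expands to (shl (mul v 3) 3) and the or can complete as a rotate of (mul v 3) by 3.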
+ if (RHSShift) + if (SDValue NewLHSShift = + extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL)) + LHSShift = NewLHSShift; + + // If a side is still missing, nothing else we can do. + if (!RHSShift || !LHSShift) + return nullptr; + + // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) return nullptr; // Not shifting the same value. @@ -10270,7 +10425,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N10.getOperand(0))), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), - N0, Flags); + N0, Flags); } } @@ -10333,7 +10488,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + N1), Flags), Flags); } // fold (fsub x, (fma y, z, (fmul u, v))) @@ -10348,7 +10503,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20), - N21, N0, Flags), Flags); + N21, N0, Flags), Flags); } @@ -10368,7 +10523,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + N1), Flags), Flags); } } } @@ -10396,7 +10551,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1), Flags), Flags); + N1), Flags), Flags); } } } @@ -10419,7 +10574,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), - N0, Flags), Flags); + N0, Flags), Flags); } } @@ -10450,7 +10605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), - N0, Flags), Flags); + N0, Flags), Flags); } } } @@ -10506,7 +10661,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { Y, Flags); if (XC1 && XC1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); } return SDValue(); }; @@ -10530,7 +10685,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { if (XC0 && XC0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) @@ -10838,12 +10993,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath || + if (Options.UnsafeFPMath || (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; - } + } if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 @@ -11258,7 +11413,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); - if (!DAG.getTarget().Options.UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) return SDValue(); @@ -17913,9 +18068,9 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode *> Built; + SmallVector<SDNode *, 8> Built; SDValue S = - 
TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); + TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built); for (SDNode *N : Built) AddToWorklist(N); @@ -17933,8 +18088,8 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode *> Built; - SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); + SmallVector<SDNode *, 8> Built; + SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built); for (SDNode *N : Built) AddToWorklist(N); @@ -17959,9 +18114,9 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode *> Built; + SmallVector<SDNode *, 8> Built; SDValue S = - TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); + TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built); for (SDNode *N : Built) AddToWorklist(N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index e4a9d557d386..795ade588b8f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1130,7 +1130,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); + GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); bool CanLowerReturn = TLI.CanLowerReturn( CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); @@ -1548,7 +1548,7 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) { MachineInstr *CurLastLocalValue = getLastLocalValue(); if (CurLastLocalValue != SavedLastLocalValue) { - // Find the first local value instruction to be deleted. + // Find the first local value instruction to be deleted. // This is the instruction after SavedLastLocalValue if it is non-NULL. // Otherwise it's the first instruction in the block. MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); @@ -1569,7 +1569,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. - // We remove these instructions because SelectionDAGISel will generate + // We remove these instructions because SelectionDAGISel will generate // them again. removeDeadLocalValueCode(SavedLastLocalValue); return false; @@ -1630,7 +1630,7 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. if (isa<TerminatorInst>(I)) { - // PHI node handling may have generated local value instructions. + // PHI node handling may have generated local value instructions. // We remove them because SelectionDAGISel will generate them again. 
removeDeadLocalValueCode(SavedLastLocalValue); FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 42c7181dac41..d3c31911d677 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -89,10 +89,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, + CallingConv::ID CC = Fn->getCallingConv(); + + GetReturnInfo(CC, Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, mf.getDataLayout()); - CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), Outs, Fn->getContext()); + CanLowerReturn = + TLI->CanLowerReturn(CC, *MF, Fn->isVarArg(), Outs, Fn->getContext()); // If this personality uses funclets, we need to do a bit more work. DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b0ae1e0399fb..9aa0ea15f3b7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -153,7 +153,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { // of Endianness. LLVM's APFloat representation is not Endian sensitive, // and so always converts into a 128-bit APInt in a non-Endian-sensitive // way. However, APInt's are serialized in an Endian-sensitive fashion, - // so on big-Endian targets, the two doubles are output in the wrong + // so on big-Endian targets, the two doubles are output in the wrong // order. Fix this by manually flipping the order of the high 64 bits // and the low 64 bits here. if (DAG.getDataLayout().isBigEndian() && @@ -815,7 +815,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left + case ISD::CopyFromReg: // Operand is a register that we know to be left // unchanged by SoftenFloatResult(). case ISD::Register: // Leaf node. 
return true; @@ -838,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { if (N->getNumOperands() == 3) return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, N->getOperand(3)), 0); } @@ -1898,7 +1898,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: - case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break; + case ISD::FTRUNC: + case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations case ISD::FADD: diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2c6b1ee7900f..135922d6f267 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -510,7 +510,7 @@ private: SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); // Return true if we can skip softening the given operand or SDNode because - // either it was soften before by SoftenFloatResult and references to the + // either it was soften before by SoftenFloatResult and references to the // operand were replaced by ReplaceValueWith or it's value type is legal in HW // registers and the operand can be left unchanged. bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 67928d4bdbd5..3a98a7a904cb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -131,7 +131,7 @@ class VectorLegalizer { SDValue ExpandCTLZ(SDValue Op); SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); SDValue ExpandStrictFPOp(SDValue Op); - + /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and @@ -315,7 +315,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does // ISD::STRICT_FSQRT. 
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Action = TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::ADD: @@ -397,12 +397,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), + Action = TLI.getOperationAction(Node->getOpcode(), cast<VTSDNode>(Node->getOperand(1))->getVT()); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - Action = TLI.getOperationAction(Node->getOpcode(), + Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; case ISD::MSCATTER: @@ -736,7 +736,7 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::CTTZ_ZERO_UNDEF: return ExpandCTTZ_ZERO_UNDEF(Op); case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: + case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: @@ -1153,24 +1153,24 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { SmallVector<SDValue, 32> OpChains; for (unsigned i = 0; i < NumElems; ++i) { SmallVector<SDValue, 4> Opers; - SDValue Idx = DAG.getConstant(i, dl, + SDValue Idx = DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())); // The Chain is the first operand. Opers.push_back(Chain); - // Now process the remaining operands. + // Now process the remaining operands. for (unsigned j = 1; j < NumOpers; ++j) { SDValue Oper = Op.getOperand(j); EVT OperVT = Oper.getValueType(); if (OperVT.isVector()) - Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Oper, Idx); Opers.push_back(Oper); } - + SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); OpValues.push_back(ScalarOp.getValue(0)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1cd43ace48f3..f5d9dd234afd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1068,14 +1068,14 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, OpsLo.push_back(Chain); OpsHi.push_back(Chain); - // Now process the remaining operands. + // Now process the remaining operands. for (unsigned i = 1; i < NumOps; ++i) { - SDValue Op = N->getOperand(i); - SDValue OpLo = Op; - SDValue OpHi = Op; + SDValue Op = N->getOperand(i); + SDValue OpLo = Op; + SDValue OpHi = Op; EVT InVT = Op.getValueType(); - if (InVT.isVector()) { + if (InVT.isVector()) { // If the input also splits, handle it directly for a // compile time speedup. Otherwise split it by hand. if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) @@ -1092,10 +1092,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, EVT HiValueVTs[] = {HiVT, MVT::Other}; Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); - + // Build a factor node to remember that this Op is independent of the // other one. 
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); // Legalize the chain result - switch anything that used the old chain to diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1aa8df29af3b..5f6b6010cae2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -157,31 +157,36 @@ static cl::opt<unsigned> SwitchPeelThreshold( // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -// True if the Value passed requires ABI mangling as it is a parameter to a -// function or a return value from a function which is not an intrinsic. -static bool isABIRegCopy(const Value *V) { - const bool IsRetInst = V && isa<ReturnInst>(V); - const bool IsCallInst = V && isa<CallInst>(V); - const bool IsInLineAsm = - IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm(); - const bool IsIndirectFunctionCall = - IsCallInst && !IsInLineAsm && - !static_cast<const CallInst *>(V)->getCalledFunction(); - // It is possible that the call instruction is an inline asm statement or an - // indirect function call in which case the return value of - // getCalledFunction() would be nullptr. - const bool IsInstrinsicCall = - IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && - static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() != - Intrinsic::not_intrinsic; - - return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall)); +// Return the calling convention if the Value passed requires ABI mangling as it +// is a parameter to a function or a return value from a function which is not +// an intrinsic. +static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { + if (auto *R = dyn_cast<ReturnInst>(V)) + return R->getParent()->getParent()->getCallingConv(); + + if (auto *CI = dyn_cast<CallInst>(V)) { + const bool IsInlineAsm = CI->isInlineAsm(); + const bool IsIndirectFunctionCall = + !IsInlineAsm && !CI->getCalledFunction(); + + // It is possible that the call instruction is an inline asm statement or an + // indirect function call in which case the return value of + // getCalledFunction() would be nullptr. + const bool IsInstrinsicCall = + !IsInlineAsm && !IsIndirectFunctionCall && + CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic; + + if (!IsInlineAsm && !IsInstrinsicCall) + return CI->getCallingConv(); + } + + return None; } static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - bool IsABIRegCopy); + Optional<CallingConv::ID> CC); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type @@ -191,11 +196,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<ISD::NodeType> AssertOp = None, - bool IsABIRegCopy = false) { + Optional<CallingConv::ID> CC = None, + Optional<ISD::NodeType> AssertOp = None) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V, IsABIRegCopy); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, + CC); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -236,8 +241,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT, V); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, + OddVT, V, CC); // Combine the round and odd parts. Lo = Val; @@ -267,7 +272,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC); } } @@ -340,9 +345,11 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - bool IsABIRegCopy) { + Optional<CallingConv::ID> CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); + const bool IsABIRegCopy = CallConv.hasValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -355,8 +362,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -470,7 +477,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V, bool IsABIRegCopy); + MVT PartVT, const Value *V, + Optional<CallingConv::ID> CallConv); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. 
If the parts contain more bits than Val, then, for @@ -478,14 +486,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND, - bool IsABIRegCopy = false) { + Optional<CallingConv::ID> CallConv = None, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, - IsABIRegCopy); + CallConv); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -564,7 +572,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits, DL)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V, + CallConv); if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. @@ -605,16 +614,16 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, std::reverse(Parts, Parts + OrigNumParts); } - /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - bool IsABIRegCopy) { + Optional<CallingConv::ID> CallConv) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const bool IsABIRegCopy = CallConv.hasValue(); if (NumParts == 1) { EVT PartEVT = PartVT; @@ -679,8 +688,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -720,7 +729,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. 
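Editor's note on the getCopyToParts/getCopyFromParts hunks above: the old IsABIRegCopy bool becomes an Optional<CallingConv::ID> threaded down from the call site. A minimal sketch of the resulting idiom, using the per-CC TargetLowering queries this same patch introduces; the helper itself is illustrative, not code from the patch:

    // The Optional doubles as the old flag: hasValue() answers "is this an
    // ABI register copy?", while getValue() supplies the calling convention
    // that a plain bool could not carry down to the per-CC type queries.
    static MVT pickRegisterType(const TargetLowering &TLI, LLVMContext &Ctx,
                                Optional<CallingConv::ID> CC, EVT VT) {
      if (CC.hasValue())
        return TLI.getRegisterTypeForCallingConv(Ctx, CC.getValue(), VT);
      return TLI.getRegisterType(Ctx, VT);
    }

This is the same branch RegsForValue performs below via its new isABIMangled() predicate.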
@@ -729,29 +738,32 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); + getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V, + CallConv); } } RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, - EVT valuevt, bool IsABIMangledValue) + EVT valuevt, Optional<CallingConv::ID> CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), - RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} + RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - bool IsABIMangledValue) { + Optional<CallingConv::ID> CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - IsABIMangled = IsABIMangledValue; + CallConv = CC; for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = IsABIMangledValue - ? TLI.getNumRegistersForCallingConv(Context, ValueVT) - : TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = IsABIMangledValue - ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) - : TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = + isABIMangled() + ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = + isABIMangled() + ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -777,9 +789,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; - MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), + CallConv.getValue(), RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -837,8 +850,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, RegisterVT, P, DAG.getValueType(FromVT)); } - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT, V); + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, + RegisterVT, ValueVT, V, CallConv); Part += NumRegs; Parts.clear(); } @@ -859,15 +872,16 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumParts = RegCount[Value]; - MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() ? 
TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), + CallConv.getValue(), RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; - getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT, V, ExtendKind); + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], + NumParts, RegisterVT, V, CallConv, ExtendKind); Part += NumParts; } @@ -1164,7 +1178,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V)); + DAG.getDataLayout(), InReg, Ty, getABIRegCopyCC(V)); SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); @@ -1355,7 +1369,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), isABIRegCopy(V)); + Inst->getType(), getABIRegCopyCC(V)); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1589,12 +1603,14 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); - MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); + CallingConv::ID CC = F->getCallingConv(); + + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind, true); + &Parts[0], NumParts, PartVT, &I, CC, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -4929,7 +4945,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), isABIRegCopy(V)); + V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { unsigned Offset = 0; for (auto RegAndSize : RFV.getRegsAndSizes()) { @@ -4971,7 +4987,7 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe - // stack slot locations. + // stack slot locations. // // Consider "int x = 0; int *px = &x;". There are two kinds of interesting // debug values here after optimization: @@ -5288,7 +5304,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // The PHI node may be split up into several MI PHI nodes (in // FunctionLoweringInfo::set). RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), false); + V->getType(), None); if (RFV.occupiesMultipleRegs()) { unsigned Offset = 0; unsigned BitsToDescribe = 0; @@ -7182,10 +7198,11 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// uses features that we can't model on machineinstrs, we have SDISel do the /// allocation. 
This produces generally horrible, but correct, code. /// -/// OpInfo describes the operand. +/// OpInfo describes the operand +/// RefOpInfo describes the matching operand if any, the operand otherwise static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, - const SDLoc &DL, - SDISelAsmOperandInfo &OpInfo) { + const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &RefOpInfo) { LLVMContext &Context = *DAG.getContext(); MachineFunction &MF = DAG.getMachineFunction(); @@ -7195,8 +7212,8 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode, + RefOpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { @@ -7238,6 +7255,11 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } + // No need to allocate a matching input constraint since the constraint it's + // matching to has already been allocated. + if (OpInfo.isMatchingInputConstraint()) + return; + MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; @@ -7486,19 +7508,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. - if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + SDISelAsmOperandInfo &RefOpInfo = + OpInfo.isMatchingInputConstraint() + ? ConstraintOperands[OpInfo.getMatchedOperand()] + : ConstraintOperands[i]; + if (RefOpInfo.ConstraintType == TargetLowering::C_Register) + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo); } // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + SDISelAsmOperandInfo &RefOpInfo = + OpInfo.isMatchingInputConstraint() + ? ConstraintOperands[OpInfo.getMatchedOperand()] + : ConstraintOperands[i]; // C_Register operands have already been allocated, Other/Memory don't need // to be. - if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass) + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
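Editor's note on the RefOpInfo plumbing above: a matching input constraint (a digit such as "0") carries no register class of its own; it must inherit whatever was chosen for the operand it matches, which is why classification and allocation now go through the referenced operand. A hypothetical user-level reproducer in x86 GNU inline-asm syntax (not from the patch):

    // The "0" constraint ties the input to output operand 0, so both must
    // land in the same register. Classifying the tied input by its own
    // (empty) constraint string rather than via RefOpInfo picks the wrong
    // register class; the early return above also avoids allocating the
    // matched input a second time.
    int increment(int x) {
      int r;
      asm("incl %0" : "=r"(r) : "0"(x));
      return r;
    }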
@@ -8289,7 +8319,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); + GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), @@ -8305,7 +8335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + Type *StackSlotPtrType = PointerType::get(CLI.RetTy, + DL.getAllocaAddrSpace()); DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; @@ -8331,10 +8362,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = - getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); - unsigned NumRegs = - getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); + MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -8443,9 +8474,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); - unsigned NumParts = - getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -8477,7 +8509,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS.getInstruction(), ExtendKind, true); + CLI.CS.getInstruction(), CLI.CallConv, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -8577,14 +8609,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = - getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); - unsigned NumRegs = - getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); + MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); + unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), + CLI.CallConv, VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp, true)); + CLI.CallConv, AssertOp)); CurReg += NumRegs; } @@ -8623,8 +8655,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { // If this is an InlineAsm we have to match the registers required, not the // notional registers required by the type. 
- RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), isABIRegCopy(V)); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), + getABIRegCopyCC(V)); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8937,10 +8969,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); - MVT RegisterVT = - TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); - unsigned NumRegs = - TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI->getRegisterTypeForCallingConv( + *CurDAG->getContext(), F.getCallingConv(), VT); + unsigned NumRegs = TLI->getNumRegistersForCallingConv( + *CurDAG->getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, ArgNo, PartBase+i*RegisterVT.getStoreSize()); @@ -8995,8 +9027,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional<ISD::NodeType> AssertOp = None; - SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, nullptr, AssertOp); + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, + nullptr, F.getCallingConv(), AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -9046,10 +9078,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = - TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); - unsigned NumParts = - TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); + MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), + F.getCallingConv(), VT); + unsigned NumParts = TLI->getNumRegistersForCallingConv( + *CurDAG->getContext(), F.getCallingConv(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the @@ -9062,8 +9094,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, - PartVT, VT, nullptr, AssertOp, - true)); + PartVT, VT, nullptr, + F.getCallingConv(), AssertOp)); } i += NumParts; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index e421984b8af2..4b5dda982f1b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1015,14 +1015,18 @@ struct RegsForValue { /// Records if this value needs to be treated in an ABI dependant manner, /// different to normal type legalization. - bool IsABIMangled = false; + Optional<CallingConv::ID> CallConv; RegsForValue() = default; RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, - bool IsABIMangledValue = false); + Optional<CallingConv::ID> CC = None); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - bool IsABIMangledValue = false); + Optional<CallingConv::ID> CC); + + bool isABIMangled() const { + return CallConv.hasValue(); + } /// Add the specified values to this one. 
void append(const RegsForValue &RHS) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 5cf06e62b80c..54cbd6859f70 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -419,10 +419,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, Builder.getFrameIndexTy())); } else if (LiveInOnly) { // If this value is live in (not live-on-return, or live-through), we can - // treat it the same way patchpoint treats it's "live in" values. We'll - // end up folding some of these into stack references, but they'll be + // treat it the same way patchpoint treats it's "live in" values. We'll + // end up folding some of these into stack references, but they'll be // handled by the register allocator. Note that we do not have the notion - // of a late use so these values might be placed in registers which are + // of a late use so these values might be placed in registers which are // clobbered by the call. This is fine for live-in. Ops.push_back(Incoming); } else { @@ -498,7 +498,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, auto isGCValue =[&](const Value *V) { return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); }; - + // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and @@ -861,7 +861,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // completely and make statepoint call to return a tuple. unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, true); + DAG.getDataLayout(), Reg, RetTy, + ISP.getCallSite().getCallingConv()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fa867fcec366..e317268fa5f4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3421,7 +3421,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// with the multiplicative inverse of the constant. static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, const SDLoc &dl, SelectionDAG &DAG, - std::vector<SDNode *> &Created) { + SmallVectorImpl<SDNode *> &Created) { assert(d != 0 && "Division by zero!"); // Shift the value upfront if it is even, so the LSB is one. @@ -3450,8 +3450,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, } SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, - SelectionDAG &DAG, - std::vector<SDNode *> *Created) const { + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isIntDivCheap(N->getValueType(0), Attr)) @@ -3465,9 +3465,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode *> *Created) const { - assert(Created && "No vector to hold sdiv ops."); - + SmallVectorImpl<SDNode *> &Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -3478,7 +3476,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // If the sdiv has an 'exact' bit we can use a simpler lowering. if (N->getFlags().hasExact()) - return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); + return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, Created); APInt::ms magics = Divisor.magic(); @@ -3496,15 +3494,18 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhs or equvialent + + Created.push_back(Q.getNode()); + // If d > 0 and m < 0, add the numerator if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); - Created->push_back(Q.getNode()); + Created.push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); - Created->push_back(Q.getNode()); + Created.push_back(Q.getNode()); } auto &DL = DAG.getDataLayout(); // Shift right algebraic if shift value is nonzero @@ -3512,14 +3513,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, Q = DAG.getNode( ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); - Created->push_back(Q.getNode()); + Created.push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, getShiftAmountTy(Q.getValueType(), DL))); - Created->push_back(T.getNode()); + Created.push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } @@ -3529,9 +3530,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode *> *Created) const { - assert(Created && "No vector to hold udiv ops."); - + SmallVectorImpl<SDNode *> &Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); auto &DL = DAG.getDataLayout(); @@ -3554,7 +3553,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, Q = DAG.getNode( ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); - Created->push_back(Q.getNode()); + Created.push_back(Q.getNode()); // Get magic number for the shifted divisor. 
magics = Divisor.lshr(Shift).magicu(Shift); @@ -3573,7 +3572,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, else return SDValue(); // No mulhu or equivalent - Created->push_back(Q.getNode()); + Created.push_back(Q.getNode()); if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && @@ -3583,13 +3582,13 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); - Created->push_back(NPQ.getNode()); + Created.push_back(NPQ.getNode()); NPQ = DAG.getNode( ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); - Created->push_back(NPQ.getNode()); + Created.push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); - Created->push_back(NPQ.getNode()); + Created.push_back(NPQ.getNode()); return DAG.getNode( ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s - 1, dl, @@ -3994,7 +3993,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Scalarize the load and let the individual components be handled. SDValue Scalarized = scalarizeVectorLoad(LD, DAG); if (Scalarized->getOpcode() == ISD::MERGE_VALUES) - return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); + return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); } diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 25d405bf63de..3e12b32b12d4 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -175,7 +175,7 @@ bool ShadowStackGCLowering::doInitialization(Module &M) { } if (!Active) return false; - + // struct FrameMap { // int32_t NumRoots; // Number of roots in stack frame. // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. @@ -286,7 +286,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { if (!F.hasGC() || F.getGC() != std::string("shadow-stack")) return false; - + LLVMContext &Context = F.getContext(); // Find calls to llvm.gcroot. diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index ed664e4f81a3..8fbe724045e6 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -233,7 +233,7 @@ public: /// - Create a SplitEditor from a SplitAnalysis. /// - Start a new live interval with openIntv. /// - Mark the places where the new interval is entered using enterIntv* -/// - Mark the ranges where the new interval is used with useIntv* +/// - Mark the ranges where the new interval is used with useIntv* /// - Mark the places where the interval is exited with exitIntv*. /// - Finish the current interval with closeIntv and repeat from 2. /// - Rewrite instructions with finish(). diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 43f4bad595e3..7b1b76821daa 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -632,7 +632,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::BITREVERSE, VT, Expand); - + // These library functions default to expand. 
setOperationAction(ISD::FROUND, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); @@ -924,7 +924,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // STATEPOINT Deopt Spill - live-through, read only, indirect // STATEPOINT Deopt Alloca - live-through, read only, direct // (We're currently conservative and mark the deopt slots read/write in - // practice.) + // practice.) // STATEPOINT GC Spill - live-through, read/write, indirect // STATEPOINT GC Alloca - live-through, read/write, direct // The live-in vs live-through is handled already (the live through ones are @@ -1337,7 +1337,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, +void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, + AttributeList attr, SmallVectorImpl<ISD::OutputArg> &Outs, const TargetLowering &TLI, const DataLayout &DL) { SmallVector<EVT, 4> ValueVTs; @@ -1365,9 +1366,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, } unsigned NumParts = - TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); + TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT); MVT PartVT = - TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); + TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1410,7 +1411,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, *Fast = true; return true; } - + // This is a misaligned access. return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); } diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index b5dd2d4cca89..f6b91a2f0231 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -422,32 +422,34 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro"; } +static unsigned getEntrySizeForKind(SectionKind Kind) { + if (Kind.isMergeable1ByteCString()) + return 1; + else if (Kind.isMergeable2ByteCString()) + return 2; + else if (Kind.isMergeable4ByteCString()) + return 4; + else if (Kind.isMergeableConst4()) + return 4; + else if (Kind.isMergeableConst8()) + return 8; + else if (Kind.isMergeableConst16()) + return 16; + else if (Kind.isMergeableConst32()) + return 32; + else { + // We shouldn't have mergeable C strings or mergeable constants that we + // didn't handle above. 
+ assert(!Kind.isMergeableCString() && "unknown string width"); + assert(!Kind.isMergeableConst() && "unknown data width"); + return 0; + } +} + static MCSectionELF *selectELFSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) { - unsigned EntrySize = 0; - if (Kind.isMergeableCString()) { - if (Kind.isMergeable2ByteCString()) { - EntrySize = 2; - } else if (Kind.isMergeable4ByteCString()) { - EntrySize = 4; - } else { - EntrySize = 1; - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - } - } else if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) { - EntrySize = 4; - } else if (Kind.isMergeableConst8()) { - EntrySize = 8; - } else if (Kind.isMergeableConst16()) { - EntrySize = 16; - } else { - assert(Kind.isMergeableConst32() && "unknown data width"); - EntrySize = 32; - } - } StringRef Group = ""; if (const Comdat *C = getELFComdat(GO)) { @@ -455,7 +457,9 @@ static MCSectionELF *selectELFSectionForGlobal( Group = C->getName(); } - bool UniqueSectionNames = TM.getUniqueSectionNames(); + // Get the section entry size based on the kind. + unsigned EntrySize = getEntrySizeForKind(Kind); + SmallString<128> Name; if (Kind.isMergeableCString()) { // We also need alignment here. @@ -479,16 +483,17 @@ static MCSectionELF *selectELFSectionForGlobal( Name += *OptionalPrefix; } - if (EmitUniqueSection && UniqueSectionNames) { - Name.push_back('.'); - TM.getNameWithPrefix(Name, GO, Mang, true); - } unsigned UniqueID = MCContext::GenericSectionID; - if (EmitUniqueSection && !UniqueSectionNames) { - UniqueID = *NextUniqueID; - (*NextUniqueID)++; + if (EmitUniqueSection) { + if (TM.getUniqueSectionNames()) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/); + } else { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; + } } - // Use 0 as the unique ID for execute-only text + // Use 0 as the unique ID for execute-only text. if (Kind.isExecuteOnly()) UniqueID = 0; return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index 3fca2f4ee4fe..2db03288f2ac 100644 --- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -166,7 +166,7 @@ static cl::opt<CFLAAType> UseCFLAA( "Enable unification-based CFL-AA"), clEnumValN(CFLAAType::Andersen, "anders", "Enable inclusion-based CFL-AA"), - clEnumValN(CFLAAType::Both, "both", + clEnumValN(CFLAAType::Both, "both", "Enable both variants of CFL-AA"))); /// Option names for limiting the codegen pipeline. diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index e629c13f133f..65d0a7a774fe 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -54,7 +54,7 @@ static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt( cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false)); namespace { - + class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
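Editor's note on the selectELFSectionForGlobal change a few hunks up: hoisting the entry-size computation into getEntrySizeForKind flattens the old nested if/else into one ordered chain. The mapping it encodes (the sh_entsize of mergeable ELF sections), restated as a standalone sketch; the conventional section names in the comments are context, not part of the patch:

    // Mergeable C strings report their character width, mergeable constants
    // their element size; 0 means the kind has no fixed entry size.
    static unsigned entrySizeFor(SectionKind Kind) {
      if (Kind.isMergeable1ByteCString()) return 1;  // e.g. .rodata.str1.1
      if (Kind.isMergeable2ByteCString()) return 2;  // e.g. .rodata.str2.2
      if (Kind.isMergeable4ByteCString()) return 4;  // e.g. .rodata.str4.4
      if (Kind.isMergeableConst4())  return 4;       // e.g. .rodata.cst4
      if (Kind.isMergeableConst8())  return 8;       // e.g. .rodata.cst8
      if (Kind.isMergeableConst16()) return 16;      // e.g. .rodata.cst16
      if (Kind.isMergeableConst32()) return 32;      // e.g. .rodata.cst32
      return 0;
    }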
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/contrib/llvm/lib/DebugInfo/CodeView/RecordName.cpp index e50c43a1d481..d868ae237a44 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -307,6 +307,9 @@ static int getSymbolNameOffset(CVSymbol Sym) { // See BPRelativeSym case SymbolKind::S_BPREL32: return 8; + // See UsingNamespaceSym + case SymbolKind::S_UNAMESPACE: + return 0; default: return -1; } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp index af249adc9774..f8bf961f22a1 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -611,6 +611,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { return Error::success(); } +Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, + UsingNamespaceSym &UN) { + W.printString("Namespace", UN.Name); + return Error::success(); +} + Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { W.printNumber("Length", CVR.length()); return Error::success(); diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index 923837a45d9f..e77c8e8f02f5 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -463,3 +463,11 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { return Error::success(); } + +Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, + UsingNamespaceSym &UN) { + + error(IO.mapStringZ(UN.Name)); + + return Error::success(); +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index 95082d4a8e03..839ab6f0a705 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -428,7 +428,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_DEFRANGE_SUBFIELD: break; - // No type refernces. + // No type references. case SymbolKind::S_LABEL32: case SymbolKind::S_OBJNAME: case SymbolKind::S_COMPILE: @@ -439,6 +439,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_FRAMEPROC: case SymbolKind::S_THUNK32: case SymbolKind::S_FRAMECOOKIE: + case SymbolKind::S_UNAMESPACE: break; // Scope ending symbols. case SymbolKind::S_END: diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index e4f39dd988e1..2e29c9d7dfa0 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -226,7 +226,10 @@ bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, if (IsSecondPass && MapPos >= Map.size()) { // FIXME: Print a more useful error. We can give the current record and the // index that we think its pointing to. 
- LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); + if (LastError) + LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); + else + LastError = errorCorruptRecord(); } ++NumBadIndices; diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index adada672af00..f49ab40fad9a 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -38,7 +38,7 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { } bool -DWARFAbbreviationDeclaration::extract(DataExtractor Data, +DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint32_t* OffsetPtr) { clear(); const uint32_t Offset = *OffsetPtr; diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index da13c5047f77..9d2554ff9e2e 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -17,6 +17,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" @@ -249,6 +250,36 @@ static void dumpStringOffsetsSection( } } +// Dump the .debug_addr section. +static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, + DIDumpOptions DumpOpts, uint16_t Version, + uint8_t AddrSize) { + // TODO: Make this more general: add callback types to Error.h, create + // implementation and make all DWARF classes use them. + static auto WarnCallback = [](Error Warn) { + handleAllErrors(std::move(Warn), [](ErrorInfoBase &Info) { + WithColor::warning() << Info.message() << '\n'; + }); + }; + uint32_t Offset = 0; + while (AddrData.isValidOffset(Offset)) { + DWARFDebugAddrTable AddrTable; + uint32_t TableOffset = Offset; + if (Error Err = AddrTable.extract(AddrData, &Offset, Version, + AddrSize, WarnCallback)) { + WithColor::error() << toString(std::move(Err)) << '\n'; + // Keep going after an error, if we can, assuming that the length field + // could be read. If it couldn't, stop reading the section. + if (!AddrTable.hasValidLength()) + break; + uint64_t Length = AddrTable.getLength(); + Offset = TableOffset + Length; + } else { + AddrTable.dump(OS, DumpOpts); + } + } +} + // Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5). static void dumpRnglistsSection(raw_ostream &OS, DWARFDataExtractor &rnglistData, @@ -455,18 +486,16 @@ void DWARFContext::dump( } } + if (shouldDump(Explicit, ".debug_addr", DIDT_ID_DebugAddr, + DObj->getAddrSection().Data)) { + DWARFDataExtractor AddrData(*DObj, DObj->getAddrSection(), + isLittleEndian(), 0); + dumpAddrSection(OS, AddrData, DumpOpts, getMaxVersion(), getCUAddrSize()); + } + if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges, DObj->getRangeSection().Data)) { - // In fact, different compile units may have different address byte - // sizes, but for simplicity we just use the address byte size of the - // last compile unit (there is no easy and fast way to associate address - // range list and the compile unit it describes). - // FIXME: savedAddressByteSize seems sketchy. 
- uint8_t savedAddressByteSize = 0; - for (const auto &CU : compile_units()) { - savedAddressByteSize = CU->getAddressByteSize(); - break; - } + uint8_t savedAddressByteSize = getCUAddrSize(); DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(), isLittleEndian(), savedAddressByteSize); uint32_t offset = 0; @@ -474,7 +503,7 @@ void DWARFContext::dump( while (rangesData.isValidOffset(offset)) { if (Error E = rangeList.extract(rangesData, &offset)) { WithColor::error() << toString(std::move(E)) << '\n'; - break; + break; } rangeList.dump(OS); } @@ -1584,3 +1613,17 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) { RegInfo.reset(TheTarget->createMCRegInfo(TT.str())); return Error::success(); } + +uint8_t DWARFContext::getCUAddrSize() { + // In theory, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the + // last compile unit. In practice the address size field is repeated across + // various DWARF headers (at least in version 5) to make it easier to dump + // them independently, not to enable varying the address size. + uint8_t Addr = 0; + for (const auto &CU : compile_units()) { + Addr = CU->getAddressByteSize(); + break; + } + return Addr; +} diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp new file mode 100644 index 000000000000..7085ca067ba6 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -0,0 +1,198 @@ +//===- DWARFDebugAddr.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" + +using namespace llvm; + +void DWARFDebugAddrTable::clear() { + HeaderData = {}; + Addrs.clear(); + invalidateLength(); +} + +Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, + uint32_t *OffsetPtr, + uint16_t Version, + uint8_t AddrSize, + std::function<void(Error)> WarnCallback) { + clear(); + HeaderOffset = *OffsetPtr; + // Read and verify the length field. + if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t))) + return createStringError(errc::invalid_argument, + "section is not large enough to contain a " + ".debug_addr table length at offset 0x%" + PRIx32, *OffsetPtr); + uint16_t UnitVersion; + if (Version == 0) { + WarnCallback(createStringError(errc::invalid_argument, + "DWARF version is not defined in CU," + " assuming version 5")); + UnitVersion = 5; + } else { + UnitVersion = Version; + } + // TODO: Add support for DWARF64. 
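The dumpAddrSection loop above keeps going after a malformed table by skipping ahead with the table's declared length, and only gives up when even the length field was unreadable. A self-contained sketch of that resynchronization strategy on a toy length-prefixed format (ToyTable is illustrative, not a DWARF class):

#include <cstdint>
#include <cstdio>
#include <vector>

struct ToyTable {
  uint32_t Length = 0;      // payload bytes following the length field
  bool LengthValid = false;

  // Returns false on malformed input; Length stays usable if the length
  // field itself could be read.
  bool extract(const std::vector<uint8_t> &Data, uint32_t *Offset) {
    if (uint64_t(*Offset) + 4 > Data.size())
      return false; // not even a length field
    Length = uint32_t(Data[*Offset]) | uint32_t(Data[*Offset + 1]) << 8 |
             uint32_t(Data[*Offset + 2]) << 16 |
             uint32_t(Data[*Offset + 3]) << 24;
    LengthValid = true;
    *Offset += 4;
    if (uint64_t(*Offset) + Length > Data.size())
      return false; // truncated payload
    *Offset += Length;
    return true;
  }
};

void dumpAllTables(const std::vector<uint8_t> &Data) {
  uint32_t Offset = 0;
  while (Offset < Data.size()) {
    uint32_t TableStart = Offset;
    ToyTable T;
    if (!T.extract(Data, &Offset)) {
      std::fprintf(stderr, "error: bad table at 0x%x\n", TableStart);
      if (!T.LengthValid)
        break;                            // cannot resynchronize
      Offset = TableStart + 4 + T.Length; // skip the broken table
      continue;
    }
    // ... otherwise dump T here ...
  }
}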
+ Format = dwarf::DwarfFormat::DWARF32; + if (UnitVersion >= 5) { + HeaderData.Length = Data.getU32(OffsetPtr); + if (HeaderData.Length == 0xffffffffu) { + invalidateLength(); + return createStringError(errc::not_supported, + "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32, + HeaderOffset); + } + if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) { + uint32_t TmpLength = getLength(); + invalidateLength(); + return createStringError(errc::invalid_argument, + ".debug_addr table at offset 0x%" PRIx32 + " has too small length (0x%" PRIx32 + ") to contain a complete header", + HeaderOffset, TmpLength); + } + uint32_t End = HeaderOffset + getLength(); + if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) { + uint32_t TmpLength = getLength(); + invalidateLength(); + return createStringError(errc::invalid_argument, + "section is not large enough to contain a .debug_addr table " + "of length 0x%" PRIx32 " at offset 0x%" PRIx32, + TmpLength, HeaderOffset); + } + + HeaderData.Version = Data.getU16(OffsetPtr); + HeaderData.AddrSize = Data.getU8(OffsetPtr); + HeaderData.SegSize = Data.getU8(OffsetPtr); + DataSize = getDataSize(); + } else { + HeaderData.Version = UnitVersion; + HeaderData.AddrSize = AddrSize; + // TODO: Support for non-zero SegSize. + HeaderData.SegSize = 0; + DataSize = Data.size(); + } + + // Perform basic validation of the remaining header fields. + + // We support DWARF version 5 for now as well as pre-DWARF5 + // implementations of .debug_addr table, which doesn't contain a header + // and consists only of a series of addresses. + if (HeaderData.Version > 5) { + return createStringError(errc::not_supported, "version %" PRIu16 + " of .debug_addr section at offset 0x%" PRIx32 " is not supported", + HeaderData.Version, HeaderOffset); + } + // FIXME: For now we just treat version mismatch as an error, + // however the correct way to associate a .debug_addr table + // with a .debug_info table is to look at the DW_AT_addr_base + // attribute in the info table. + if (HeaderData.Version != UnitVersion) + return createStringError(errc::invalid_argument, + ".debug_addr table at offset 0x%" PRIx32 + " has version %" PRIu16 + " which is different from the version suggested" + " by the DWARF unit header: %" PRIu16, + HeaderOffset, HeaderData.Version, UnitVersion); + if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) + return createStringError(errc::not_supported, + ".debug_addr table at offset 0x%" PRIx32 + " has unsupported address size %" PRIu8, + HeaderOffset, HeaderData.AddrSize); + if (HeaderData.AddrSize != AddrSize && AddrSize != 0) + return createStringError(errc::invalid_argument, + ".debug_addr table at offset 0x%" PRIx32 + " has address size %" PRIu8 + " which is different from CU address size %" PRIu8, + HeaderOffset, HeaderData.AddrSize, AddrSize); + + // TODO: add support for non-zero segment selector size. 
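The DWARF64 rejection above keys off the reserved initial-length escape: a 32-bit unit length of 0xffffffff means the real 64-bit length follows. A compact sketch of that convention (the helper is illustrative and assumes a little-endian host):

#include <cstdint>
#include <cstring>

enum class DwarfFormat { DWARF32, DWARF64 };

// Read a DWARF "initial length". Returns false if the buffer is too short.
bool readInitialLength(const uint8_t *P, size_t Size, DwarfFormat &Format,
                       uint64_t &Length) {
  if (Size < 4)
    return false;
  uint32_t L32;
  std::memcpy(&L32, P, 4);
  if (L32 != 0xffffffffu) {
    Format = DwarfFormat::DWARF32;
    Length = L32;
    return true;
  }
  if (Size < 12)
    return false; // DWARF64 needs the 8-byte length that follows
  Format = DwarfFormat::DWARF64;
  std::memcpy(&Length, P + 4, 8);
  return true;
}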
+ if (HeaderData.SegSize != 0) + return createStringError(errc::not_supported, + ".debug_addr table at offset 0x%" PRIx32 + " has unsupported segment selector size %" PRIu8, + HeaderOffset, HeaderData.SegSize); + if (DataSize % HeaderData.AddrSize != 0) { + invalidateLength(); + return createStringError(errc::invalid_argument, + ".debug_addr table at offset 0x%" PRIx32 + " contains data of size %" PRIu32 + " which is not a multiple of addr size %" PRIu8, + HeaderOffset, DataSize, HeaderData.AddrSize); + } + Data.setAddressSize(HeaderData.AddrSize); + uint32_t AddrCount = DataSize / HeaderData.AddrSize; + for (uint32_t I = 0; I < AddrCount; ++I) + if (HeaderData.AddrSize == 4) + Addrs.push_back(Data.getU32(OffsetPtr)); + else + Addrs.push_back(Data.getU64(OffsetPtr)); + return Error::success(); +} + +void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { + if (DumpOpts.Verbose) + OS << format("0x%8.8" PRIx32 ": ", HeaderOffset); + OS << format("Addr Section: length = 0x%8.8" PRIx32 + ", version = 0x%4.4" PRIx16 ", " + "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 "\n", + HeaderData.Length, HeaderData.Version, HeaderData.AddrSize, + HeaderData.SegSize); + + static const char *Fmt32 = "0x%8.8" PRIx32; + static const char *Fmt64 = "0x%16.16" PRIx64; + std::string AddrFmt = "\n"; + std::string AddrFmtVerbose = " => "; + if (HeaderData.AddrSize == 4) { + AddrFmt.append(Fmt32); + AddrFmtVerbose.append(Fmt32); + } + else { + AddrFmt.append(Fmt64); + AddrFmtVerbose.append(Fmt64); + } + + if (Addrs.size() > 0) { + OS << "Addrs: ["; + for (uint64_t Addr : Addrs) { + OS << format(AddrFmt.c_str(), Addr); + if (DumpOpts.Verbose) + OS << format(AddrFmtVerbose.c_str(), + Addr + HeaderOffset + sizeof(HeaderData)); + } + OS << "\n]\n"; + } +} + +Expected<uint64_t> DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const { + if (Index < Addrs.size()) + return Addrs[Index]; + return createStringError(errc::invalid_argument, + "Index %" PRIu32 " is out of range of the " + ".debug_addr table at offset 0x%" PRIx32, + Index, HeaderOffset); +} + +uint32_t DWARFDebugAddrTable::getLength() const { + if (HeaderData.Length == 0) + return 0; + // TODO: DWARF64 support. + return HeaderData.Length + sizeof(uint32_t); +} + +uint32_t DWARFDebugAddrTable::getDataSize() const { + if (DataSize != 0) + return DataSize; + if (getLength() == 0) + return 0; + return getLength() - getHeaderSize(); +} diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 2a89faff9647..08be524ab464 100644 --- a/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/contrib/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -155,7 +155,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, // of the function's code, not the descriptor. 
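getAddrEntry above returns Expected<uint64_t> instead of asserting on a bad index, which obliges callers to consume the error. A hypothetical caller showing the pattern (lookupOrZero is not part of the patch):

#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

// An unchecked Expected aborts in assertion-enabled builds, so take either
// the value or the error explicitly.
uint64_t lookupOrZero(const llvm::DWARFDebugAddrTable &Table, uint32_t Index) {
  llvm::Expected<uint64_t> AddrOrErr = Table.getAddrEntry(Index);
  if (AddrOrErr)
    return *AddrOrErr;
  llvm::logAllUnhandledErrors(AddrOrErr.takeError(), llvm::errs(),
                              "debug_addr: ");
  return 0;
}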
uint64_t OpdOffset = SymbolAddress - OpdAddress; uint32_t OpdOffset32 = OpdOffset; - if (OpdOffset == OpdOffset32 && + if (OpdOffset == OpdOffset32 && OpdExtractor->isValidOffsetForAddress(OpdOffset32)) SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); } diff --git a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp index 5bfd2e6ff87e..72e4b56c05e3 100644 --- a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -450,6 +450,8 @@ class ReferenceType : public Node { const Node *Pointee; ReferenceKind RK; + mutable bool Printing = false; + // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any // other combination collapses to a lvalue ref. @@ -476,6 +478,9 @@ public: } void printLeft(OutputStream &s) const override { + if (Printing) + return; + SwapAndRestore<bool> SavePrinting(Printing, true); std::pair<ReferenceKind, const Node *> Collapsed = collapse(s); Collapsed.second->printLeft(s); if (Collapsed.second->hasArray(s)) @@ -486,6 +491,9 @@ public: s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&"); } void printRight(OutputStream &s) const override { + if (Printing) + return; + SwapAndRestore<bool> SavePrinting(Printing, true); std::pair<ReferenceKind, const Node *> Collapsed = collapse(s); if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s)) s += ")"; diff --git a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp index 596359b7d990..3eac87d61011 100644 --- a/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/contrib/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -29,15 +29,27 @@ // the demangler is 3x faster with this allocator compared to one with // STL containers. namespace { + constexpr size_t AllocUnit = 4096; + class ArenaAllocator { struct AllocatorNode { uint8_t *Buf = nullptr; size_t Used = 0; + size_t Capacity = 0; AllocatorNode *Next = nullptr; }; + void addNode(size_t Capacity) { + AllocatorNode *NewHead = new AllocatorNode; + NewHead->Buf = new uint8_t[Capacity]; + NewHead->Next = Head; + NewHead->Capacity = Capacity; + Head = NewHead; + NewHead->Used = 0; + } + public: - ArenaAllocator() : Head(new AllocatorNode) { Head->Buf = new uint8_t[Unit]; } + ArenaAllocator() { addNode(AllocUnit); } ~ArenaAllocator() { while (Head) { @@ -49,10 +61,25 @@ public: } } + char *allocUnalignedBuffer(size_t Length) { + uint8_t *Buf = Head->Buf + Head->Used; + + Head->Used += Length; + if (Head->Used > Head->Capacity) { + // It's possible we need a buffer which is larger than our default unit + // size, so we need to be careful to add a node with capacity that is at + // least as large as what we need. + addNode(std::max(AllocUnit, Length)); + Head->Used = Length; + Buf = Head->Buf; + } + + return reinterpret_cast<char *>(Buf); + } + template <typename T, typename... Args> T *alloc(Args &&... 
ConstructorArgs) { size_t Size = sizeof(T); - assert(Size < Unit); assert(Head && Head->Buf); size_t P = (size_t)Head->Buf + Head->Used; @@ -62,20 +89,15 @@ public: size_t Adjustment = AlignedP - P; Head->Used += Size + Adjustment; - if (Head->Used < Unit) + if (Head->Used < Head->Capacity) return new (PP) T(std::forward<Args>(ConstructorArgs)...); - AllocatorNode *NewHead = new AllocatorNode; - NewHead->Buf = new uint8_t[ArenaAllocator::Unit]; - NewHead->Next = Head; - Head = NewHead; - NewHead->Used = Size; - return new (NewHead->Buf) T(std::forward<Args>(ConstructorArgs)...); + addNode(AllocUnit); + Head->Used = Size; + return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...); } private: - static constexpr size_t Unit = 4096; - AllocatorNode *Head = nullptr; }; } // namespace @@ -117,7 +139,7 @@ enum class StorageClass : uint8_t { enum class QualifierMangleMode { Drop, Mangle, Result }; -enum class PointerAffinity { Pointer, Reference }; +enum class PointerAffinity { Pointer, Reference, RValueReference }; // Calling conventions enum class CallingConv : uint8_t { @@ -141,7 +163,6 @@ enum class PrimTy : uint8_t { None, Function, Ptr, - Ref, MemberPtr, Array, @@ -155,6 +176,8 @@ enum class PrimTy : uint8_t { Char, Schar, Uchar, + Char16, + Char32, Short, Ushort, Int, @@ -167,6 +190,7 @@ enum class PrimTy : uint8_t { Float, Double, Ldouble, + Nullptr }; // Function classes @@ -183,15 +207,30 @@ enum FuncClass : uint8_t { namespace { struct Type; +struct Name; -// Represents a list of parameters (template params or function arguments. -// It's represented as a linked list. -struct ParamList { +struct FunctionParams { bool IsVariadic = false; Type *Current = nullptr; - ParamList *Next = nullptr; + FunctionParams *Next = nullptr; +}; + +struct TemplateParams { + bool IsTemplateTemplate = false; + bool IsAliasTemplate = false; + + // Type can be null if this is a template template parameter. In that case + // only Name will be valid. + Type *ParamType = nullptr; + + // Name can be valid if this is a template template parameter (see above) or + // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case + // Name contains the name of the function and Type contains the signature. + Name *ParamName = nullptr; + + TemplateParams *Next = nullptr; }; // The type class. Mangled symbols are first parsed and converted to @@ -232,7 +271,7 @@ struct Name { StringView Operator; // Template parameters. Null if not a template. - ParamList TemplateParams; + TemplateParams *TParams = nullptr; // Nested BackReferences (e.g. "A::B::C") are represented as a linked list. Name *Next = nullptr; @@ -243,6 +282,8 @@ struct PointerType : public Type { void outputPre(OutputStream &OS) override; void outputPost(OutputStream &OS) override; + PointerAffinity Affinity; + // Represents a type X in "a pointer to X", "a reference to X", // "an array of X", or "a function returning X". Type *Pointee = nullptr; @@ -276,7 +317,7 @@ struct FunctionType : public Type { CallingConv CallConvention; FuncClass FunctionClass; - ParamList Params; + FunctionParams Params; }; struct UdtType : public Type { @@ -302,9 +343,13 @@ struct ArrayType : public Type { static bool isMemberPointer(StringView MangledName) { switch (MangledName.popFront()) { + case '$': + // This is probably an rvalue reference (e.g. $$Q), and you cannot have an + // rvalue reference to a member. + return false; case 'A': // 'A' indicates a reference, and you cannot have a reference to a member - // function or member variable. 
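The reworked ArenaAllocator above grows by linked nodes: alloc bump-allocates out of the head node, and allocUnalignedBuffer sizes a fresh node at std::max(AllocUnit, Length) so even an oversized string fits in the node that was just added. A simplified sketch of that bump-allocation scheme (alignment handling omitted):

#include <algorithm>
#include <cstddef>
#include <cstdint>

class Arena {
  static constexpr size_t Unit = 4096;
  struct Node {
    uint8_t *Buf;
    size_t Used, Capacity;
    Node *Next;
  };
  Node *Head = nullptr;

  void addNode(size_t Capacity) {
    Head = new Node{new uint8_t[Capacity], 0, Capacity, Head};
  }

public:
  Arena() { addNode(Unit); }
  ~Arena() {
    while (Node *N = Head) {
      Head = N->Next;
      delete[] N->Buf;
      delete N;
    }
  }

  // Start a new node when the head is full; a request larger than Unit
  // gets a node of exactly the needed capacity, so it always fits.
  void *alloc(size_t Size) {
    if (Head->Used + Size > Head->Capacity)
      addNode(std::max(Unit, Size));
    void *P = Head->Buf + Head->Used;
    Head->Used += Size;
    return P;
  }
};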
+ // function or member. return false; case 'P': case 'Q': @@ -386,14 +431,58 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) { } } +static bool startsWithLocalScopePattern(StringView S) { + if (!S.consumeFront('?')) + return false; + if (S.size() < 2) + return false; + + size_t End = S.find('?'); + if (End == StringView::npos) + return false; + StringView Candidate = S.substr(0, End); + if (Candidate.empty()) + return false; + + // \?[0-9]\? + // ?@? is the discriminator 0. + if (Candidate.size() == 1) + return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); + + // If it's not 0-9, then it's an encoded number terminated with an @ + if (Candidate.back() != '@') + return false; + Candidate = Candidate.dropBack(); + + // An encoded number starts with B-P and all subsequent digits are in A-P. + // Note that the reason the first digit cannot be A is two fold. First, it + // would create an ambiguity with ?A which delimits the beginning of an + // anonymous namespace. Second, A represents 0, and you don't start a multi + // digit number with a leading 0. Presumably the anonymous namespace + // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. + if (Candidate[0] < 'B' || Candidate[0] > 'P') + return false; + Candidate = Candidate.dropFront(); + while (!Candidate.empty()) { + if (Candidate[0] < 'A' || Candidate[0] > 'P') + return false; + Candidate = Candidate.dropFront(); + } + + return true; +} + +static void outputName(OutputStream &OS, const Name *TheName); + // Write a function or template parameter list. -static void outputParameterList(OutputStream &OS, const ParamList &Params) { +static void outputParameterList(OutputStream &OS, + const FunctionParams &Params) { if (!Params.Current) { OS << "void"; return; } - const ParamList *Head = &Params; + const FunctionParams *Head = &Params; while (Head) { Type::outputPre(OS, *Head->Current); Type::outputPost(OS, *Head->Current); @@ -405,12 +494,39 @@ static void outputParameterList(OutputStream &OS, const ParamList &Params) { } } -static void outputTemplateParams(OutputStream &OS, const Name &TheName) { - if (!TheName.TemplateParams.Current) +static void outputParameterList(OutputStream &OS, + const TemplateParams &Params) { + if (!Params.ParamType && !Params.ParamName) { + OS << "<>"; return; + } OS << "<"; - outputParameterList(OS, TheName.TemplateParams); + const TemplateParams *Head = &Params; + while (Head) { + // Type can be null if this is a template template parameter, + // and Name can be null if this is a simple type. + + if (Head->ParamType && Head->ParamName) { + // Function pointer. + OS << "&"; + Type::outputPre(OS, *Head->ParamType); + outputName(OS, Head->ParamName); + Type::outputPost(OS, *Head->ParamType); + } else if (Head->ParamType) { + // simple type. + Type::outputPre(OS, *Head->ParamType); + Type::outputPost(OS, *Head->ParamType); + } else { + // Template alias. + outputName(OS, Head->ParamName); + } + + Head = Head->Next; + + if (Head) + OS << ", "; + } OS << ">"; } @@ -420,29 +536,32 @@ static void outputName(OutputStream &OS, const Name *TheName) { outputSpaceIfNecessary(OS); + const Name *Previous = nullptr; // Print out namespaces or outer class BackReferences. for (; TheName->Next; TheName = TheName->Next) { + Previous = TheName; OS << TheName->Str; - outputTemplateParams(OS, *TheName); + if (TheName->TParams) + outputParameterList(OS, *TheName->TParams); OS << "::"; } // Print out a regular name. 
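startsWithLocalScopePattern above validates the encoded-number form its comment describes: base-16 digits 'A' through 'P' (A = 0, ..., P = 15) terminated by '@', never starting with 'A'. A sketch of the matching decoder (decodeEncodedNumber is illustrative, not part of the patch):

#include <cstdint>
#include <string>

// Decode "<hex-digit>+ @" where 'A'..'P' map to 0..15. Returns false on a
// character outside that range, an empty digit run, or a missing '@'.
bool decodeEncodedNumber(const std::string &S, uint64_t &Value) {
  Value = 0;
  size_t I = 0;
  for (; I < S.size() && S[I] != '@'; ++I) {
    if (S[I] < 'A' || S[I] > 'P')
      return false;
    Value = Value * 16 + uint64_t(S[I] - 'A');
  }
  return I > 0 && I < S.size(); // consumed at least one digit, hit '@'
}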
if (TheName->Operator.empty()) { OS << TheName->Str; - outputTemplateParams(OS, *TheName); + if (TheName->TParams) + outputParameterList(OS, *TheName->TParams); return; } // Print out ctor or dtor. + if (TheName->Operator == "dtor") + OS << "~"; + if (TheName->Operator == "ctor" || TheName->Operator == "dtor") { - OS << TheName->Str; - outputTemplateParams(OS, *TheName); - OS << "::"; - if (TheName->Operator == "dtor") - OS << "~"; - OS << TheName->Str; - outputTemplateParams(OS, *TheName); + OS << Previous->Str; + if (Previous->TParams) + outputParameterList(OS, *Previous->TParams); return; } @@ -514,6 +633,12 @@ void Type::outputPre(OutputStream &OS) { case PrimTy::Uchar: OS << "unsigned char"; break; + case PrimTy::Char16: + OS << "char16_t"; + break; + case PrimTy::Char32: + OS << "char32_t"; + break; case PrimTy::Short: OS << "short"; break; @@ -550,6 +675,9 @@ void Type::outputPre(OutputStream &OS) { case PrimTy::Ldouble: OS << "long double"; break; + case PrimTy::Nullptr: + OS << "std::nullptr_t"; + break; default: assert(false && "Invalid primitive type!"); } @@ -584,8 +712,10 @@ static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity, if (Affinity == PointerAffinity::Pointer) OS << "*"; - else + else if (Affinity == PointerAffinity::Reference) OS << "&"; + else + OS << "&&"; } void PointerType::outputPre(OutputStream &OS) { @@ -596,9 +726,6 @@ void PointerType::outputPre(OutputStream &OS) { if (Quals & Q_Unaligned) OS << "__unaligned "; - PointerAffinity Affinity = (Prim == PrimTy::Ptr) ? PointerAffinity::Pointer - : PointerAffinity::Reference; - outputPointerIndicator(OS, Affinity, nullptr, Pointee); // FIXME: We should output this, but it requires updating lots of tests. @@ -668,6 +795,15 @@ void FunctionType::outputPost(OutputStream &OS) { OS << " const"; if (Quals & Q_Volatile) OS << " volatile"; + if (Quals & Q_Restrict) + OS << " __restrict"; + if (Quals & Q_Unaligned) + OS << " __unaligned"; + + if (RefKind == ReferenceKind::LValueRef) + OS << " &"; + else if (RefKind == ReferenceKind::RValueRef) + OS << " &&"; if (ReturnType) Type::outputPost(OS, *ReturnType); @@ -716,6 +852,11 @@ void ArrayType::outputPost(OutputStream &OS) { Type::outputPost(OS, *ElementType); } +struct Symbol { + Name *SymbolName = nullptr; + Type *SymbolType = nullptr; +}; + } // namespace namespace { @@ -725,63 +866,68 @@ namespace { // It also has a set of functions to cnovert Type instances to strings. class Demangler { public: - Demangler(OutputStream &OS, StringView s) : OS(OS), MangledName(s) {} + Demangler() = default; // You are supposed to call parse() first and then check if error is true. If // it is false, call output() to write the formatted name to the given stream. - void parse(); - void output(); + Symbol *parse(StringView &MangledName); + void output(const Symbol *S, OutputStream &OS); // True if an error occurred. bool Error = false; private: - Type *demangleVariableEncoding(); - Type *demangleFunctionEncoding(); + Type *demangleVariableEncoding(StringView &MangledName); + Type *demangleFunctionEncoding(StringView &MangledName); - Qualifiers demanglePointerExtQualifiers(); + Qualifiers demanglePointerExtQualifiers(StringView &MangledName); // Parser functions. This is a recursive-descent parser. 
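The big refactor running through this Demangler class is mechanical but important: instead of a MangledName member, every demangle* rule now takes the remaining input as a StringView& and consumes it from the front, which is what lets parse() be re-entered for nested symbols (see demangleLocallyScopedNamePiece later in this diff). A toy consume-front parser in the same style:

#include <cctype>
#include <string>

// Each rule takes the remaining input by reference and shortens it as it
// matches, mirroring the StringView& threading introduced above.
struct Input {
  std::string S;
  bool consumeFront(char C) {
    if (S.empty() || S[0] != C)
      return false;
    S.erase(0, 1);
    return true;
  }
};

// number ::= digit+
bool parseNumber(Input &In, int &Out) {
  if (In.S.empty() || !std::isdigit((unsigned char)In.S[0]))
    return false;
  Out = 0;
  while (!In.S.empty() && std::isdigit((unsigned char)In.S[0])) {
    Out = Out * 10 + (In.S[0] - '0');
    In.S.erase(0, 1);
  }
  return true;
}

// list ::= number (',' number)*
bool parseList(Input &In) {
  int N;
  if (!parseNumber(In, N))
    return false;
  while (In.consumeFront(','))
    if (!parseNumber(In, N))
      return false;
  return true;
}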
- Type *demangleType(QualifierMangleMode QMM); - Type *demangleBasicType(); - UdtType *demangleClassType(); - PointerType *demanglePointerType(); - MemberPointerType *demangleMemberPointerType(); - FunctionType *demangleFunctionType(bool HasThisQuals, bool IsFunctionPointer); + Type *demangleType(StringView &MangledName, QualifierMangleMode QMM); + Type *demangleBasicType(StringView &MangledName); + UdtType *demangleClassType(StringView &MangledName); + PointerType *demanglePointerType(StringView &MangledName); + MemberPointerType *demangleMemberPointerType(StringView &MangledName); + FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals, + bool IsFunctionPointer); - ArrayType *demangleArrayType(); + ArrayType *demangleArrayType(StringView &MangledName); - ParamList demangleTemplateParameterList(); - ParamList demangleFunctionParameterList(); + TemplateParams *demangleTemplateParameterList(StringView &MangledName); + FunctionParams demangleFunctionParameterList(StringView &MangledName); - int demangleNumber(); - void demangleNamePiece(Name &Node, bool IsHead); + int demangleNumber(StringView &MangledName); - StringView demangleString(bool memorize); void memorizeString(StringView s); - Name *demangleName(); - void demangleOperator(Name *); - StringView demangleOperatorName(); - FuncClass demangleFunctionClass(); - CallingConv demangleCallingConvention(); - StorageClass demangleVariableStorageClass(); - ReferenceKind demangleReferenceKind(); - void demangleThrowSpecification(); - std::pair<Qualifiers, bool> demangleQualifiers(); + /// Allocate a copy of \p Borrowed into memory that we own. + StringView copyString(StringView Borrowed); - // The result is written to this stream. - OutputStream OS; + Name *demangleFullyQualifiedTypeName(StringView &MangledName); + Name *demangleFullyQualifiedSymbolName(StringView &MangledName); - // Mangled symbol. demangle* functions shorten this string - // as they parse it. - StringView MangledName; + Name *demangleUnqualifiedTypeName(StringView &MangledName); + Name *demangleUnqualifiedSymbolName(StringView &MangledName); - // A parsed mangled symbol. - Type *SymbolType = nullptr; + Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName); + Name *demangleNameScopePiece(StringView &MangledName); - // The main symbol name. (e.g. "ns::foo" in "int ns::foo()".) - Name *SymbolName = nullptr; + Name *demangleBackRefName(StringView &MangledName); + Name *demangleClassTemplateName(StringView &MangledName); + Name *demangleOperatorName(StringView &MangledName); + Name *demangleSimpleName(StringView &MangledName, bool Memorize); + Name *demangleAnonymousNamespaceName(StringView &MangledName); + Name *demangleLocallyScopedNamePiece(StringView &MangledName); + + StringView demangleSimpleString(StringView &MangledName, bool Memorize); + + FuncClass demangleFunctionClass(StringView &MangledName); + CallingConv demangleCallingConvention(StringView &MangledName); + StorageClass demangleVariableStorageClass(StringView &MangledName); + ReferenceKind demangleReferenceKind(StringView &MangledName); + void demangleThrowSpecification(StringView &MangledName); + + std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName); // Memory allocator. 
ArenaAllocator Arena; @@ -809,28 +955,36 @@ private: }; } // namespace +StringView Demangler::copyString(StringView Borrowed) { + char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); + std::strcpy(Stable, Borrowed.begin()); + + return {Stable, Borrowed.size()}; +} + // Parser entry point. -void Demangler::parse() { +Symbol *Demangler::parse(StringView &MangledName) { + Symbol *S = Arena.alloc<Symbol>(); + // MSVC-style mangled symbols must start with '?'. if (!MangledName.consumeFront("?")) { - SymbolName = Arena.alloc<Name>(); - SymbolName->Str = MangledName; - SymbolType = Arena.alloc<Type>(); - SymbolType->Prim = PrimTy::Unknown; + S->SymbolName = Arena.alloc<Name>(); + S->SymbolName->Str = MangledName; + S->SymbolType = Arena.alloc<Type>(); + S->SymbolType->Prim = PrimTy::Unknown; + return S; } // What follows is a main symbol name. This may include // namespaces or class BackReferences. - SymbolName = demangleName(); + S->SymbolName = demangleFullyQualifiedSymbolName(MangledName); // Read a variable. - if (startsWithDigit(MangledName)) { - SymbolType = demangleVariableEncoding(); - return; - } + S->SymbolType = startsWithDigit(MangledName) + ? demangleVariableEncoding(MangledName) + : demangleFunctionEncoding(MangledName); - // Read a function. - SymbolType = demangleFunctionEncoding(); + return S; } // <type-encoding> ::= <storage-class> <variable-type> @@ -840,10 +994,10 @@ void Demangler::parse() { // ::= 3 # global // ::= 4 # static local -Type *Demangler::demangleVariableEncoding() { - StorageClass SC = demangleVariableStorageClass(); +Type *Demangler::demangleVariableEncoding(StringView &MangledName) { + StorageClass SC = demangleVariableStorageClass(MangledName); - Type *Ty = demangleType(QualifierMangleMode::Drop); + Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop); Ty->Storage = SC; @@ -851,17 +1005,17 @@ Type *Demangler::demangleVariableEncoding() { // ::= <type> <pointee-cvr-qualifiers> # pointers, references switch (Ty->Prim) { case PrimTy::Ptr: - case PrimTy::Ref: case PrimTy::MemberPtr: { Qualifiers ExtraChildQuals = Q_None; - Ty->Quals = Qualifiers(Ty->Quals | demanglePointerExtQualifiers()); + Ty->Quals = + Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName)); bool IsMember = false; - std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(); + std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); if (Ty->Prim == PrimTy::MemberPtr) { assert(IsMember); - Name *BackRefName = demangleName(); + Name *BackRefName = demangleFullyQualifiedTypeName(MangledName); (void)BackRefName; MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty); MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals); @@ -873,7 +1027,7 @@ Type *Demangler::demangleVariableEncoding() { break; } default: - Ty->Quals = demangleQualifiers().first; + Ty->Quals = demangleQualifiers(MangledName).first; break; } @@ -891,7 +1045,7 @@ Type *Demangler::demangleVariableEncoding() { // ::= <hex digit>+ @ # when Numbrer == 0 or >= 10 // // <hex-digit> ::= [A-P] # A = 0, B = 1, ... -int Demangler::demangleNumber() { +int Demangler::demangleNumber(StringView &MangledName) { bool neg = MangledName.consumeFront("?"); if (startsWithDigit(MangledName)) { @@ -918,23 +1072,6 @@ int Demangler::demangleNumber() { return 0; } -// Read until the next '@'. 
-StringView Demangler::demangleString(bool Memorize) { - for (size_t i = 0; i < MangledName.size(); ++i) { - if (MangledName[i] != '@') - continue; - StringView ret = MangledName.substr(0, i); - MangledName = MangledName.dropFront(i + 1); - - if (Memorize) - memorizeString(ret); - return ret; - } - - Error = true; - return ""; -} - // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. // Memorize it. void Demangler::memorizeString(StringView S) { @@ -946,179 +1083,322 @@ void Demangler::memorizeString(StringView S) { BackReferences[BackRefCount++] = S; } -void Demangler::demangleNamePiece(Name &Node, bool IsHead) { - if (startsWithDigit(MangledName)) { - size_t I = MangledName[0] - '0'; - if (I >= BackRefCount) { - Error = true; - return; - } - MangledName = MangledName.dropFront(); - Node.Str = BackReferences[I]; - } else if (MangledName.consumeFront("?$")) { - // Class template. - Node.Str = demangleString(false); - Node.TemplateParams = demangleTemplateParameterList(); - } else if (!IsHead && MangledName.consumeFront("?A")) { - // Anonymous namespace starts with ?A. So does overloaded operator[], - // but the distinguishing factor is that namespace themselves are not - // mangled, only the variables and functions inside of them are. So - // an anonymous namespace will never occur as the first item in the - // name. - Node.Str = "`anonymous namespace'"; - if (!MangledName.consumeFront('@')) { - Error = true; - return; - } - } else if (MangledName.consumeFront("?")) { - // Overloaded operator. - demangleOperator(&Node); - } else { - // Non-template functions or classes. - Node.Str = demangleString(true); +Name *Demangler::demangleBackRefName(StringView &MangledName) { + assert(startsWithDigit(MangledName)); + + size_t I = MangledName[0] - '0'; + if (I >= BackRefCount) { + Error = true; + return nullptr; } -} -// Parses a name in the form of A@B@C@@ which represents C::B::A. -Name *Demangler::demangleName() { - Name *Head = nullptr; + MangledName = MangledName.dropFront(); + Name *Node = Arena.alloc<Name>(); + Node->Str = BackReferences[I]; + return Node; +} - while (!MangledName.consumeFront("@")) { - Name *Elem = Arena.alloc<Name>(); +Name *Demangler::demangleClassTemplateName(StringView &MangledName) { + assert(MangledName.startsWith("?$")); + MangledName.consumeFront("?$"); - assert(!Error); - demangleNamePiece(*Elem, Head == nullptr); - if (Error) - return nullptr; + Name *Node = demangleSimpleName(MangledName, false); + Node->TParams = demangleTemplateParameterList(MangledName); - Elem->Next = Head; - Head = Elem; - if (MangledName.empty()) { - Error = true; - return nullptr; - } - } + // Render this class template name into a string buffer so that we can + // memorize it for the purpose of back-referencing. 
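memorizeString above implements name back-referencing: the first ten distinct name pieces are recorded, and a bare digit later in the mangled name replays one of them; class template names must first be rendered into arena-owned storage (copyString) so the memorized view outlives the temporary output buffer. A sketch of the table itself:

#include <cassert>
#include <string>

// Sketch of the ?0-?9 back-referencing scheme: the first ten distinct
// name pieces are memorized in order; a single digit replays one of them.
struct BackRefTable {
  std::string Names[10];
  size_t Count = 0;

  void memorize(const std::string &S) {
    if (Count >= 10)
      return;
    for (size_t I = 0; I < Count; ++I)
      if (Names[I] == S)
        return; // already memorized
    Names[Count++] = S;
  }

  const std::string &lookup(char Digit) const {
    assert(Digit >= '0' && Digit < char('0' + Count));
    return Names[Digit - '0'];
  }
};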
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); + outputName(OS, Node); + OS << '\0'; + char *Name = OS.getBuffer(); - return Head; -} + StringView Owned = copyString(Name); + memorizeString(Owned); + std::free(Name); -void Demangler::demangleOperator(Name *OpName) { - OpName->Operator = demangleOperatorName(); - if (!Error && !MangledName.empty() && MangledName.front() != '@') - demangleNamePiece(*OpName, false); + return Node; } -StringView Demangler::demangleOperatorName() { - SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName); - RestoreOnError.shouldRestore(false); - - switch (MangledName.popFront()) { - case '0': - return "ctor"; - case '1': - return "dtor"; - case '2': - return " new"; - case '3': - return " delete"; - case '4': - return "="; - case '5': - return ">>"; - case '6': - return "<<"; - case '7': - return "!"; - case '8': - return "=="; - case '9': - return "!="; - case 'A': - return "[]"; - case 'C': - return "->"; - case 'D': - return "*"; - case 'E': - return "++"; - case 'F': - return "--"; - case 'G': - return "-"; - case 'H': - return "+"; - case 'I': - return "&"; - case 'J': - return "->*"; - case 'K': - return "/"; - case 'L': - return "%"; - case 'M': - return "<"; - case 'N': - return "<="; - case 'O': - return ">"; - case 'P': - return ">="; - case 'Q': - return ","; - case 'R': - return "()"; - case 'S': - return "~"; - case 'T': - return "^"; - case 'U': - return "|"; - case 'V': - return "&&"; - case 'W': - return "||"; - case 'X': - return "*="; - case 'Y': - return "+="; - case 'Z': - return "-="; - case '_': { - if (MangledName.empty()) - break; +Name *Demangler::demangleOperatorName(StringView &MangledName) { + assert(MangledName.startsWith('?')); + MangledName.consumeFront('?'); + auto NameString = [this, &MangledName]() -> StringView { switch (MangledName.popFront()) { case '0': - return "/="; + return "ctor"; case '1': - return "%="; + return "dtor"; case '2': - return ">>="; + return " new"; case '3': - return "<<="; + return " delete"; case '4': - return "&="; + return "="; case '5': - return "|="; + return ">>"; case '6': - return "^="; + return "<<"; + case '7': + return "!"; + case '8': + return "=="; + case '9': + return "!="; + case 'A': + return "[]"; + case 'C': + return "->"; + case 'D': + return "*"; + case 'E': + return "++"; + case 'F': + return "--"; + case 'G': + return "-"; + case 'H': + return "+"; + case 'I': + return "&"; + case 'J': + return "->*"; + case 'K': + return "/"; + case 'L': + return "%"; + case 'M': + return "<"; + case 'N': + return "<="; + case 'O': + return ">"; + case 'P': + return ">="; + case 'Q': + return ","; + case 'R': + return "()"; + case 'S': + return "~"; + case 'T': + return "^"; case 'U': - return " new[]"; + return "|"; case 'V': - return " delete[]"; - case '_': - if (MangledName.consumeFront("L")) - return " co_await"; + return "&&"; + case 'W': + return "||"; + case 'X': + return "*="; + case 'Y': + return "+="; + case 'Z': + return "-="; + case '_': { + if (MangledName.empty()) + break; + + switch (MangledName.popFront()) { + case '0': + return "/="; + case '1': + return "%="; + case '2': + return ">>="; + case '3': + return "<<="; + case '4': + return "&="; + case '5': + return "|="; + case '6': + return "^="; + case 'U': + return " new[]"; + case 'V': + return " delete[]"; + case '_': + if (MangledName.consumeFront("L")) + return " co_await"; + if (MangledName.consumeFront("K")) { + size_t EndPos = MangledName.find('@'); + if (EndPos == StringView::npos) + break; 
+ StringView OpName = demangleSimpleString(MangledName, false); + size_t FullSize = OpName.size() + 3; // <space>""OpName + char *Buffer = Arena.allocUnalignedBuffer(FullSize); + Buffer[0] = ' '; + Buffer[1] = '"'; + Buffer[2] = '"'; + std::memcpy(Buffer + 3, OpName.begin(), OpName.size()); + return {Buffer, FullSize}; + } + } } - } + } + Error = true; + return ""; + }; + + Name *Node = Arena.alloc<Name>(); + Node->Operator = NameString(); + return Node; +} + +Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) { + StringView S = demangleSimpleString(MangledName, Memorize); + if (Error) + return nullptr; + + Name *Node = Arena.alloc<Name>(); + Node->Str = S; + return Node; +} + +StringView Demangler::demangleSimpleString(StringView &MangledName, + bool Memorize) { + StringView S; + for (size_t i = 0; i < MangledName.size(); ++i) { + if (MangledName[i] != '@') + continue; + S = MangledName.substr(0, i); + MangledName = MangledName.dropFront(i + 1); + + if (Memorize) + memorizeString(S); + return S; } Error = true; - RestoreOnError.shouldRestore(true); - return ""; + return {}; +} + +Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { + assert(MangledName.startsWith("?A")); + MangledName.consumeFront("?A"); + + Name *Node = Arena.alloc<Name>(); + Node->Str = "`anonymous namespace'"; + if (MangledName.consumeFront('@')) + return Node; + + Error = true; + return nullptr; +} + +Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { + assert(startsWithLocalScopePattern(MangledName)); + + Name *Node = Arena.alloc<Name>(); + MangledName.consumeFront('?'); + int ScopeIdentifier = demangleNumber(MangledName); + + // One ? to terminate the number + MangledName.consumeFront('?'); + + assert(!Error); + Symbol *Scope = parse(MangledName); + if (Error) + return nullptr; + + // Render the parent symbol's name into a buffer. + OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); + OS << '`'; + output(Scope, OS); + OS << '\''; + OS << "::`" << ScopeIdentifier << "'"; + OS << '\0'; + char *Result = OS.getBuffer(); + Node->Str = copyString(Result); + std::free(Result); + return Node; +} + +// Parses a type name in the form of A@B@C@@ which represents C::B::A. +Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { + Name *TypeName = demangleUnqualifiedTypeName(MangledName); + assert(TypeName); + + Name *QualName = demangleNameScopeChain(MangledName, TypeName); + assert(QualName); + return QualName; +} + +// Parses a symbol name in the form of A@B@C@@ which represents C::B::A. +// Symbol names have slightly different rules regarding what can appear +// so we separate out the implementations for flexibility. +Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { + Name *SymbolName = demangleUnqualifiedSymbolName(MangledName); + assert(SymbolName); + + Name *QualName = demangleNameScopeChain(MangledName, SymbolName); + assert(QualName); + return QualName; +} + +Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) { + // An inner-most name can be a back-reference, because a fully-qualified name + // (e.g. Scope + Inner) can contain other fully qualified names inside of + // them (for example template parameters), and these nested parameters can + // refer to previously mangled types. 
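Qualified names are sequences of '@'-terminated pieces, innermost first, closed by a second '@': A@B@C@@ denotes C::B::A, which is why demangleNameScopeChain links each new piece in front of the ones already read. A standalone sketch of that decoding:

#include <string>
#include <vector>

// Split an MSVC qualified name "A@B@C@@" into pieces and rebuild it as
// "C::B::A". Pieces appear innermost-first in the mangling.
std::string demangleQualifiedName(const std::string &Mangled) {
  std::vector<std::string> Pieces;
  std::string Cur;
  for (size_t I = 0; I < Mangled.size(); ++I) {
    if (Mangled[I] != '@') {
      Cur += Mangled[I];
      continue;
    }
    if (Cur.empty())
      break; // "@@" terminates the whole name
    Pieces.push_back(Cur);
    Cur.clear();
  }
  std::string Out;
  for (size_t I = Pieces.size(); I-- > 0;) {
    Out += Pieces[I];
    if (I != 0)
      Out += "::";
  }
  return Out;
}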
+ if (startsWithDigit(MangledName)) + return demangleBackRefName(MangledName); + + if (MangledName.startsWith("?$")) + return demangleClassTemplateName(MangledName); + + return demangleSimpleName(MangledName, true); +} + +Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) { + if (startsWithDigit(MangledName)) + return demangleBackRefName(MangledName); + if (MangledName.startsWith("?$")) + return demangleClassTemplateName(MangledName); + if (MangledName.startsWith('?')) + return demangleOperatorName(MangledName); + return demangleSimpleName(MangledName, true); +} + +Name *Demangler::demangleNameScopePiece(StringView &MangledName) { + if (startsWithDigit(MangledName)) + return demangleBackRefName(MangledName); + + if (MangledName.startsWith("?$")) + return demangleClassTemplateName(MangledName); + + if (MangledName.startsWith("?A")) + return demangleAnonymousNamespaceName(MangledName); + + if (startsWithLocalScopePattern(MangledName)) + return demangleLocallyScopedNamePiece(MangledName); + + return demangleSimpleName(MangledName, true); +} + +Name *Demangler::demangleNameScopeChain(StringView &MangledName, + Name *UnqualifiedName) { + Name *Head = UnqualifiedName; + + while (!MangledName.consumeFront("@")) { + if (MangledName.empty()) { + Error = true; + return nullptr; + } + + assert(!Error); + Name *Elem = demangleNameScopePiece(MangledName); + if (Error) + return nullptr; + + Elem->Next = Head; + Head = Elem; + } + return Head; } -FuncClass Demangler::demangleFunctionClass() { +FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName); RestoreOnError.shouldRestore(false); @@ -1170,7 +1450,7 @@ FuncClass Demangler::demangleFunctionClass() { return Public; } -CallingConv Demangler::demangleCallingConvention() { +CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { switch (MangledName.popFront()) { case 'A': case 'B': @@ -1200,7 +1480,7 @@ CallingConv Demangler::demangleCallingConvention() { return CallingConv::None; } -StorageClass Demangler::demangleVariableStorageClass() { +StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { assert(std::isdigit(MangledName.front())); switch (MangledName.popFront()) { @@ -1219,7 +1499,8 @@ StorageClass Demangler::demangleVariableStorageClass() { return StorageClass::None; } -std::pair<Qualifiers, bool> Demangler::demangleQualifiers() { +std::pair<Qualifiers, bool> +Demangler::demangleQualifiers(StringView &MangledName) { switch (MangledName.popFront()) { // Member qualifiers @@ -1245,54 +1526,88 @@ std::pair<Qualifiers, bool> Demangler::demangleQualifiers() { return std::make_pair(Q_None, false); } +static bool isTagType(StringView S) { + switch (S.front()) { + case 'T': // union + case 'U': // struct + case 'V': // class + case 'W': // enum + return true; + } + return false; +} + +static bool isPointerType(StringView S) { + if (S.startsWith("$$Q")) // foo && + return true; + + switch (S.front()) { + case 'A': // foo & + case 'P': // foo * + case 'Q': // foo *const + case 'R': // foo *volatile + case 'S': // foo *const volatile + return true; + } + return false; +} + +static bool isArrayType(StringView S) { return S[0] == 'Y'; } + +static bool isFunctionType(StringView S) { + return S.startsWith("$$A8@@") || S.startsWith("$$A6"); +} + // <variable-type> ::= <type> <cvr-qualifiers> // ::= <type> <pointee-cvr-qualifiers> # pointers, references -Type *Demangler::demangleType(QualifierMangleMode QMM) { +Type 
*Demangler::demangleType(StringView &MangledName, + QualifierMangleMode QMM) { Qualifiers Quals = Q_None; bool IsMember = false; bool IsMemberKnown = false; if (QMM == QualifierMangleMode::Mangle) { - std::tie(Quals, IsMember) = demangleQualifiers(); + std::tie(Quals, IsMember) = demangleQualifiers(MangledName); IsMemberKnown = true; } else if (QMM == QualifierMangleMode::Result) { if (MangledName.consumeFront('?')) { - std::tie(Quals, IsMember) = demangleQualifiers(); + std::tie(Quals, IsMember) = demangleQualifiers(MangledName); IsMemberKnown = true; } } Type *Ty = nullptr; - switch (MangledName.front()) { - case 'T': // union - case 'U': // struct - case 'V': // class - case 'W': // enum - Ty = demangleClassType(); - break; - case 'A': // foo & - case 'P': // foo * - case 'Q': // foo *const - case 'R': // foo *volatile - case 'S': // foo *const volatile + if (isTagType(MangledName)) + Ty = demangleClassType(MangledName); + else if (isPointerType(MangledName)) { if (!IsMemberKnown) IsMember = isMemberPointer(MangledName); + if (IsMember) - Ty = demangleMemberPointerType(); + Ty = demangleMemberPointerType(MangledName); else - Ty = demanglePointerType(); - break; - case 'Y': - Ty = demangleArrayType(); - break; - default: - Ty = demangleBasicType(); - break; + Ty = demanglePointerType(MangledName); + } else if (isArrayType(MangledName)) + Ty = demangleArrayType(MangledName); + else if (isFunctionType(MangledName)) { + if (MangledName.consumeFront("$$A8@@")) + Ty = demangleFunctionType(MangledName, true, false); + else { + assert(MangledName.startsWith("$$A6")); + MangledName.consumeFront("$$A6"); + Ty = demangleFunctionType(MangledName, false, false); + } + } else { + Ty = demangleBasicType(MangledName); + assert(Ty && !Error); + if (!Ty || Error) + return Ty; } + Ty->Quals = Qualifiers(Ty->Quals | Quals); return Ty; } -ReferenceKind Demangler::demangleReferenceKind() { +ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) { if (MangledName.consumeFront('G')) return ReferenceKind::LValueRef; else if (MangledName.consumeFront('H')) @@ -1300,55 +1615,61 @@ ReferenceKind Demangler::demangleReferenceKind() { return ReferenceKind::None; } -void Demangler::demangleThrowSpecification() { +void Demangler::demangleThrowSpecification(StringView &MangledName) { if (MangledName.consumeFront('Z')) return; Error = true; } -FunctionType *Demangler::demangleFunctionType(bool HasThisQuals, +FunctionType *Demangler::demangleFunctionType(StringView &MangledName, + bool HasThisQuals, bool IsFunctionPointer) { FunctionType *FTy = Arena.alloc<FunctionType>(); FTy->Prim = PrimTy::Function; FTy->IsFunctionPointer = IsFunctionPointer; if (HasThisQuals) { - FTy->Quals = demanglePointerExtQualifiers(); - FTy->RefKind = demangleReferenceKind(); - FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers().first); + FTy->Quals = demanglePointerExtQualifiers(MangledName); + FTy->RefKind = demangleReferenceKind(MangledName); + FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); } // Fields that appear on both member and non-member functions. 
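demangleType above replaces the old switch with small predicates over the leading characters of the remaining input (isTagType, isPointerType, isArrayType, isFunctionType). The same classification reduced to one standalone function:

#include <string>

enum class TypeClass { Tag, Pointer, Array, Function, Basic };

// Classify by the leading characters of the remaining mangled text, in the
// same order the predicates above are consulted.
TypeClass classify(const std::string &S) {
  if (S.empty())
    return TypeClass::Basic;
  if (S.compare(0, 3, "$$Q") == 0) // rvalue reference
    return TypeClass::Pointer;
  if (S.compare(0, 4, "$$A6") == 0 || S.compare(0, 6, "$$A8@@") == 0)
    return TypeClass::Function;
  switch (S[0]) {
  case 'T': case 'U': case 'V': case 'W': // union/struct/class/enum
    return TypeClass::Tag;
  case 'A': case 'P': case 'Q': case 'R': case 'S': // refs and pointers
    return TypeClass::Pointer;
  case 'Y':
    return TypeClass::Array;
  default:
    return TypeClass::Basic;
  }
}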
- FTy->CallConvention = demangleCallingConvention(); + FTy->CallConvention = demangleCallingConvention(MangledName); // <return-type> ::= <type> // ::= @ # structors (they have no declared return type) bool IsStructor = MangledName.consumeFront('@'); if (!IsStructor) - FTy->ReturnType = demangleType(QualifierMangleMode::Result); + FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); - FTy->Params = demangleFunctionParameterList(); + FTy->Params = demangleFunctionParameterList(MangledName); - demangleThrowSpecification(); + demangleThrowSpecification(MangledName); return FTy; } -Type *Demangler::demangleFunctionEncoding() { - FuncClass FC = demangleFunctionClass(); +Type *Demangler::demangleFunctionEncoding(StringView &MangledName) { + FuncClass FC = demangleFunctionClass(MangledName); bool HasThisQuals = !(FC & (Global | Static)); - FunctionType *FTy = demangleFunctionType(HasThisQuals, false); + FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false); FTy->FunctionClass = FC; return FTy; } // Reads a primitive type. -Type *Demangler::demangleBasicType() { +Type *Demangler::demangleBasicType(StringView &MangledName) { Type *Ty = Arena.alloc<Type>(); + if (MangledName.consumeFront("$$T")) { + Ty->Prim = PrimTy::Nullptr; + return Ty; + } + switch (MangledName.popFront()) { case 'X': Ty->Prim = PrimTy::Void; @@ -1407,16 +1728,26 @@ Type *Demangler::demangleBasicType() { case 'W': Ty->Prim = PrimTy::Wchar; break; + case 'S': + Ty->Prim = PrimTy::Char16; + break; + case 'U': + Ty->Prim = PrimTy::Char32; + break; default: - assert(false); + Error = true; + return nullptr; } break; } + default: + Error = true; + return nullptr; } return Ty; } -UdtType *Demangler::demangleClassType() { +UdtType *Demangler::demangleClassType(StringView &MangledName) { UdtType *UTy = Arena.alloc<UdtType>(); switch (MangledName.popFront()) { @@ -1440,12 +1771,15 @@ UdtType *Demangler::demangleClassType() { assert(false); } - UTy->UdtName = demangleName(); + UTy->UdtName = demangleFullyQualifiedTypeName(MangledName); return UTy; } static std::pair<Qualifiers, PointerAffinity> demanglePointerCVQualifiers(StringView &MangledName) { + if (MangledName.consumeFront("$$Q")) + return std::make_pair(Q_None, PointerAffinity::RValueReference); + switch (MangledName.popFront()) { case 'A': return std::make_pair(Q_None, PointerAffinity::Reference); @@ -1466,27 +1800,27 @@ demanglePointerCVQualifiers(StringView &MangledName) { // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> // # the E is required for 64-bit non-static pointers -PointerType *Demangler::demanglePointerType() { +PointerType *Demangler::demanglePointerType(StringView &MangledName) { PointerType *Pointer = Arena.alloc<PointerType>(); - PointerAffinity Affinity; - std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName); + std::tie(Pointer->Quals, Pointer->Affinity) = + demanglePointerCVQualifiers(MangledName); - Pointer->Prim = - (Affinity == PointerAffinity::Pointer) ? 
PrimTy::Ptr : PrimTy::Ref; + Pointer->Prim = PrimTy::Ptr; if (MangledName.consumeFront("6")) { - Pointer->Pointee = demangleFunctionType(false, true); + Pointer->Pointee = demangleFunctionType(MangledName, false, true); return Pointer; } - Qualifiers ExtQuals = demanglePointerExtQualifiers(); + Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); - Pointer->Pointee = demangleType(QualifierMangleMode::Mangle); + Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); return Pointer; } -MemberPointerType *Demangler::demangleMemberPointerType() { +MemberPointerType * +Demangler::demangleMemberPointerType(StringView &MangledName) { MemberPointerType *Pointer = Arena.alloc<MemberPointerType>(); Pointer->Prim = PrimTy::MemberPtr; @@ -1494,27 +1828,27 @@ MemberPointerType *Demangler::demangleMemberPointerType() { std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName); assert(Affinity == PointerAffinity::Pointer); - Qualifiers ExtQuals = demanglePointerExtQualifiers(); + Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); if (MangledName.consumeFront("8")) { - Pointer->MemberName = demangleName(); - Pointer->Pointee = demangleFunctionType(true, true); + Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName); + Pointer->Pointee = demangleFunctionType(MangledName, true, true); } else { Qualifiers PointeeQuals = Q_None; bool IsMember = false; - std::tie(PointeeQuals, IsMember) = demangleQualifiers(); + std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); assert(IsMember); - Pointer->MemberName = demangleName(); + Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName); - Pointer->Pointee = demangleType(QualifierMangleMode::Drop); + Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); Pointer->Pointee->Quals = PointeeQuals; } return Pointer; } -Qualifiers Demangler::demanglePointerExtQualifiers() { +Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { Qualifiers Quals = Q_None; if (MangledName.consumeFront('E')) Quals = Qualifiers(Quals | Q_Pointer64); @@ -1526,11 +1860,11 @@ Qualifiers Demangler::demanglePointerExtQualifiers() { return Quals; } -ArrayType *Demangler::demangleArrayType() { +ArrayType *Demangler::demangleArrayType(StringView &MangledName) { assert(MangledName.front() == 'Y'); MangledName.popFront(); - int Dimension = demangleNumber(); + int Dimension = demangleNumber(MangledName); if (Dimension <= 0) { Error = true; return nullptr; @@ -1540,7 +1874,7 @@ ArrayType *Demangler::demangleArrayType() { ArrayType *Dim = ATy; for (int I = 0; I < Dimension; ++I) { Dim->Prim = PrimTy::Array; - Dim->ArrayDimension = demangleNumber(); + Dim->ArrayDimension = demangleNumber(MangledName); Dim->NextDimension = Arena.alloc<ArrayType>(); Dim = Dim->NextDimension; } @@ -1554,19 +1888,20 @@ ArrayType *Demangler::demangleArrayType() { Error = true; } - ATy->ElementType = demangleType(QualifierMangleMode::Drop); + ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); Dim->ElementType = ATy->ElementType; return ATy; } // Reads a function or a template parameters. -ParamList Demangler::demangleFunctionParameterList() { +FunctionParams +Demangler::demangleFunctionParameterList(StringView &MangledName) { // Empty parameter list. 
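demanglePointerCVQualifiers above maps the leading code to a pointer flavor: 'A' is a reference, 'P'/'Q'/'R'/'S' are pointers with increasing cv-qualification, and the new "$$Q" prefix is an rvalue reference. A sketch of that mapping (the Qual values here are simplified stand-ins for the demangler's Qualifiers):

#include <cstdint>
#include <string>

enum Qual : uint8_t { None = 0, Const = 1, Volatile = 2 };
enum class Affinity { Pointer, Reference, RValueReference };

// Consume the pointer cv-qualifier code from the front of S.
bool pointerCVQuals(std::string &S, Qual &Q, Affinity &A) {
  if (S.compare(0, 3, "$$Q") == 0) {
    S.erase(0, 3);
    Q = None;
    A = Affinity::RValueReference;
    return true;
  }
  if (S.empty())
    return false;
  char C = S[0];
  S.erase(0, 1);
  A = (C == 'A') ? Affinity::Reference : Affinity::Pointer;
  switch (C) {
  case 'A': case 'P': Q = None; return true;
  case 'Q': Q = Const; return true;
  case 'R': Q = Volatile; return true;
  case 'S': Q = Qual(Const | Volatile); return true;
  default:  return false;
  }
}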
if (MangledName.consumeFront('X')) return {}; - ParamList *Head; - ParamList **Current = &Head; + FunctionParams *Head; + FunctionParams **Current = &Head; while (!Error && !MangledName.startsWith('@') && !MangledName.startsWith('Z')) { @@ -1578,7 +1913,7 @@ ParamList Demangler::demangleFunctionParameterList() { } MangledName = MangledName.dropFront(); - *Current = Arena.alloc<ParamList>(); + *Current = Arena.alloc<FunctionParams>(); (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena); Current = &(*Current)->Next; continue; @@ -1586,8 +1921,8 @@ ParamList Demangler::demangleFunctionParameterList() { size_t OldSize = MangledName.size(); - *Current = Arena.alloc<ParamList>(); - (*Current)->Current = demangleType(QualifierMangleMode::Drop); + *Current = Arena.alloc<FunctionParams>(); + (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop); size_t CharsConsumed = OldSize - MangledName.size(); assert(CharsConsumed != 0); @@ -1618,14 +1953,33 @@ ParamList Demangler::demangleFunctionParameterList() { return {}; } -ParamList Demangler::demangleTemplateParameterList() { - ParamList *Head; - ParamList **Current = &Head; +TemplateParams * +Demangler::demangleTemplateParameterList(StringView &MangledName) { + TemplateParams *Head; + TemplateParams **Current = &Head; while (!Error && !MangledName.startsWith('@')) { - // Template parameter lists don't participate in back-referencing. - *Current = Arena.alloc<ParamList>(); - (*Current)->Current = demangleType(QualifierMangleMode::Drop); + *Current = Arena.alloc<TemplateParams>(); + + // Empty parameter pack. + if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") || + MangledName.consumeFront("$$$V")) { + if (!MangledName.startsWith('@')) + Error = true; + continue; + } + + if (MangledName.consumeFront("$$Y")) { + (*Current)->IsTemplateTemplate = true; + (*Current)->IsAliasTemplate = true; + (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName); + } else if (MangledName.consumeFront("$1?")) { + (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName); + (*Current)->ParamType = demangleFunctionEncoding(MangledName); + } else { + (*Current)->ParamType = + demangleType(MangledName, QualifierMangleMode::Drop); + } Current = &(*Current)->Next; } @@ -1636,12 +1990,12 @@ ParamList Demangler::demangleTemplateParameterList() { // Template parameter lists cannot be variadic, so it can only be terminated // by @. if (MangledName.consumeFront('@')) - return *Head; + return Head; Error = true; return {}; } -void Demangler::output() { +void Demangler::output(const Symbol *S, OutputStream &OS) { // Converts an AST to a string. // // Converting an AST representing a C++ type to a string is tricky due @@ -1659,26 +2013,24 @@ void Demangler::output() { // the "first half" of type declaration, and outputPost() writes the // "second half". For example, outputPre() writes a return type for a // function and outputPost() writes an parameter list. - Type::outputPre(OS, *SymbolType); - outputName(OS, SymbolName); - Type::outputPost(OS, *SymbolType); - - // Null terminate the buffer. 
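Function parameter lists have their own back-reference table, separate from name back-references: the code above suggests that a parameter type whose encoding took more than one character is memorized (up to ten) and a later bare digit clones it. A hedged reconstruction of that rule, not a verbatim excerpt:

#include <string>
#include <vector>

struct ParamBackRefs {
  std::vector<std::string> Types; // at most 10 entries

  // Called after a parameter type was parsed; single-letter encodings are
  // cheap to repeat, so only longer ones are worth memorizing.
  void maybeMemorize(const std::string &Encoding) {
    if (Encoding.size() > 1 && Types.size() < 10)
      Types.push_back(Encoding);
  }

  const std::string *lookup(char Digit) const {
    size_t I = Digit - '0';
    return I < Types.size() ? &Types[I] : nullptr;
  }
};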
- OS << '\0'; + Type::outputPre(OS, *S->SymbolType); + outputName(OS, S->SymbolName); + Type::outputPost(OS, *S->SymbolType); } char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, int *Status) { - OutputStream OS = OutputStream::create(Buf, N, 1024); - - Demangler D(OS, StringView(MangledName)); - D.parse(); + Demangler D; + StringView Name{MangledName}; + Symbol *S = D.parse(Name); if (D.Error) *Status = llvm::demangle_invalid_mangled_name; else *Status = llvm::demangle_success; - D.output(); + OutputStream OS = OutputStream::create(Buf, N, 1024); + D.output(S, OS); + OS << '\0'; return OS.getBuffer(); } diff --git a/contrib/llvm/lib/Demangle/StringView.h b/contrib/llvm/lib/Demangle/StringView.h index 3416db2c2867..a89deda694c2 100644 --- a/contrib/llvm/lib/Demangle/StringView.h +++ b/contrib/llvm/lib/Demangle/StringView.h @@ -22,6 +22,8 @@ class StringView { const char *Last; public: + static const size_t npos = ~size_t(0); + template <size_t N> StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {} StringView(const char *First_, const char *Last_) @@ -35,6 +37,17 @@ public: return StringView(begin() + From, size() - From); } + size_t find(char C, size_t From = 0) const { + size_t FindBegin = std::min(From, size()); + // Avoid calling memchr with nullptr. + if (FindBegin < size()) { + // Just forward to memchr, which is faster than a hand-rolled loop. + if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin)) + return static_cast<const char *>(P) - First; + } + return npos; + } + StringView substr(size_t From, size_t To) const { if (To >= size()) To = size() - 1; @@ -49,11 +62,22 @@ public: return StringView(First + N, Last); } + StringView dropBack(size_t N = 1) const { + if (N >= size()) + N = size(); + return StringView(First, Last - N); + } + char front() const { assert(!empty()); return *begin(); } + char back() const { + assert(!empty()); + return *(end() - 1); + } + char popFront() { assert(!empty()); return *First++; diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index abcdaeba8eb0..3be4bec566a0 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -153,7 +153,7 @@ void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions, LLVMMCJITCompilerOptions options; memset(&options, 0, sizeof(options)); // Most fields are zero by default. options.CodeModel = LLVMCodeModelJITDefault; - + memcpy(PassedOptions, &options, std::min(sizeof(options), SizeOfPassedOptions)); } @@ -171,14 +171,14 @@ LLVMBool LLVMCreateMCJITCompilerForModule( "LLVM library mismatch."); return 1; } - + // Defend against the user having an old version of the API by ensuring that // any fields they didn't see are cleared. We must defend against fields being // set to the bitwise equivalent of zero, and assume that this means "do the // default" as if that option hadn't been available. 
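The StringView additions above (npos, find, dropBack, back) exist to support the new demangler code such as startsWithLocalScopePattern. A small usage sketch, compiled against this in-tree header:

#include <cassert>
#include "StringView.h"

void stringViewExamples() {
  StringView S("operator@@rest");
  size_t At = S.find('@'); // index 8
  assert(At != StringView::npos);
  StringView Piece = S.substr(0, At); // "operator"
  (void)Piece;
  StringView Trimmed = S.dropBack(4); // drop "rest"
  assert(Trimmed.back() == '@');
}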
LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); memcpy(&options, PassedOptions, SizeOfPassedOptions); - + TargetOptions targetOptions; targetOptions.EnableFastISel = options.EnableFastISel; std::unique_ptr<Module> Mod(unwrap(M)); @@ -241,12 +241,12 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned NumArgs, LLVMGenericValueRef *Args) { unwrap(EE)->finalizeObject(); - + std::vector<GenericValue> ArgVec; ArgVec.reserve(NumArgs); for (unsigned I = 0; I != NumArgs; ++I) ArgVec.push_back(*unwrap(Args[I])); - + GenericValue *Result = new GenericValue(); *Result = unwrap(EE)->runFunction(unwrap<Function>(F), ArgVec); return wrap(Result); @@ -297,7 +297,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { unwrap(EE)->finalizeObject(); - + return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global)); } @@ -395,11 +395,11 @@ LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager( LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection, LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory, LLVMMemoryManagerDestroyCallback Destroy) { - + if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory || !Destroy) return nullptr; - + SimpleBindingMMFunctions functions; functions.AllocateCodeSection = AllocateCodeSection; functions.AllocateDataSection = AllocateDataSection; diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h index 1f029fb1c45b..61d8cc75d9f2 100644 --- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h +++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h @@ -7,7 +7,7 @@ * *===----------------------------------------------------------------------===* * - * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) * Profiling API internal config. * * NOTE: This file comes in a style different from the rest of LLVM @@ -213,7 +213,7 @@ typedef pthread_mutex_t mutex_t; #define __itt_thread_id() GetCurrentThreadId() #define __itt_thread_yield() SwitchToThread() #ifndef ITT_SIMPLE_INIT -ITT_INLINE long +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { @@ -273,7 +273,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) } #endif /* ITT_ARCH==ITT_ARCH_IA64 */ #ifndef ITT_SIMPLE_INIT -ITT_INLINE long +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) { diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h index 8d16ee85d141..efd2b1a33f75 100644 --- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h +++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h @@ -7,7 +7,7 @@ * *===----------------------------------------------------------------------===* * - * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) * Profiling API declaration. 
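The init-then-overlay sequence above is a common C ABI versioning idiom: fill a struct of the current size with defaults, then copy in only as many bytes as the caller's (possibly older and smaller) struct provides, so fields the caller has never seen keep their defaults. A toy model with illustrative names:

    #include <algorithm>
    #include <cstddef>
    #include <cstring>

    struct Options {
      int EnableFastISel; // known to old clients
      int NewKnob;        // added in a later release of the library
    };

    void applyCallerOptions(Options &Opts, const void *Passed,
                            size_t SizeOfPassed) {
      std::memset(&Opts, 0, sizeof(Opts)); // current defaults...
      Opts.NewKnob = 42;                   // ...including new fields
      // Overlay caller-supplied bytes; an old caller's smaller struct
      // leaves NewKnob at its default.
      std::memcpy(&Opts, Passed, std::min(sizeof(Opts), SizeOfPassed));
    }

The std::min also protects the library when the caller was built against a newer, larger struct.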
* * NOTE: This file comes in a style different from the rest of LLVM @@ -28,54 +28,54 @@ typedef enum iJIT_jvm_event { /* shutdown */ - - /* + + /* * Program exiting EventSpecificData NA */ - iJVM_EVENT_TYPE_SHUTDOWN = 2, + iJVM_EVENT_TYPE_SHUTDOWN = 2, /* JIT profiling */ - - /* + + /* * issued after method code jitted into memory but before code is executed * EventSpecificData is an iJIT_Method_Load */ - iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13, + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13, - /* issued before unload. Method code will no longer be executed, but code - * and info are still in memory. The VTune profiler may capture method + /* issued before unload. Method code will no longer be executed, but code + * and info are still in memory. The VTune profiler may capture method * code only at this point EventSpecificData is iJIT_Method_Id */ - iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, /* Method Profiling */ - /* method name, Id and stack is supplied - * issued when a method is about to be entered EventSpecificData is + /* method name, Id and stack is supplied + * issued when a method is about to be entered EventSpecificData is * iJIT_Method_NIDS */ - iJVM_EVENT_TYPE_ENTER_NIDS = 19, + iJVM_EVENT_TYPE_ENTER_NIDS = 19, - /* method name, Id and stack is supplied - * issued when a method is about to be left EventSpecificData is + /* method name, Id and stack is supplied + * issued when a method is about to be left EventSpecificData is * iJIT_Method_NIDS */ - iJVM_EVENT_TYPE_LEAVE_NIDS + iJVM_EVENT_TYPE_LEAVE_NIDS } iJIT_JVM_EVENT; typedef enum _iJIT_ModeFlags { /* No need to Notify VTune, since VTune is not running */ - iJIT_NO_NOTIFICATIONS = 0x0000, + iJIT_NO_NOTIFICATIONS = 0x0000, - /* when turned on the jit must call + /* when turned on the jit must call * iJIT_NotifyEvent * ( * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, * ) * for all the method already jitted */ - iJIT_BE_NOTIFY_ON_LOAD = 0x0001, + iJIT_BE_NOTIFY_ON_LOAD = 0x0001, /* when turned on the jit must call * iJIT_NotifyEvent @@ -83,19 +83,19 @@ typedef enum _iJIT_ModeFlags * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED, * ) for all the method that are unloaded */ - iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002, + iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002, /* when turned on the jit must instrument all * the currently jited code with calls on * method entries */ - iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004, + iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004, /* when turned on the jit must instrument all * the currently jited code with calls * on method exit */ - iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008 + iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008 } iJIT_ModeFlags; @@ -104,13 +104,13 @@ typedef enum _iJIT_ModeFlags typedef enum _iJIT_IsProfilingActiveFlags { /* No profiler is running. Currently not used */ - iJIT_NOTHING_RUNNING = 0x0000, + iJIT_NOTHING_RUNNING = 0x0000, /* Sampling is running. 
This is the default value * returned by iJIT_IsProfilingActive() */ - iJIT_SAMPLING_ON = 0x0001, - + iJIT_SAMPLING_ON = 0x0001, + /* Call Graph is running */ iJIT_CALLGRAPH_ON = 0x0002 @@ -135,7 +135,7 @@ typedef struct _iJIT_Method_Id /* Id of the method (same as the one passed in * the iJIT_Method_Load struct */ - unsigned int method_id; + unsigned int method_id; } *piJIT_Method_Id, iJIT_Method_Id; @@ -149,13 +149,13 @@ typedef struct _iJIT_Method_Id typedef struct _iJIT_Method_NIDS { /* unique method ID */ - unsigned int method_id; + unsigned int method_id; /* NOTE: no need to fill this field, it's filled by VTune */ - unsigned int stack_id; + unsigned int stack_id; /* method name (just the method, without the class) */ - char* method_name; + char* method_name; } *piJIT_Method_NIDS, iJIT_Method_NIDS; /* structures for the events: @@ -168,51 +168,51 @@ typedef struct _LineNumberInfo unsigned int Offset; /* source line number from the beginning of the source file */ - unsigned int LineNumber; + unsigned int LineNumber; } *pLineNumberInfo, LineNumberInfo; typedef struct _iJIT_Method_Load { /* unique method ID - can be any unique value, (except 0 - 999) */ - unsigned int method_id; + unsigned int method_id; /* method name (can be with or without the class and signature, in any case * the class name will be added to it) */ - char* method_name; + char* method_name; /* virtual address of that method - This determines the method range for the * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events */ - void* method_load_address; + void* method_load_address; /* Size in memory - Must be exact */ - unsigned int method_size; + unsigned int method_size; /* Line Table size in number of entries - Zero if none */ unsigned int line_number_size; /* Pointer to the beginning of the line numbers info array */ - pLineNumberInfo line_number_table; + pLineNumberInfo line_number_table; /* unique class ID */ - unsigned int class_id; - + unsigned int class_id; + /* class file name */ - char* class_file_name; + char* class_file_name; /* source file name */ - char* source_file_name; + char* source_file_name; /* bits supplied by the user for saving in the JIT file */ - void* user_data; + void* user_data; /* the size of the user data buffer */ - unsigned int user_data_size; + unsigned int user_data_size; /* NOTE: no need to fill this field, it's filled by VTune */ - iJDEnvironmentType env; + iJDEnvironmentType env; } *piJIT_Method_Load, iJIT_Method_Load; @@ -241,7 +241,7 @@ typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags); int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData); /* The new mode call back routine */ -void JITAPI iJIT_RegisterCallbackEx(void *userdata, +void JITAPI iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx NewModeCallBackFuncEx); iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void); diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 9e77d160c30b..39cf6d4a32a3 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -85,7 +85,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, } } -static void executeFDivInst(GenericValue &Dest, GenericValue Src1, +static void executeFDivInst(GenericValue &Dest, GenericValue Src1, GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { IMPLEMENT_BINARY_OPERATOR(/, Float); @@ -96,7 +96,7 @@ static void 
executeFDivInst(GenericValue &Dest, GenericValue Src1, } } -static void executeFRemInst(GenericValue &Dest, GenericValue Src1, +static void executeFRemInst(GenericValue &Dest, GenericValue Src1, GenericValue Src2, Type *Ty) { switch (Ty->getTypeID()) { case Type::FloatTyID: @@ -281,7 +281,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - + switch (I.getPredicate()) { case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break; case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break; @@ -297,7 +297,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I; llvm_unreachable(nullptr); } - + SetValue(&I, R, SF); } @@ -552,10 +552,10 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, Src2.AggregateVal[_i].DoubleVal))); } } else if (Ty->isFloatTy()) - Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && + Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); else { - Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && + Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && Src2.DoubleVal == Src2.DoubleVal)); } return Dest; @@ -583,10 +583,10 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, Src2.AggregateVal[_i].DoubleVal))); } } else if (Ty->isFloatTy()) - Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || + Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); else { - Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || + Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || Src2.DoubleVal != Src2.DoubleVal)); } return Dest; @@ -613,15 +613,15 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - + switch (I.getPredicate()) { default: dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; llvm_unreachable(nullptr); break; - case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); + case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); break; - case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); + case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); break; case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; @@ -638,11 +638,11 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; } - + SetValue(&I, R, SF); } -static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, +static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Result; switch (predicate) { @@ -747,12 +747,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::FRem: if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) - R.AggregateVal[i].FloatVal = + R.AggregateVal[i].FloatVal = fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); else { if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) - 
R.AggregateVal[i].DoubleVal = + R.AggregateVal[i].DoubleVal = fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); else { dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; @@ -965,7 +965,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { Type *Ty = I.getType()->getElementType(); // Type to be allocated // Get the number of elements being allocated by the array... - unsigned NumElements = + unsigned NumElements = getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue(); unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty); @@ -1011,7 +1011,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, GenericValue IdxGV = getOperandValue(I.getOperand(), SF); int64_t Idx; - unsigned BitWidth = + unsigned BitWidth = cast<IntegerType>(I.getOperand()->getType())->getBitWidth(); if (BitWidth == 32) Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue(); @@ -2037,13 +2037,13 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break; case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break; case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break; - case Instruction::Shl: + case Instruction::Shl: Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue()); break; - case Instruction::LShr: + case Instruction::LShr: Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue()); break; - case Instruction::AShr: + case Instruction::AShr: Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue()); break; default: @@ -2100,7 +2100,7 @@ void Interpreter::callFunction(Function *F, ArrayRef<GenericValue> ArgVals) { // Handle non-varargs arguments... unsigned i = 0; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++i) SetValue(&*AI, ArgVals[i], StackFrame); diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h index 5c16448404bb..33542e7e43ad 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -132,8 +132,8 @@ public: void visitLoadInst(LoadInst &I); void visitStoreInst(StoreInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); - void visitPHINode(PHINode &PN) { - llvm_unreachable("PHI nodes already handled!"); + void visitPHINode(PHINode &PN) { + llvm_unreachable("PHI nodes already handled!"); } void visitTruncInst(TruncInst &I); void visitZExtInst(ZExtInst &I); @@ -224,7 +224,7 @@ private: // Helper functions ExecutionContext &SF); GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF); - GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal, + GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal, Type *Ty, ExecutionContext &SF); void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result); diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp index e774af05ebdd..75d4c2b5134e 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp @@ -119,10 +119,10 @@ void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr, void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr, size_t Size) 
{ - // On Linux __register_frame takes a single argument: + // On Linux __register_frame takes a single argument: // a pointer to the start of the .eh_frame section. - // How can it find the end? Because crtendS.o is linked + // How can it find the end? Because crtendS.o is linked // in and it has an .eh_frame section with four zero chars. __register_frame(Addr); } @@ -255,7 +255,7 @@ RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) { return (uint64_t)&__morestack; #endif #endif // __linux__ && __GLIBC__ - + // See ARM_MATH_IMPORTS definition for explanation #if defined(__BIONIC__) && defined(__arm__) if (Name.compare(0, 8, "__aeabi_") == 0) { diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index cc6729d21320..f9a81c7bd1b0 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1430,7 +1430,7 @@ RuntimeDyldELF::processRelocationRef( } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } - + } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { if (RelType == ELF::R_PPC64_REL24) { // Determine ABI variant in use for this object. diff --git a/contrib/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/contrib/llvm/lib/FuzzMutate/FuzzerCLI.cpp index 6f5a5c067a97..a70dad37dfcf 100644 --- a/contrib/llvm/lib/FuzzMutate/FuzzerCLI.cpp +++ b/contrib/llvm/lib/FuzzMutate/FuzzerCLI.cpp @@ -93,7 +93,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=gvn"); } else if (Opt == "sccp") { Args.push_back("-passes=sccp"); - + } else if (Opt == "loop_predication") { Args.push_back("-passes=loop-predication"); } else if (Opt == "guard_widening") { @@ -114,7 +114,7 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=strength-reduce"); } else if (Opt == "irce") { Args.push_back("-passes=irce"); - + } else if (Triple(Opt).getArch()) { Args.push_back("-mtriple=" + Opt.str()); } else { @@ -204,6 +204,6 @@ std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size, auto M = parseModule(Data, Size, Context); if (!M || verifyModule(*M, &errs())) return nullptr; - + return M; } diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index 9e5f55d49756..d87187481be0 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -1709,6 +1709,15 @@ adjustMinLegalVectorWidth(Function &Caller, const Function &Callee) { } } +/// If the inlined function has "null-pointer-is-valid=true" attribute, +/// set this attribute in the caller post inlining. 
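The __register_frame comment above relies on the framing of .eh_frame: a sequence of length-prefixed CFI records, with crtendS.o contributing a terminating 4-byte zero length word, which is how a start-pointer-only API can find the end. An illustrative scan of that layout (ignoring DWARF's 64-bit extended-length form):

    #include <cstdint>
    #include <cstring>

    // Walk length-prefixed records until the zero-length terminator.
    const uint8_t *findEHFrameEnd(const uint8_t *P) {
      for (;;) {
        uint32_t Len;
        std::memcpy(&Len, P, 4); // initial length (host-endian assumed)
        if (Len == 0)
          return P;              // the four zero bytes from crtendS.o
        P += 4 + Len;            // length word plus record body
      }
    }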
+static void +adjustNullPointerValidAttr(Function &Caller, const Function &Callee) { + if (Callee.nullPointerIsDefined() && !Caller.nullPointerIsDefined()) { + Caller.addFnAttr(Callee.getFnAttribute("null-pointer-is-valid")); + } +} + #define GET_ATTR_COMPAT_FUNC #include "AttributesCompatFunc.inc" diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp index ef62a23b5358..f098ad9725b6 100644 --- a/contrib/llvm/lib/IR/AutoUpgrade.cpp +++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp @@ -94,7 +94,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 - Name.startswith("avx512.kunpck") || //added in 6.0 + Name.startswith("avx512.kunpck") || //added in 6.0 Name.startswith("avx2.pabs.") || // Added in 6.0 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 Name.startswith("avx512.broadcastm") || // Added in 6.0 diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index aba329b80508..72090f5bac3e 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -586,7 +586,7 @@ static std::string getMangledTypeStr(Type* Ty) { if (FT->isVarArg()) Result += "vararg"; // Ensure nested function types are distinguishable. - Result += "f"; + Result += "f"; } else if (isa<VectorType>(Ty)) { Result += "v" + utostr(Ty->getVectorNumElements()) + getMangledTypeStr(Ty->getVectorElementType()); diff --git a/contrib/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm/lib/IR/InlineAsm.cpp index 8667d7aab583..4623f69bd9a3 100644 --- a/contrib/llvm/lib/IR/InlineAsm.cpp +++ b/contrib/llvm/lib/IR/InlineAsm.cpp @@ -57,7 +57,7 @@ void InlineAsm::destroyConstant() { FunctionType *InlineAsm::getFunctionType() const { return FTy; } - + /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the /// fields in this structure. If the constraint string is not understood, /// return true, otherwise return false. @@ -80,7 +80,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, isCommutative = false; isIndirect = false; currentAlternativeIndex = 0; - + // Parse prefixes. if (*I == '~') { Type = isClobber; @@ -100,7 +100,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, } if (I == E) return true; // Just a prefix, like "==" or "~". - + // Parse the modifiers. bool DoneWithModifiers = false; while (!DoneWithModifiers) { @@ -124,13 +124,13 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, case '*': // Register preferencing. return true; // Not supported. } - + if (!DoneWithModifiers) { ++I; if (I == E) return true; // Just prefixes and modifiers! } } - + // Parse the various constraints. while (I != E) { if (*I == '{') { // Physical register reference. @@ -150,7 +150,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput|| Type != isInput) return true; // Invalid constraint number. - + // If Operand N already has a matching input, reject this. An output // can't be constrained to the same value as multiple inputs. if (isMultipleAlternative) { @@ -207,7 +207,7 @@ void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) { InlineAsm::ConstraintInfoVector InlineAsm::ParseConstraints(StringRef Constraints) { ConstraintInfoVector Result; - + // Scan the constraints string. 
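Note the direction of the adjustNullPointerValidAttr helper above: inlining may only weaken the caller's guarantees. If the callee was compiled assuming address zero is dereferenceable, the merged body must keep that assumption, or a later pass could fold the inlined null access into a trap. A minimal model of the merge rule, with plain bools standing in for the LLVM attribute machinery:

    struct FnProps { bool NullPointerIsValid = false; };

    void mergeAfterInlining(FnProps &Caller, const FnProps &Callee) {
      // Adopt the more permissive assumption; never drop it.
      if (Callee.NullPointerIsValid && !Caller.NullPointerIsValid)
        Caller.NullPointerIsValid = true;
    }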
for (StringRef::iterator I = Constraints.begin(), E = Constraints.end(); I != E; ) { @@ -223,7 +223,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) { } Result.push_back(Info); - + // ConstraintEnd may be either the next comma or the end of the string. In // the former case, we skip the comma. I = ConstraintEnd; @@ -235,7 +235,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) { } // don't allow "xyz," } } - + return Result; } @@ -243,15 +243,15 @@ InlineAsm::ParseConstraints(StringRef Constraints) { /// specified function type, and otherwise validate the constraint string. bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->isVarArg()) return false; - + ConstraintInfoVector Constraints = ParseConstraints(ConstStr); - + // Error parsing constraints. if (Constraints.empty() && !ConstStr.empty()) return false; - + unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0; unsigned NumIndirect = 0; - + for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { switch (Constraints[i].Type) { case InlineAsm::isOutput: @@ -272,7 +272,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { break; } } - + switch (NumOutputs) { case 0: if (!Ty->getReturnType()->isVoidTy()) return false; @@ -285,8 +285,8 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { if (!STy || STy->getNumElements() != NumOutputs) return false; break; - } - + } + if (Ty->getNumParams() != NumInputs) return false; return true; } diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp index e0ad0d1ea1f1..32db918dab97 100644 --- a/contrib/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm/lib/IR/Instructions.cpp @@ -310,7 +310,7 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args, "Calling a function with bad signature!"); for (unsigned i = 0; i != Args.size(); ++i) - assert((i >= FTy->getNumParams() || + assert((i >= FTy->getNumParams() || FTy->getParamType(i) == Args[i]->getType()) && "Calling a function with a bad signature!"); #endif @@ -409,7 +409,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && "createMalloc needs either InsertBefore or InsertAtEnd"); - // malloc(type) becomes: + // malloc(type) becomes: // bitcast (i8* malloc(typeSize)) to type* // malloc(type, arraySize) becomes: // bitcast (i8* malloc(typeSize*arraySize)) to type* @@ -516,7 +516,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, /// responsibility of the caller. 
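Returning to the constraint parsing at the top of this hunk: Parse() classifies each comma-separated constraint by its leading characters before handling modifiers and registers. A toy classifier covering just the leading character (an assumed simplification; the real parser also handles '%', '*', '#', matching digits, and brace-enclosed registers):

    #include <cassert>
    #include <string>

    enum Kind { Input, Output, Clobber };

    Kind classify(const std::string &C) {
      if (!C.empty() && C[0] == '~') return Clobber; // e.g. "~{memory}"
      if (!C.empty() && C[0] == '=') return Output;  // '&' after '=' is the
      return Input;                                  // early-clobber modifier
    }

    int main() {
      assert(classify("=&{eax}") == Output);
      assert(classify("~{memory}") == Clobber);
      assert(classify("r") == Input);
    }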
Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, Type *AllocTy, - Value *AllocSize, Value *ArraySize, + Value *AllocSize, Value *ArraySize, Function *MallocF, const Twine &Name) { return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, ArraySize, None, MallocF, Name); @@ -612,7 +612,7 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, "Invoking a function with bad signature"); for (unsigned i = 0, e = Args.size(); i != e; i++) - assert((i >= FTy->getNumParams() || + assert((i >= FTy->getNumParams() || FTy->getParamType(i) == Args[i]->getType()) && "Invoking a function with a bad signature!"); #endif @@ -912,7 +912,7 @@ FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, // UnreachableInst Implementation //===----------------------------------------------------------------------===// -UnreachableInst::UnreachableInst(LLVMContext &Context, +UnreachableInst::UnreachableInst(LLVMContext &Context, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable, nullptr, 0, InsertBefore) { @@ -1072,7 +1072,7 @@ bool AllocaInst::isArrayAllocation() const { bool AllocaInst::isStaticAlloca() const { // Must be constant size. if (!isa<ConstantInt>(getArraySize())) return false; - + // Must be in the entry block. const BasicBlock *Parent = getParent(); return Parent == &Parent->getParent()->front() && !isUsedWithInAlloca(); @@ -1125,7 +1125,7 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, setName(Name); } -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) @@ -1380,7 +1380,7 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, // FenceInst Implementation //===----------------------------------------------------------------------===// -FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, +FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { @@ -1388,7 +1388,7 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, setSyncScopeID(SSID); } -FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, +FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) { @@ -1575,14 +1575,14 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index, setName(Name); } -bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, +bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, const Value *Index) { if (!Vec->getType()->isVectorTy()) return false; // First operand of insertelement must be vector type. - + if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - + if (!Index->getType()->isIntegerTy()) return false; // Third operand of insertelement must be i32. return true; @@ -1632,7 +1632,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, // V1 and V2 must be vectors of the same type. if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) return false; - + // Mask must be vector of i32. 
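The mask checks continuing below validate shufflevector's contract: each mask element picks a lane from the concatenation of the two input vectors, with -1 (undef) meaning "don't care". A scalar model of what a valid mask computes:

    #include <cassert>
    #include <vector>

    std::vector<int> shuffle(const std::vector<int> &V1,
                             const std::vector<int> &V2,
                             const std::vector<int> &Mask) {
      std::vector<int> R;
      for (int M : Mask) {
        if (M < 0) { R.push_back(0); continue; } // undef lane: any value
        size_t I = static_cast<size_t>(M);       // must be < 2 * V1.size()
        R.push_back(I < V1.size() ? V1[I] : V2[I - V1.size()]);
      }
      return R;
    }

    int main() {
      std::vector<int> A{1, 2}, B{3, 4};
      assert((shuffle(A, B, {0, 3}) == std::vector<int>{1, 4}));
    }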
auto *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32)) @@ -1654,7 +1654,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, } return true; } - + if (const auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) { unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements(); for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i) @@ -1662,7 +1662,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; return true; } - + // The bitcode reader can create a place holder for a forward reference // used as the shuffle mask. When this occurs, the shuffle mask will // fall into this case and fail. To avoid this error, do this bit of @@ -1687,12 +1687,12 @@ int ShuffleVectorInst::getMaskValue(const Constant *Mask, unsigned i) { void ShuffleVectorInst::getShuffleMask(const Constant *Mask, SmallVectorImpl<int> &Result) { unsigned NumElts = Mask->getType()->getVectorNumElements(); - + if (auto *CDS = dyn_cast<ConstantDataSequential>(Mask)) { for (unsigned i = 0; i != NumElts; ++i) Result.push_back(CDS->getElementAsInteger(i)); return; - } + } for (unsigned i = 0; i != NumElts; ++i) { Constant *C = Mask->getAggregateElement(i); Result.push_back(isa<UndefValue>(C) ? -1 : @@ -1806,7 +1806,7 @@ bool ShuffleVectorInst::isTransposeMask(ArrayRef<int> Mask) { // InsertValueInst Class //===----------------------------------------------------------------------===// -void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, +void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const Twine &Name) { assert(getNumOperands() == 2 && "NumOperands not initialized?"); @@ -1903,7 +1903,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, AssertOK(); } -BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, +BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(Ty, iType, @@ -1938,8 +1938,8 @@ void BinaryOperator::AssertOK() { "Tried to create a floating-point operation on a " "non-floating-point type!"); break; - case UDiv: - case SDiv: + case UDiv: + case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); assert(getType()->isIntOrIntVectorTy() && @@ -1951,8 +1951,8 @@ void BinaryOperator::AssertOK() { assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FDIV"); break; - case URem: - case SRem: + case URem: + case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); assert(getType()->isIntOrIntVectorTy() && @@ -2185,7 +2185,7 @@ bool CastInst::isLosslessCast() const { Type *DstTy = getType(); if (SrcTy == DstTy) return true; - + // Pointer to pointer is always lossless. if (SrcTy->isPointerTy()) return DstTy->isPointerTy(); @@ -2194,10 +2194,10 @@ bool CastInst::isLosslessCast() const { /// This function determines if the CastInst does not require any bits to be /// changed in order to effect the cast. Essentially, it identifies cases where -/// no code gen is necessary for the cast, hence the name no-op cast. For +/// no code gen is necessary for the cast, hence the name no-op cast. 
For /// example, the following are all no-op casts: /// # bitcast i32* %x to i8* -/// # bitcast <2 x i32> %x to <4 x i16> +/// # bitcast <2 x i32> %x to <4 x i16> /// # ptrtoint i32* %x to i32 ; on 32-bit plaforms only /// Determine if the described cast is a no-op. bool CastInst::isNoopCast(Instruction::CastOps Opcode, @@ -2208,7 +2208,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode, default: llvm_unreachable("Invalid CastOp"); case Instruction::Trunc: case Instruction::ZExt: - case Instruction::SExt: + case Instruction::SExt: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::UIToFP: @@ -2247,7 +2247,7 @@ unsigned CastInst::isEliminableCastPair( Type *DstIntPtrTy) { // Define the 144 possibilities for these two cast instructions. The values // in this matrix determine what to do in a given situation and select the - // case in the switch below. The rows correspond to firstOp, the columns + // case in the switch below. The rows correspond to firstOp, the columns // correspond to secondOp. In looking at the table below, keep in mind // the following cast properties: // @@ -2315,16 +2315,16 @@ unsigned CastInst::isEliminableCastPair( int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin] [secondOp-Instruction::CastOpsBegin]; switch (ElimCase) { - case 0: + case 0: // Categorically disallowed. return 0; - case 1: + case 1: // Allowed, use first cast's opcode. return firstOp; - case 2: + case 2: // Allowed, use second cast's opcode. return secondOp; - case 3: + case 3: // No-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a // non-vector type. @@ -2337,7 +2337,7 @@ unsigned CastInst::isEliminableCastPair( if (DstTy->isFloatingPointTy()) return firstOp; return 0; - case 5: + case 5: // No-op cast in first op implies secondOp as long as the SrcTy // is an integer. if (SrcTy->isIntegerTy()) @@ -2449,7 +2449,7 @@ unsigned CastInst::isEliminableCastPair( case 17: // (sitofp (zext x)) -> (uitofp x) return Instruction::UIToFP; - case 99: + case 99: // Cast combination can't happen (error in input). This is for all cases // where the MidTy is not the same for the two cast instructions. 
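The cast-pair table the comment above describes is worth seeing in miniature. A reduced, integer-only model with three opcodes (rows are the first cast, columns the second; None means the pair must be kept as two instructions):

    #include <cassert>

    enum Cast { Trunc, ZExt, Bitcast, None };

    static const Cast Fold[3][3] = {
        //             Trunc   ZExt   Bitcast     <- second cast
        /* Trunc   */ {Trunc,  None,  Trunc},   // trunc+zext needs a mask
        /* ZExt    */ {None,   ZExt,  ZExt},    // zext of zext widens once
        /* Bitcast */ {Trunc,  ZExt,  Bitcast}, // same-width bitcast is free
    };

    int main() {
      assert(Fold[ZExt][ZExt] == ZExt);
      assert(Fold[Trunc][ZExt] == None);
    }

The full 12x12 matrix encodes the same idea plus the size- and type-dependent cases that the numbered switch above resolves.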
llvm_unreachable("Invalid Cast Combination"); @@ -2458,7 +2458,7 @@ unsigned CastInst::isEliminableCastPair( } } -CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, +CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(castIsValid(op, S, Ty) && "Invalid cast!"); // Construct and return the appropriate CastInst subclass @@ -2502,7 +2502,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, } } -CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, +CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2510,7 +2510,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, return Create(Instruction::ZExt, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, +CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2518,7 +2518,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty, return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, +CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2526,7 +2526,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, return Create(Instruction::SExt, S, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, +CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -2543,7 +2543,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty, } CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty, - const Twine &Name, + const Twine &Name, BasicBlock *InsertAtEnd) { if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); @@ -2636,7 +2636,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, return Create(opcode, C, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, +CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && @@ -2650,8 +2650,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty, return Create(opcode, C, Ty, Name, InsertAtEnd); } -CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, - const Twine &Name, +CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, + const Twine &Name, Instruction *InsertBefore) { assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); @@ -2663,8 +2663,8 @@ CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, return Create(opcode, C, Ty, Name, InsertBefore); } -CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, - const Twine &Name, +CastInst *CastInst::CreateFPCast(Value *C, Type *Ty, + const Twine &Name, BasicBlock *InsertAtEnd) { assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); @@ -2707,7 +2707,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { return DestBits == SrcBits; // Casting from something 
else return SrcTy->isPointerTy(); - } + } if (DestTy->isFloatingPointTy()) { // Casting to floating pt if (SrcTy->isIntegerTy()) // Casting from integral return true; @@ -2724,7 +2724,7 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { if (SrcTy->isPointerTy()) // Casting from pointer return true; return SrcTy->isIntegerTy(); // Casting from integral - } + } if (DestTy->isX86_MMXTy()) { if (SrcTy->isVectorTy()) return DestBits == SrcBits; // 64-bit vector to MMX @@ -2834,10 +2834,10 @@ CastInst::getCastOpcode( return BitCast; // Same size, No-op cast } } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt - if (DestIsSigned) + if (DestIsSigned) return FPToSI; // FP -> sint else - return FPToUI; // FP -> uint + return FPToUI; // FP -> uint } else if (SrcTy->isVectorTy()) { assert(DestBits == SrcBits && "Casting vector to integer of different width"); @@ -2898,7 +2898,7 @@ CastInst::getCastOpcode( /// could be broken out into the separate constructors but it is useful to have /// it in one place and to eliminate the redundant code for getting the sizes /// of the types involved. -bool +bool CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { // Check for type sanity on the arguments Type *SrcTy = S->getType(); @@ -2928,7 +2928,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { case Instruction::ZExt: return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() && SrcLength == DstLength && SrcBitSize < DstBitSize; - case Instruction::SExt: + case Instruction::SExt: return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() && SrcLength == DstLength && SrcBitSize < DstBitSize; case Instruction::FPTrunc: @@ -3019,138 +3019,138 @@ TruncInst::TruncInst( TruncInst::TruncInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) { +) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc"); } ZExtInst::ZExtInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, ZExt, S, Name, InsertBefore) { +) : CastInst(Ty, ZExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt"); } ZExtInst::ZExtInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) { +) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt"); } SExtInst::SExtInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, SExt, S, Name, InsertBefore) { +) : CastInst(Ty, SExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt"); } SExtInst::SExtInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, SExt, S, Name, InsertAtEnd) { +) : CastInst(Ty, SExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt"); } FPTruncInst::FPTruncInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) { +) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc"); } FPTruncInst::FPTruncInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) { +) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc"); } FPExtInst::FPExtInst( Value *S, Type *Ty, const Twine &Name, 
Instruction *InsertBefore -) : CastInst(Ty, FPExt, S, Name, InsertBefore) { +) : CastInst(Ty, FPExt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt"); } FPExtInst::FPExtInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) { +) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt"); } UIToFPInst::UIToFPInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, UIToFP, S, Name, InsertBefore) { +) : CastInst(Ty, UIToFP, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP"); } UIToFPInst::UIToFPInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) { +) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP"); } SIToFPInst::SIToFPInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, SIToFP, S, Name, InsertBefore) { +) : CastInst(Ty, SIToFP, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP"); } SIToFPInst::SIToFPInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) { +) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP"); } FPToUIInst::FPToUIInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, FPToUI, S, Name, InsertBefore) { +) : CastInst(Ty, FPToUI, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI"); } FPToUIInst::FPToUIInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) { +) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI"); } FPToSIInst::FPToSIInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, FPToSI, S, Name, InsertBefore) { +) : CastInst(Ty, FPToSI, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI"); } FPToSIInst::FPToSIInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) { +) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI"); } PtrToIntInst::PtrToIntInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) { +) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt"); } PtrToIntInst::PtrToIntInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) { +) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt"); } IntToPtrInst::IntToPtrInst( Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore -) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) { +) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr"); } IntToPtrInst::IntToPtrInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) { +) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr"); } BitCastInst::BitCastInst( Value *S, Type *Ty, const Twine 
&Name, Instruction *InsertBefore -) : CastInst(Ty, BitCast, S, Name, InsertBefore) { +) : CastInst(Ty, BitCast, S, Name, InsertBefore) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"); } BitCastInst::BitCastInst( Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd -) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) { +) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) { assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"); } @@ -3205,7 +3205,7 @@ CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, return new ICmpInst(CmpInst::Predicate(predicate), S1, S2, Name); } - + if (InsertBefore) return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate), S1, S2, Name); @@ -3312,8 +3312,8 @@ StringRef CmpInst::getPredicateName(Predicate Pred) { ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); - case ICMP_EQ: case ICMP_NE: - case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE: + case ICMP_EQ: case ICMP_NE: + case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE: return pred; case ICMP_UGT: return ICMP_SGT; case ICMP_ULT: return ICMP_SLT; @@ -3325,8 +3325,8 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); - case ICMP_EQ: case ICMP_NE: - case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE: + case ICMP_EQ: case ICMP_NE: + case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE: return pred; case ICMP_SGT: return ICMP_UGT; case ICMP_SLT: return ICMP_ULT; @@ -3371,7 +3371,7 @@ CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) { case ICMP_ULT: return ICMP_UGT; case ICMP_UGE: return ICMP_ULE; case ICMP_ULE: return ICMP_UGE; - + case FCMP_FALSE: case FCMP_TRUE: case FCMP_OEQ: case FCMP_ONE: case FCMP_UEQ: case FCMP_UNE: @@ -3422,7 +3422,7 @@ CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) { bool CmpInst::isUnsigned(Predicate predicate) { switch (predicate) { default: return false; - case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return true; } } @@ -3430,7 +3430,7 @@ bool CmpInst::isUnsigned(Predicate predicate) { bool CmpInst::isSigned(Predicate predicate) { switch (predicate) { default: return false; - case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: return true; } } @@ -3438,17 +3438,17 @@ bool CmpInst::isSigned(Predicate predicate) { bool CmpInst::isOrdered(Predicate predicate) { switch (predicate) { default: return false; - case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT: - case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ORD: return true; } } - + bool CmpInst::isUnordered(Predicate predicate) { switch (predicate) { default: return false; - case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT: - case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT: + case 
FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE: case FCmpInst::FCMP_UNO: return true; } } @@ -3619,7 +3619,7 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) { void IndirectBrInst::growOperands() { unsigned e = getNumOperands(); unsigned NumOps = e*2; - + ReservedSpace = NumOps; growHungoffUses(ReservedSpace); } @@ -3665,13 +3665,13 @@ void IndirectBrInst::addDestination(BasicBlock *DestBB) { /// indirectbr instruction. void IndirectBrInst::removeDestination(unsigned idx) { assert(idx < getNumOperands()-1 && "Successor index out of range!"); - + unsigned NumOps = getNumOperands(); Use *OL = getOperandList(); // Replace this value with the last one. OL[idx+1] = OL[NumOps-1]; - + // Nuke the last value. OL[NumOps-1].set(nullptr); setNumHungOffUseOperands(NumOps-1); @@ -3725,7 +3725,7 @@ LoadInst *LoadInst::cloneImpl() const { StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), isVolatile(), getAlignment(), getOrdering(), getSyncScopeID()); - + } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index d5046d644187..3b2e1e81b1c1 100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares LLVMContextImpl, the opaque implementation +// This file declares LLVMContextImpl, the opaque implementation // of LLVMContext. // //===----------------------------------------------------------------------===// @@ -1217,7 +1217,7 @@ public: /// OwnedModules - The set of modules instantiated in this context, and which /// will be automatically deleted if this context is deleted. SmallPtrSet<Module*, 4> OwnedModules; - + LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr; void *InlineAsmDiagContext = nullptr; @@ -1265,10 +1265,10 @@ public: using ArrayConstantsTy = ConstantUniqueMap<ConstantArray>; ArrayConstantsTy ArrayConstants; - + using StructConstantsTy = ConstantUniqueMap<ConstantStruct>; StructConstantsTy StructConstants; - + using VectorConstantsTy = ConstantUniqueMap<ConstantVector>; VectorConstantsTy VectorConstants; @@ -1293,11 +1293,11 @@ public: Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy; Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; - + /// TypeAllocator - All dynamically allocated types are allocated from this. /// They live forever until the context is torn down. BumpPtrAllocator TypeAllocator; - + DenseMap<unsigned, IntegerType*> IntegerTypes; using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>; @@ -1306,7 +1306,7 @@ public: StructTypeSet AnonStructTypes; StringMap<StructType*> NamedStructTypes; unsigned NamedStructTypesUniqueID = 0; - + DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes; DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes; DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0 @@ -1317,7 +1317,7 @@ public: /// whether or not a value has an entry in this map. using ValueHandlesTy = DenseMap<Value *, ValueHandleBase *>; ValueHandlesTy ValueHandles; - + /// CustomMDKindNames - Map to hold the metadata string to ID mapping. 
StringMap<unsigned> CustomMDKindNames; diff --git a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h index 6ddab6b4c69d..d4ad1eba33c6 100644 --- a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h +++ b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h @@ -33,17 +33,17 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest, // Do it. *Dest = Src; - + // Get the new SymTab object. ValueSymbolTable *NewST = getSymTab(getListOwner()); - + // If there is nothing to do, quick exit. if (OldST == NewST) return; - + // Move all the elements from the old symtab to the new one. ListTy &ItemList = getList(getListOwner()); if (ItemList.empty()) return; - + if (OldST) { // Remove all entries from the previous symtab. for (auto I = ItemList.begin(); I != ItemList.end(); ++I) @@ -57,7 +57,7 @@ void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest, if (I->hasName()) NewST->reinsertValue(&*I); } - + } template <typename ValueSubClass> diff --git a/contrib/llvm/lib/IR/ValueSymbolTable.cpp b/contrib/llvm/lib/IR/ValueSymbolTable.cpp index 0a7f2803cd4c..f4bea5604043 100644 --- a/contrib/llvm/lib/IR/ValueSymbolTable.cpp +++ b/contrib/llvm/lib/IR/ValueSymbolTable.cpp @@ -79,7 +79,7 @@ void ValueSymbolTable::reinsertValue(Value* V) { // *V << "\n"); return; } - + // Otherwise, there is a naming conflict. Rename this value. SmallString<256> UniqueName(V->getName().begin(), V->getName().end()); @@ -107,7 +107,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // << *V << "\n"); return &*IterBool.first; } - + // Otherwise, there is a naming conflict. Rename this value. SmallString<256> UniqueName(Name.begin(), Name.end()); return makeUniqueName(V, UniqueName); diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 90d0f9bdb885..642e538ecf92 100644 --- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -422,7 +422,7 @@ public: int TempFD; llvm::sys::path::remove_filename(CachePath); sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o"); - std::error_code EC = + std::error_code EC = sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename); if (EC) { errs() << "Error: " << EC.message() << "\n"; @@ -432,7 +432,7 @@ public: raw_fd_ostream OS(TempFD, /* ShouldClose */ true); OS << OutputBuffer.getBuffer(); } - // Rename temp file to final destination; rename is atomic + // Rename temp file to final destination; rename is atomic EC = sys::fs::rename(TempFilename, EntryPath); if (EC) sys::fs::remove(TempFilename); @@ -1048,10 +1048,10 @@ void ThinLTOCodeGenerator::run() { if (SavedObjectsDirectoryPath.empty()) { // We need to generated a memory buffer for the linker. if (!CacheEntryPath.empty()) { - // When cache is enabled, reload from the cache if possible. + // When cache is enabled, reload from the cache if possible. // Releasing the buffer from the heap and reloading it from the - // cache file with mmap helps us to lower memory pressure. - // The freed memory can be used for the next input file. + // cache file with mmap helps us to lower memory pressure. + // The freed memory can be used for the next input file. // The final binary link will read from the VFS cache (hopefully!) // or from disk (if the memory pressure was too high). 
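The ThinLTO cache write above uses the classic temp-file-plus-rename recipe: stream the object into a unique file in the cache directory, then rename it over the final path, which on POSIX is atomic within a filesystem, so concurrent readers never observe a half-written entry. A sketch with the standard C file API (the real code obtains a truly unique temp name via createUniqueFile):

    #include <cstdio>
    #include <string>

    bool writeCacheEntry(const std::string &TempPath,
                         const std::string &FinalPath,
                         const std::string &Bytes) {
      FILE *F = std::fopen(TempPath.c_str(), "wb");
      if (!F)
        return false;
      std::fwrite(Bytes.data(), 1, Bytes.size(), F);
      std::fclose(F);
      if (std::rename(TempPath.c_str(), FinalPath.c_str()) != 0) {
        std::remove(TempPath.c_str()); // as above: drop the temp on failure
        return false;
      }
      return true;
    }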
auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer(); diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp index 92f615180561..ae02f50bf8bd 100644 --- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -337,7 +337,7 @@ void MCAsmStreamer::AddComment(const Twine &T, bool EOL) { if (!IsVerboseAsm) return; T.toVector(CommentToEmit); - + if (EOL) CommentToEmit.push_back('\n'); // Place comment in a new line. } @@ -655,7 +655,7 @@ void MCAsmStreamer::EmitSyntaxDirective() { EmitEOL(); } // FIXME: Currently emit unprefix'ed registers. - // The intel_syntax directive has one optional argument + // The intel_syntax directive has one optional argument // with may have a value of prefix or noprefix. } diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp index 1470e026d985..1e23b6d816e8 100644 --- a/contrib/llvm/lib/MC/MCAssembler.cpp +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -550,7 +550,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm, break; } - case MCFragment::FT_Data: + case MCFragment::FT_Data: ++stats::EmittedDataFragments; OS << cast<MCDataFragment>(F).getContents(); break; @@ -822,6 +822,9 @@ void MCAssembler::layout(MCAsmLayout &Layout) { } else if (auto *FragWithFixups = dyn_cast<MCCVDefRangeFragment>(&Frag)) { Fixups = FragWithFixups->getFixups(); Contents = FragWithFixups->getContents(); + } else if (auto *FragWithFixups = dyn_cast<MCDwarfLineAddrFragment>(&Frag)) { + Fixups = FragWithFixups->getFixups(); + Contents = FragWithFixups->getContents(); } else llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { @@ -951,16 +954,43 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCContext &Context = Layout.getAssembler().getContext(); uint64_t OldSize = DF.getContents().size(); int64_t AddrDelta; - bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); - assert(Abs && "We created a line delta with an invalid expression"); - (void) Abs; + bool Abs; + if (getBackend().requiresDiffExpressionRelocations()) + Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout); + else { + Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); + assert(Abs && "We created a line delta with an invalid expression"); + } int64_t LineDelta; LineDelta = DF.getLineDelta(); - SmallString<8> &Data = DF.getContents(); + SmallVectorImpl<char> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, - AddrDelta, OSE); + DF.getFixups().clear(); + + if (Abs) { + MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, + AddrDelta, OSE); + } else { + uint32_t Offset; + uint32_t Size; + bool SetDelta = MCDwarfLineAddr::FixedEncode(Context, + getDWARFLinetableParams(), + LineDelta, AddrDelta, + OSE, &Offset, &Size); + // Add Fixups for address delta or new address. 
+ const MCExpr *FixupExpr; + if (SetDelta) { + FixupExpr = &DF.getAddrDelta(); + } else { + const MCBinaryExpr *ABE = cast<MCBinaryExpr>(&DF.getAddrDelta()); + FixupExpr = ABE->getLHS(); + } + DF.getFixups().push_back( + MCFixup::create(Offset, FixupExpr, + MCFixup::getKindForSize(Size, false /*isPCRel*/))); + } + return OldSize != Data.size(); } diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp index 30e0bb562644..ad0a39991c53 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -38,7 +38,7 @@ using namespace llvm; // LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic // disassembly is supported by passing a block of information in the DisInfo // parameter and specifying the TagType and callback functions as described in -// the header llvm-c/Disassembler.h . The pointer to the block and the +// the header llvm-c/Disassembler.h . The pointer to the block and the // functions can all be passed as NULL. If successful, this returns a // disassembler context. If not, it returns NULL. // diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h index 25d17dafb576..f638fdc781d7 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h @@ -4,10 +4,10 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // -// This file defines the interface for the Disassembly library's disassembler +// This file defines the interface for the Disassembly library's disassembler // context. The disassembler is responsible for producing strings for // individual instructions according to a given architecture and disassembly // syntax. diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index 6131fcd658b2..0461c2564ccf 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -492,7 +492,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, // Parameters of the state machine, are next. MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1); - // maximum_operations_per_instruction + // maximum_operations_per_instruction // For non-VLIW architectures this field is always 1. // FIXME: VLIW architectures need to update this field accordingly. if (LineTableVersion >= 4) @@ -731,6 +731,57 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params, } } +bool MCDwarfLineAddr::FixedEncode(MCContext &Context, + MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta, + raw_ostream &OS, + uint32_t *Offset, uint32_t *Size) { + if (LineDelta != INT64_MAX) { + OS << char(dwarf::DW_LNS_advance_line); + encodeSLEB128(LineDelta, OS); + } + + // Use address delta to adjust address or use absolute address to adjust + // address. + bool SetDelta; + // According to DWARF spec., the DW_LNS_fixed_advance_pc opcode takes a + // single uhalf (unencoded) operand. So, the maximum value of AddrDelta + // is 65535. We set a conservative upper bound for it for relaxation. 
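FixedEncode above mixes fixed-width operands (for the parts that need relocations) with the LEB128 variable-length integers used everywhere else in DWARF: seven payload bits per byte, high bit set while more bytes follow, and the signed form sign-extends the final byte. A self-contained sketch of the unsigned encoder, equivalent in spirit to the encodeULEB128 helper called above; as a worked example, 624485 encodes as the three bytes 0xE5 0x8E 0x26:

    #include <cstdint>
    #include <ostream>

    // Minimal ULEB128 encoder: 7 bits per byte, MSB = "more bytes follow".
    static void writeULEB128(uint64_t Value, std::ostream &OS) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // Continuation bit.
        OS.put(static_cast<char>(Byte));
      } while (Value != 0);
    }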
+ if (AddrDelta > 60000) { + const MCAsmInfo *asmInfo = Context.getAsmInfo(); + unsigned AddrSize = asmInfo->getCodePointerSize(); + + OS << char(dwarf::DW_LNS_extended_op); + encodeULEB128(1 + AddrSize, OS); + OS << char(dwarf::DW_LNE_set_address); + // Generate fixup for the address. + *Offset = OS.tell(); + *Size = AddrSize; + SetDelta = false; + std::vector<uint8_t> FillData; + FillData.insert(FillData.begin(), AddrSize, 0); + OS.write(reinterpret_cast<char *>(FillData.data()), AddrSize); + } else { + OS << char(dwarf::DW_LNS_fixed_advance_pc); + // Generate fixup for 2-bytes address delta. + *Offset = OS.tell(); + *Size = 2; + SetDelta = true; + OS << char(0); + OS << char(0); + } + + if (LineDelta == INT64_MAX) { + OS << char(dwarf::DW_LNS_extended_op); + OS << char(1); + OS << char(dwarf::DW_LNE_end_sequence); + } else { + OS << char(dwarf::DW_LNS_copy); + } + + return SetDelta; +} + // Utility function to write a tuple for .debug_abbrev. static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) { MCOS->EmitULEB128IntValue(Name); diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp index 8223f3a5c66f..4d7c89116893 100644 --- a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp @@ -24,6 +24,11 @@ bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, return false; } +bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI, + const MCInst &Inst) const { + return false; +} + bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const { if (Inst.getNumOperands() == 0 || diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 29d34a8c1e3e..b88d2d801822 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -950,8 +950,19 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC, } MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const { - return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP, - 0, utostr(Hash)); + switch (TT.getObjectFormat()) { + case Triple::ELF: + return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP, + 0, utostr(Hash)); + case Triple::MachO: + case Triple::COFF: + case Triple::Wasm: + case Triple::UnknownObjectFormat: + report_fatal_error("Cannot get DWARF types section for this object file " + "format: not implemented."); + break; + } + llvm_unreachable("Unknown ObjectFormatType"); } MCSection * diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 67e3512cc5bd..7bf14968c973 100644 --- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -254,7 +254,7 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) { if (getLexer().is(AsmToken::Comma) || getLexer().is(AsmToken::EndOfStatement)) break; - + unsigned CurSize; if (getLexer().is(AsmToken::String)) { CurSize = getTok().getIdentifier().size() + 2; diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp index 8dd4b61be68f..21a9c3604cfc 100644 --- a/contrib/llvm/lib/MC/MCStreamer.cpp +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -514,7 +514,7 @@ void MCStreamer::EmitCFIEscape(StringRef Values) { void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) { MCSymbol *Label = EmitCFILabel(); - MCCFIInstruction Instruction = + MCCFIInstruction 
Instruction = MCCFIInstruction::createGnuArgsSize(Label, Size); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); if (!CurFrame) diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp index a464af1d42a7..2664528909af 100644 --- a/contrib/llvm/lib/MC/MachObjectWriter.cpp +++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp @@ -952,7 +952,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, const DataRegionData *Data = &(*it); uint64_t Start = getSymbolAddress(*Data->Start, Layout); uint64_t End; - if (Data->End) + if (Data->End) End = getSymbolAddress(*Data->End, Layout); else report_fatal_error("Data region not terminated"); diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index d72da3187e07..85b1913cb23b 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -339,7 +339,7 @@ unsigned COFFObjectFile::getSectionID(SectionRef Sec) const { bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); - // In COFF, a virtual section won't have any in-file + // In COFF, a virtual section won't have any in-file // content, so the file pointer to the content will be zero. return Sec->PointerToRawData == 0; } diff --git a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index f67a0db690eb..745f79cd77f3 100644 --- a/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/contrib/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -550,6 +550,10 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) { IO.mapRequired("DisplayName", Symbol.Name); } +template <> void SymbolRecordImpl<UsingNamespaceSym>::map(IO &IO) { + IO.mapRequired("Namespace", Symbol.Name); +} + } // end namespace detail } // end namespace CodeViewYAML } // end namespace llvm diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index 24005c1890c9..e9e429c8031b 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -1752,7 +1752,7 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { if (compareAbsoluteValue(V) == cmpLessThan) V = scalbn(V, -1, rmNearestTiesToEven); V.sign = sign; - + fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK); } diff --git a/contrib/llvm/lib/Support/ConvertUTF.cpp b/contrib/llvm/lib/Support/ConvertUTF.cpp index e56854a3ae42..8f02fae4f558 100644 --- a/contrib/llvm/lib/Support/ConvertUTF.cpp +++ b/contrib/llvm/lib/Support/ConvertUTF.cpp @@ -8,9 +8,9 @@ *===------------------------------------------------------------------------=*/ /* * Copyright 2001-2004 Unicode, Inc. - * + * * Disclaimer - * + * * This source code is provided as is by Unicode, Inc. No claims are * made as to fitness for any particular purpose. No warranties of any * kind are expressed or implied. The recipient agrees to determine @@ -18,9 +18,9 @@ * purchased on magnetic or optical media from Unicode, Inc., the * sole remedy for any claim will be exchange of defective media * within 90 days of receipt. - * + * * Limitations on Rights to Redistribute This Code - * + * * Unicode, Inc. 
hereby grants the right to freely use the information * supplied in this file in the creation of products supporting the * Unicode Standard, and to make copies of this file in any form @@ -117,7 +117,7 @@ static const char trailingBytesForUTF8[256] = { * This table contains as many values as there might be trailing bytes * in a UTF-8 sequence. */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; /* @@ -143,7 +143,7 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC /* --------------------------------------------------------------------- */ ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, + const UTF32** sourceStart, const UTF32* sourceEnd, UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF32* source = *sourceStart; @@ -192,7 +192,7 @@ ConversionResult ConvertUTF32toUTF16 ( /* --------------------------------------------------------------------- */ ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, + const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF16* source = *sourceStart; @@ -246,7 +246,7 @@ if (result == sourceIllegal) { return result; } ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, + const UTF16** sourceStart, const UTF16* sourceEnd, UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF16* source = *sourceStart; @@ -255,7 +255,7 @@ ConversionResult ConvertUTF16toUTF8 ( UTF32 ch; unsigned short bytesToWrite = 0; const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; + const UTF32 byteMark = 0x80; const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ ch = *source++; /* If we have a surrogate pair, convert to UTF32 first. */ @@ -316,7 +316,7 @@ ConversionResult ConvertUTF16toUTF8 ( /* --------------------------------------------------------------------- */ ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, + const UTF32** sourceStart, const UTF32* sourceEnd, UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF32* source = *sourceStart; @@ -325,7 +325,7 @@ ConversionResult ConvertUTF32toUTF8 ( UTF32 ch; unsigned short bytesToWrite = 0; const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; + const UTF32 byteMark = 0x80; ch = *source++; if (flags == strictConversion ) { /* UTF-16 surrogate values are illegal in UTF-32 */ @@ -347,7 +347,7 @@ ConversionResult ConvertUTF32toUTF8 ( ch = UNI_REPLACEMENT_CHAR; result = sourceIllegal; } - + target += bytesToWrite; if (target > targetEnd) { --source; /* Back up source pointer! 
*/ @@ -540,7 +540,7 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) { /* --------------------------------------------------------------------- */ ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, + const UTF8** sourceStart, const UTF8* sourceEnd, UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF8* source = *sourceStart; @@ -613,7 +613,7 @@ ConversionResult ConvertUTF8toUTF16 ( /* --------------------------------------------------------------------- */ static ConversionResult ConvertUTF8toUTF32Impl( - const UTF8** sourceStart, const UTF8* sourceEnd, + const UTF8** sourceStart, const UTF8* sourceEnd, UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags, Boolean InputIsPartial) { ConversionResult result = conversionOK; diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp index fd5d097d2b7e..be4b5c3e01c3 100644 --- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp @@ -49,7 +49,7 @@ public: /// Called when the separate crash-recovery thread was finished, to /// indicate that we don't need to clear the thread-local CurrentContext. - void setSwitchedThread() { + void setSwitchedThread() { #if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 SwitchedThread = true; #endif @@ -96,7 +96,7 @@ CrashRecoveryContext::~CrashRecoveryContext() { delete tmp; } tlIsRecoveringFromCrash->set(PC); - + CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; } diff --git a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp index b82aec1423f5..bd9f98b0b82d 100644 --- a/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp +++ b/contrib/llvm/lib/Support/DAGDeltaAlgorithm.cpp @@ -96,7 +96,7 @@ private: assert(PredClosure.count(Node) && "Invalid node!"); return PredClosure[Node].end(); } - + succ_iterator_ty succ_begin(change_ty Node) { assert(Successors.count(Node) && "Invalid node!"); return Successors[Node].begin(); @@ -205,7 +205,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl( Worklist.pop_back(); std::set<change_ty> &ChangeSuccs = SuccClosure[Change]; - for (pred_iterator_ty it = pred_begin(Change), + for (pred_iterator_ty it = pred_begin(Change), ie = pred_end(Change); it != ie; ++it) { SuccClosure[*it].insert(Change); SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end()); @@ -222,7 +222,7 @@ DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl( for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), ie2 = succ_closure_end(*it); it2 != ie2; ++it2) PredClosure[*it2].insert(*it); - + // Dump useful debug info. LLVM_DEBUG({ llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n"; diff --git a/contrib/llvm/lib/Support/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp index 10be9b391b49..2149f21281d3 100644 --- a/contrib/llvm/lib/Support/Errno.cpp +++ b/contrib/llvm/lib/Support/Errno.cpp @@ -42,7 +42,7 @@ std::string StrError(int errnum) { const int MaxErrStrLen = 2000; char buffer[MaxErrStrLen]; buffer[0] = '\0'; -#endif +#endif #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. 
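StrError, which the Errno.cpp hunk above touches, has to paper over the fact that strerror_r exists in two incompatible flavors: the XSI version returns an int status and fills the caller's buffer, while the GNU version returns a char* that may point elsewhere, which is why the wrapper stages everything through a fixed-size local buffer behind configure-time checks. A sketch assuming the XSI variant:

    #include <cstring>
    #include <string>

    // Thread-safe errno-to-string, assuming XSI strerror_r (returns 0 on
    // success and fills Buf). glibc with _GNU_SOURCE exposes the other flavor.
    static std::string strErrorThreadSafe(int ErrNum) {
      char Buf[2000]; // Mirrors MaxErrStrLen in the hunk above.
      Buf[0] = '\0';
      if (strerror_r(ErrNum, Buf, sizeof(Buf)) == 0)
        return Buf;
      return "Unknown error " + std::to_string(ErrNum);
    }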
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp index ec7d57586e8b..cf9847faccd1 100644 --- a/contrib/llvm/lib/Support/FoldingSet.cpp +++ b/contrib/llvm/lib/Support/FoldingSet.cpp @@ -92,7 +92,7 @@ void FoldingSetNodeID::AddString(StringRef String) { unsigned Units = Size / 4; unsigned Pos = 0; const unsigned *Base = (const unsigned*) String.data(); - + // If the string is aligned do a bulk transfer. if (!((intptr_t)Base & 3)) { Bits.append(Base, Base + Units); @@ -121,7 +121,7 @@ void FoldingSetNodeID::AddString(StringRef String) { } } } - + // With the leftover bits. unsigned V = 0; // Pos will have overshot size by 4 - #bytes left over. @@ -141,7 +141,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { Bits.append(ID.Bits.begin(), ID.Bits.end()); } -/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to +/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetBase. unsigned FoldingSetNodeID::ComputeHash() const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); @@ -192,7 +192,7 @@ static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { // The low bit is set if this is the pointer back to the bucket. if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) return nullptr; - + return static_cast<FoldingSetBase::Node*>(NextInBucketPtr); } @@ -272,11 +272,11 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); void **OldBuckets = Buckets; unsigned OldNumBuckets = NumBuckets; - + // Clear out new buckets. Buckets = AllocateBuckets(NewBucketCount); // Set NumBuckets only if allocation of new buckets was succesful - NumBuckets = NewBucketCount; + NumBuckets = NewBucketCount; NumNodes = 0; // Walk the old buckets, rehashing nodes into their new place. @@ -296,7 +296,7 @@ void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { TempID.clear(); } } - + free(OldBuckets); } @@ -324,9 +324,9 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, unsigned IDHash = ID.ComputeHash(); void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; - + InsertPos = nullptr; - + FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) @@ -335,14 +335,14 @@ FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, Probe = NodeInBucket->getNextInBucket(); } - + // Didn't find the node, return null with the bucket as the InsertPos. InsertPos = Bucket; return nullptr; } /// InsertNode - Insert the specified node into the folding set, knowing that it -/// is not already in the map. InsertPos must be obtained from +/// is not already in the map. InsertPos must be obtained from /// FindNodeOrInsertPos. void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { assert(!N->getNextInBucket()); @@ -354,12 +354,12 @@ void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { } ++NumNodes; - + /// The insert position is actually a bucket pointer. void **Bucket = static_cast<void**>(InsertPos); - + void *Next = *Bucket; - + // If this is the first insertion into this bucket, its next pointer will be // null. Pretend as if it pointed to itself, setting the low bit to indicate // that it is a pointer to the bucket. @@ -384,13 +384,13 @@ bool FoldingSetBase::RemoveNode(Node *N) { // Remember what N originally pointed to, either a bucket or another node. 
void *NodeNextPtr = Ptr; - + // Chase around the list until we find the node (or bucket) which points to N. while (true) { if (Node *NodeInBucket = GetNextPtr(Ptr)) { // Advance pointer. Ptr = NodeInBucket->getNextInBucket(); - + // We found a node that points to N, change it to point to N's next node, // removing N from the list. if (Ptr == N) { @@ -400,7 +400,7 @@ bool FoldingSetBase::RemoveNode(Node *N) { } else { void **Bucket = GetBucketPtr(Ptr); Ptr = *Bucket; - + // If we found that the bucket points to N, update the bucket to point to // whatever is next. if (Ptr == N) { @@ -432,7 +432,7 @@ FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { while (*Bucket != reinterpret_cast<void*>(-1) && (!*Bucket || !GetNextPtr(*Bucket))) ++Bucket; - + NodePtr = static_cast<FoldingSetNode*>(*Bucket); } @@ -443,7 +443,7 @@ void FoldingSetIteratorImpl::advance() { if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) NodePtr = NextNodeInBucket; else { - // Otherwise, this is the last link in this bucket. + // Otherwise, this is the last link in this bucket. void **Bucket = GetBucketPtr(Probe); // Skip to the next non-null non-self-cycle bucket. @@ -451,7 +451,7 @@ void FoldingSetIteratorImpl::advance() { ++Bucket; } while (*Bucket != reinterpret_cast<void*>(-1) && (!*Bucket || !GetNextPtr(*Bucket))); - + NodePtr = static_cast<FoldingSetNode*>(*Bucket); } } diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp index a9f4409f5dde..b0cb06c1daa2 100644 --- a/contrib/llvm/lib/Support/FormattedStream.cpp +++ b/contrib/llvm/lib/Support/FormattedStream.cpp @@ -65,7 +65,7 @@ void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { /// /// \param NewCol - The column to move to. /// -formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. ComputePosition(getBufferStart(), GetNumBytesInBuffer()); diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp index 1c884dc70fc9..74f71a385027 100644 --- a/contrib/llvm/lib/Support/ManagedStatic.cpp +++ b/contrib/llvm/lib/Support/ManagedStatic.cpp @@ -43,7 +43,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), Ptr.store(Tmp, std::memory_order_release); DeleterFn = Deleter; - + // Add to list of managed statics. Next = StaticList; StaticList = this; @@ -53,7 +53,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), "Partially initialized ManagedStatic!?"); Ptr = Creator(); DeleterFn = Deleter; - + // Add to list of managed statics. Next = StaticList; StaticList = this; @@ -70,7 +70,7 @@ void ManagedStaticBase::destroy() const { // Destroy memory. DeleterFn(Ptr); - + // Cleanup. 
Ptr = nullptr; DeleterFn = nullptr; diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp index 4428c2f24e32..ef9159bac284 100644 --- a/contrib/llvm/lib/Support/MemoryBuffer.cpp +++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp @@ -152,7 +152,7 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, } ErrorOr<std::unique_ptr<MemoryBuffer>> -MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, +MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, uint64_t Offset, bool IsVolatile) { return getFileAux<MemoryBuffer>(FilePath, -1, MapSize, Offset, false, IsVolatile); @@ -533,5 +533,4 @@ MemoryBufferRef MemoryBuffer::getMemBufferRef() const { return MemoryBufferRef(Data, Identifier); } -void MemoryBuffer::anchor() {} -void SmallVectorMemoryBuffer::anchor() {} +SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {} diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp index a806da23ec50..098230290ed2 100644 --- a/contrib/llvm/lib/Support/Path.cpp +++ b/contrib/llvm/lib/Support/Path.cpp @@ -1157,9 +1157,13 @@ Error TempFile::keep(const Twine &Name) { setDeleteDisposition(H, true); #else std::error_code RenameEC = fs::rename(TmpName, Name); - // If we can't rename, discard the temporary file. - if (RenameEC) - remove(TmpName); + if (RenameEC) { + // If we can't rename, try to copy to work around cross-device link issues. + RenameEC = sys::fs::copy_file(TmpName, Name); + // If we can't rename or copy, discard the temporary file. + if (RenameEC) + remove(TmpName); + } sys::DontRemoveFileOnSignal(TmpName); #endif diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp index f5b6e6f3652d..206de91ae239 100644 --- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp +++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp @@ -1,10 +1,10 @@ //===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of @@ -72,10 +72,10 @@ static void PrintStack(raw_ostream &OS) { static void PrintCurStackTrace(raw_ostream &OS) { // Don't print an empty trace. if (!PrettyStackTraceHead) return; - + // If there are pretty stack frames registered, walk and emit them. OS << "Stack dump:\n"; - + PrintStack(OS); OS.flush(); } @@ -85,9 +85,9 @@ static void PrintCurStackTrace(raw_ostream &OS) { // If any clients of llvm try to link to libCrashReporterClient.a themselves, // only one crash info struct will be used. extern "C" { -CRASH_REPORTER_CLIENT_HIDDEN -struct crashreporter_annotations_t gCRAnnotations - __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) +CRASH_REPORTER_CLIENT_HIDDEN +struct crashreporter_annotations_t gCRAnnotations + __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) #if CRASHREPORTER_ANNOTATIONS_VERSION < 5 = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 }; #else @@ -114,17 +114,17 @@ static void CrashHandler(void *) { raw_svector_ostream Stream(TmpStr); PrintCurStackTrace(Stream); } - + if (!TmpStr.empty()) { #ifdef HAVE_CRASHREPORTERCLIENT_H // Cast to void to avoid warning. 
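The Path.cpp change above (and the MOVEFILE_COPY_ALLOWED flag added to the Windows rename path further down) both address the same limitation: a plain rename cannot cross filesystem boundaries, so when it fails the file is copied instead and the temporary discarded. The same fallback expressed with standard C++17 facilities, as a sketch:

    #include <filesystem>
    #include <system_error>
    namespace fs = std::filesystem;

    // Move a file, falling back to copy when rename fails (e.g. EXDEV when
    // source and destination live on different filesystems).
    static std::error_code moveFile(const fs::path &From, const fs::path &To) {
      std::error_code EC;
      fs::rename(From, To, EC);
      if (!EC)
        return EC;
      fs::copy_file(From, To, fs::copy_options::overwrite_existing, EC);
      if (!EC)
        fs::remove(From, EC); // Only drop the source once the copy succeeded.
      return EC;
    }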
(void)CRSetCrashLogMessage(TmpStr.c_str()); -#elif HAVE_CRASHREPORTER_INFO +#elif HAVE_CRASHREPORTER_INFO __crashreporter_info__ = strdup(TmpStr.c_str()); #endif errs() << TmpStr.str(); } - + #endif } diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp index bc15fd4e4014..d8fde7fa8990 100644 --- a/contrib/llvm/lib/Support/SourceMgr.cpp +++ b/contrib/llvm/lib/Support/SourceMgr.cpp @@ -175,14 +175,14 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, std::pair<unsigned, unsigned> LineAndCol; StringRef BufferID = "<unknown>"; std::string LineStr; - + if (Loc.isValid()) { unsigned CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf && "Invalid or unspecified location!"); const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); BufferID = CurMB->getBufferIdentifier(); - + // Scan backward to find the start of the line. const char *LineStart = Loc.getPointer(); const char *BufStart = CurMB->getBufferStart(); @@ -202,17 +202,17 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { SMRange R = Ranges[i]; if (!R.isValid()) continue; - + // If the line doesn't contain any part of the range, then ignore it. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) continue; - + // Ignore pieces of the range that go onto other lines. if (R.Start.getPointer() < LineStart) R.Start = SMLoc::getFromPointer(LineStart); if (R.End.getPointer() > LineEnd) R.End = SMLoc::getFromPointer(LineEnd); - + // Translate from SMLoc ranges to column ranges. // FIXME: Handle multibyte characters. ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, @@ -221,7 +221,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, LineAndCol = getLineAndColumn(Loc, CurBuf); } - + return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, LineAndCol.second-1, Kind, Msg.str(), LineStr, ColRanges, FixIts); @@ -440,7 +440,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors, // Build the line with the caret and ranges. std::string CaretLine(NumColumns+1, ' '); - + // Expand any ranges. for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { std::pair<unsigned, unsigned> R = Ranges[r]; @@ -459,14 +459,14 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors, // Finally, plop on the caret. if (unsigned(ColumnNo) <= NumColumns) CaretLine[ColumnNo] = '^'; - else + else CaretLine[NumColumns] = '^'; - + // ... and remove trailing whitespace so the output doesn't wrap for it. We // know that the line isn't completely empty because it has the caret in it at // least. CaretLine.erase(CaretLine.find_last_not_of(' ')+1); - + printSourceLine(S, LineContents); if (ShowColors) @@ -479,7 +479,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors, ++OutCol; continue; } - + // Okay, we have a tab. Insert the appropriate number of characters. do { S << CaretLine[i]; @@ -494,7 +494,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors, // Print out the replacement line, matching tabs in the source line. 
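The SMDiagnostic::print code above builds the familiar marker line: a '~' under every column covered by a source range, a '^' at the diagnostic column (clamped to the line width), and trailing blanks trimmed so the output does not wrap. A condensed sketch of that construction, with the tab-expansion pass omitted:

    #include <string>
    #include <utility>
    #include <vector>

    // Build a marker line: '~' under each half-open [First, Last) column
    // range, '^' at CaretCol, trailing spaces trimmed.
    static std::string buildCaretLine(
        unsigned NumColumns, unsigned CaretCol,
        const std::vector<std::pair<unsigned, unsigned>> &Ranges) {
      std::string Line(NumColumns + 1, ' ');
      for (const auto &R : Ranges)
        for (unsigned C = R.first; C < R.second && C <= NumColumns; ++C)
          Line[C] = '~';
      Line[CaretCol <= NumColumns ? CaretCol : NumColumns] = '^';
      Line.erase(Line.find_last_not_of(' ') + 1); // Never empty: '^' is set.
      return Line;
    }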
if (FixItInsertionLine.empty()) return; - + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { if (i >= LineContents.size() || LineContents[i] != '\t') { S << FixItInsertionLine[i]; diff --git a/contrib/llvm/lib/Support/StringPool.cpp b/contrib/llvm/lib/Support/StringPool.cpp index 76faabc92bb5..c591857c415d 100644 --- a/contrib/llvm/lib/Support/StringPool.cpp +++ b/contrib/llvm/lib/Support/StringPool.cpp @@ -26,10 +26,10 @@ PooledStringPtr StringPool::intern(StringRef Key) { table_t::iterator I = InternTable.find(Key); if (I != InternTable.end()) return PooledStringPtr(&*I); - + entry_t *S = entry_t::Create(Key); S->getValue().Pool = this; InternTable.insert(S); - + return PooledStringPtr(S); } diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp index 9ba7a09f9962..f0349260e22f 100644 --- a/contrib/llvm/lib/Support/StringRef.cpp +++ b/contrib/llvm/lib/Support/StringRef.cpp @@ -389,7 +389,7 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { Str = Str.substr(2); return 16; } - + if (Str.startswith("0b") || Str.startswith("0B")) { Str = Str.substr(2); return 2; diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp index ed999fce5dad..c5eba5714766 100644 --- a/contrib/llvm/lib/Support/TargetRegistry.cpp +++ b/contrib/llvm/lib/Support/TargetRegistry.cpp @@ -98,7 +98,7 @@ void TargetRegistry::RegisterTarget(Target &T, const char *Name, // convenience to some clients. if (T.Name) return; - + // Add to the list of targets. T.Next = FirstTarget; FirstTarget = &T; diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc index f425d607af47..b64b013d7407 100644 --- a/contrib/llvm/lib/Support/Windows/Path.inc +++ b/contrib/llvm/lib/Support/Windows/Path.inc @@ -450,7 +450,7 @@ static std::error_code rename_handle(HANDLE FromHandle, const Twine &To) { if (std::error_code EC2 = realPathFromHandle(FromHandle, WideFrom)) return EC2; if (::MoveFileExW(WideFrom.begin(), WideTo.begin(), - MOVEFILE_REPLACE_EXISTING)) + MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED)) return std::error_code(); return mapWindowsError(GetLastError()); } diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp index 354b7d0740de..9ef1410b99a5 100644 --- a/contrib/llvm/lib/Support/YAMLParser.cpp +++ b/contrib/llvm/lib/Support/YAMLParser.cpp @@ -1113,7 +1113,7 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); diff --git a/contrib/llvm/lib/Support/regex_impl.h b/contrib/llvm/lib/Support/regex_impl.h index f8296c9ff75e..8ddac7dcf998 100644 --- a/contrib/llvm/lib/Support/regex_impl.h +++ b/contrib/llvm/lib/Support/regex_impl.h @@ -96,7 +96,7 @@ extern "C" { int llvm_regcomp(llvm_regex_t *, const char *, int); size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t); -int llvm_regexec(const llvm_regex_t *, const char *, size_t, +int llvm_regexec(const llvm_regex_t *, const char *, size_t, llvm_regmatch_t [], int); void llvm_regfree(llvm_regex_t *); size_t llvm_strlcpy(char *dst, const char *src, size_t siz); diff --git a/contrib/llvm/lib/Support/xxhash.cpp b/contrib/llvm/lib/Support/xxhash.cpp index df643f9bd639..e9dceed2c4ae 100644 --- a/contrib/llvm/lib/Support/xxhash.cpp +++ 
b/contrib/llvm/lib/Support/xxhash.cpp @@ -132,3 +132,7 @@ uint64_t llvm::xxHash64(StringRef Data) { return H64; } + +uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) { + return xxHash64({(const char *)Data.data(), Data.size()}); +} diff --git a/contrib/llvm/lib/TableGen/StringMatcher.cpp b/contrib/llvm/lib/TableGen/StringMatcher.cpp index 32599104f6a2..2c4d1f33997d 100644 --- a/contrib/llvm/lib/TableGen/StringMatcher.cpp +++ b/contrib/llvm/lib/TableGen/StringMatcher.cpp @@ -25,19 +25,19 @@ using namespace llvm; /// FindFirstNonCommonLetter - Find the first character in the keys of the /// string pairs that is not shared across the whole set of strings. All /// strings are assumed to have the same length. -static unsigned +static unsigned FindFirstNonCommonLetter(const std::vector<const StringMatcher::StringPair*> &Matches) { assert(!Matches.empty()); for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { // Check to see if letter i is the same across the set. char Letter = Matches[0]->first[i]; - + for (unsigned str = 0, e = Matches.size(); str != e; ++str) if (Matches[str]->first[i] != Letter) return i; } - + return Matches[0]->first.size(); } @@ -51,7 +51,7 @@ bool StringMatcher::EmitStringMatcherForChar( unsigned IndentCount, bool IgnoreDuplicates) const { assert(!Matches.empty() && "Must have at least one string to match!"); std::string Indent(IndentCount * 2 + 4, ' '); - + // If we have verified that the entire string matches, we're done: output the // matching code. if (CharNo == Matches[0]->first.size()) { @@ -60,7 +60,7 @@ bool StringMatcher::EmitStringMatcherForChar( // If the to-execute code has \n's in it, indent each subsequent line. StringRef Code = Matches[0]->second; - + std::pair<StringRef, StringRef> Split = Code.split('\n'); OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; @@ -72,20 +72,20 @@ bool StringMatcher::EmitStringMatcherForChar( } return false; } - + // Bucket the matches by the character we are comparing. std::map<char, std::vector<const StringPair*>> MatchesByLetter; - + for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); - - + + // If we have exactly one bucket to match, see how many characters are common // across the whole set and match all of them at once. if (MatchesByLetter.size() == 1) { unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); unsigned NumChars = FirstNonCommonLetter-CharNo; - + // Emit code to break out if the prefix doesn't match. if (NumChars == 1) { // Do the comparison with if (Str[1] != 'f') @@ -105,13 +105,13 @@ bool StringMatcher::EmitStringMatcherForChar( return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount, IgnoreDuplicates); } - + // Otherwise, we have multiple possible things, emit a switch on the // character. OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; OS << Indent << "default: break;\n"; - - for (std::map<char, std::vector<const StringPair*>>::iterator LI = + + for (std::map<char, std::vector<const StringPair*>>::iterator LI = MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { // TODO: escape hard stuff (like \n) if we ever care about it. 
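StringMatcher, whose emitter the hunk above reflows, compiles a fixed set of strings into nested dispatch: an outer switch on length, then per-character switches, with runs of characters common to every remaining candidate collapsed into a single comparison. For the two keys "add" and "and" the generated matcher has roughly this shape (illustrative only, with a hypothetical Kind result; not actual TableGen output):

    switch (Str.size()) {
    default: break;
    case 3:  // "add", "and"
      if (Str[0] != 'a')
        break;
      switch (Str[1]) {
      default: break;
      case 'd':  // "add"
        if (Str[2] != 'd')
          break;
        return Kind::Add;  // hypothetical match action
      case 'n':  // "and"
        if (Str[2] != 'd')
          break;
        return Kind::And;  // hypothetical match action
      }
      break;
    }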
OS << Indent << "case '" << LI->first << "':\t // " @@ -122,7 +122,7 @@ bool StringMatcher::EmitStringMatcherForChar( IgnoreDuplicates)) OS << Indent << " break;\n"; } - + OS << Indent << "}\n"; return true; } @@ -132,18 +132,18 @@ bool StringMatcher::EmitStringMatcherForChar( void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const { // If nothing to match, just fall through. if (Matches.empty()) return; - + // First level categorization: group strings by length. std::map<unsigned, std::vector<const StringPair*>> MatchesByLength; - + for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); - + // Output a switch statement on length and categorize the elements within each // bin. OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; OS.indent(Indent*2+2) << "default: break;\n"; - + for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI = MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " @@ -152,6 +152,6 @@ void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const { if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates)) OS.indent(Indent*2+4) << "break;\n"; } - + OS.indent(Indent*2+2) << "}\n"; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 43a3ae77a170..572d1c22feea 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -3774,7 +3774,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c72f2ebee18..de762a7bb1d4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8580,7 +8580,7 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, SDValue AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const { + SmallVectorImpl<SDNode *> &Created) const { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -8603,11 +8603,9 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp); - if (Created) { - Created->push_back(Cmp.getNode()); - Created->push_back(Add.getNode()); - Created->push_back(CSel.getNode()); - } + Created.push_back(Cmp.getNode()); + Created.push_back(Add.getNode()); + Created.push_back(CSel.getNode()); // Divide by pow2. 
SDValue SRA = @@ -8618,8 +8616,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (Divisor.isNonNegative()) return SRA; - if (Created) - Created->push_back(SRA.getNode()); + Created.push_back(SRA.getNode()); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 592845640a44..d783c8a6048c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -644,7 +644,7 @@ private: SelectionDAG &DAG) const; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const override; + SmallVectorImpl<SDNode *> &Created) const override; SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const override; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 1060c64f7b5d..15d61cd1ad26 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -57,6 +57,14 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> { let Size = 4; } +// Enum describing whether an instruction is +// destructive in its first source operand. +class DestructiveInstTypeEnum<bits<1> val> { + bits<1> Value = val; +} +def NotDestructive : DestructiveInstTypeEnum<0>; +def Destructive : DestructiveInstTypeEnum<1>; + // Normal instructions class I<dag oops, dag iops, string asm, string operands, string cstr, list<dag> pattern> @@ -64,6 +72,13 @@ class I<dag oops, dag iops, string asm, string operands, string cstr, dag OutOperandList = oops; dag InOperandList = iops; let AsmString = !strconcat(asm, operands); + + // Destructive operations (SVE) + DestructiveInstTypeEnum DestructiveInstType = NotDestructive; + ElementSizeEnum ElementSize = ElementSizeB; + + let TSFlags{3} = DestructiveInstType.Value; + let TSFlags{2-0} = ElementSize.Value; } class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 230480cf1cea..032d53d19620 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { return makeArrayRef(TargetFlags); } - /// Constants defining how certain sequences should be outlined. - /// This encompasses how an outlined function should be called, and what kind of - /// frame should be emitted for that outlined function. - /// - /// \p MachineOutlinerDefault implies that the function should be called with - /// a save and restore of LR to the stack. - /// - /// That is, - /// - /// I1 Save LR OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 Restore LR I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 3 (save + BL + restore) - /// * Frame construction overhead: 1 (ret) - /// * Requires stack fixups? Yes - /// - /// \p MachineOutlinerTailCall implies that the function is being created from - /// a sequence of instructions ending in a return. 
- /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> B OUTLINED_FUNCTION I1 - /// RET I2 - /// RET - /// - /// * Call construction overhead: 1 (B) - /// * Frame construction overhead: 0 (Return included in sequence) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerNoLRSave implies that the function should be called using - /// a BL instruction, but doesn't require LR to be saved and restored. This - /// happens when LR is known to be dead. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// I3 I2 - /// I3 - /// RET - /// - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 1 (RET) - /// * Requires stack fixups? No - /// - /// \p MachineOutlinerThunk implies that the function is being created from - /// a sequence of instructions ending in a call. The outlined function is - /// called with a BL instruction, and the outlined function tail-calls the - /// original call destination. - /// - /// That is, - /// - /// I1 OUTLINED_FUNCTION: - /// I2 --> BL OUTLINED_FUNCTION I1 - /// BL f I2 - /// B f - /// * Call construction overhead: 1 (BL) - /// * Frame construction overhead: 0 - /// * Requires stack fixups? No - /// +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? Yes +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// RET I2 +/// RET +/// +/// * Call construction overhead: 1 (B) +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 1 (RET) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 0 +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. 
+/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? No enum MachineOutlinerClass { MachineOutlinerDefault, /// Emit a save, restore, call, and return. MachineOutlinerTailCall, /// Only emit a branch. MachineOutlinerNoLRSave, /// Emit a call and return. MachineOutlinerThunk, /// Emit a call and tail-call. + MachineOutlinerRegSave /// Same as default, but save to a register. }; enum MachineOutlinerMBBFlags { @@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags { HasCalls = 0x4 }; +unsigned +AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + MachineFunction *MF = C.getMF(); + const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : AArch64::GPR64RegClass) { + if (!ARI->isReservedReg(*MF, Reg) && + Reg != AArch64::LR && // LR is not reserved, but don't use it. + Reg != AArch64::X16 && // X16 is not guaranteed to be preserved. + Reg != AArch64::X17 && // Ditto for X17. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. + return 0u; +} + outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { @@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo( SetCandidateCallInfo(MachineOutlinerNoLRSave, 4); } - // LR is live, so we need to save it to the stack. + // LR is live, so we need to save it. Decide whether it should be saved to + // the stack, or if it can be saved to a register. else { - FrameID = MachineOutlinerDefault; - NumBytesToCreateFrame = 4; - SetCandidateCallInfo(MachineOutlinerDefault, 12); + if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [this](outliner::Candidate &C) { + return findRegisterToSaveLRTo(C); + })) { + // Every candidate has an available callee-saved register for the save. + // We can save LR to a register. + FrameID = MachineOutlinerRegSave; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerRegSave, 12); + } + + else { + // At least one candidate does not have an available callee-saved + // register. We must save LR to the stack. + FrameID = MachineOutlinerDefault; + NumBytesToCreateFrame = 4; + SetCandidateCallInfo(MachineOutlinerDefault, 12); + } } // Check if the range contains a call. These require a save + restore of the @@ -5088,7 +5142,7 @@ AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { MBB.rend(), [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); - if (!LRU.available(AArch64::LR)) + if (!LRU.available(AArch64::LR)) Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; return Flags; @@ -5114,14 +5168,14 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, // ahead and skip over them. if (MI.isKill()) return outliner::InstrType::Invisible; - + // Is this a terminator for a basic block? if (MI.isTerminator()) { // Is this the end of a function? if (MI.getParent()->succ_empty()) return outliner::InstrType::Legal; - + // It's not, so don't outline it. 
return outliner::InstrType::Illegal; } @@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame( MBB.insert(MBB.end(), ret); // Did we have to modify the stack by saving the link register? - if (OF.FrameConstructionID == MachineOutlinerNoLRSave) + if (OF.FrameConstructionID != MachineOutlinerDefault) return; // We modified the stack. @@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( // We want to return the spot where we inserted the call. MachineBasicBlock::iterator CallPt; - // We have a default call. Save the link register. - MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR) - .addReg(AArch64::SP) - .addImm(-16); - It = MBB.insert(It, STRXpre); + // Instructions for saving and restoring LR around the call instruction we're + // going to insert. + MachineInstr *Save; + MachineInstr *Restore; + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + // FIXME: This logic should be sunk into a target-specific interface so that + // we don't have to recompute the register. + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg) + .addReg(AArch64::XZR) + .addReg(AArch64::LR) + .addImm(0); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR) + .addReg(AArch64::XZR) + .addReg(Reg) + .addImm(0); + } else { + // We have the default case. Save and restore from SP. + Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(-16); + Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); + } + + It = MBB.insert(It, Save); It++; // Insert the call. @@ -5472,13 +5554,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( CallPt = It; It++; - // Restore the link register. - MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) - .addReg(AArch64::SP, RegState::Define) - .addReg(AArch64::LR, RegState::Define) - .addReg(AArch64::SP) - .addImm(16); - It = MBB.insert(It, LDRXpost); - + It = MBB.insert(It, Restore); return CallPt; } + +bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault( + MachineFunction &MF) const { + return MF.getFunction().optForMinSize(); +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 0e5953f6216d..11882e238b70 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -249,6 +249,7 @@ public: insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const override; + bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; /// Returns true if the instruction sets to an immediate value that can be /// executed more efficiently. 
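One detail of the Save/Restore pair built in insertOutlinedCall above: AArch64 has no dedicated register-to-register MOV encoding, so "mov xN, xM" is an alias of "orr xN, xzr, xM", which is why the patch constructs ORRXrs with XZR as the first source operand. Schematically, the new register-save call variant comes out as the following, where x20 is an assumed register choice for illustration:

    mov  x20, x30            // ORRXrs: stash LR in a free GPR
    bl   OUTLINED_FUNCTION
    mov  x30, x20            // ORRXrs: restore LR; no stack traffic, no fixups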
bool isExynosResetFast(const MachineInstr &MI) const; @@ -271,6 +272,10 @@ private: ArrayRef<MachineOperand> Cond) const; bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg, const MachineRegisterInfo *MRI) const; + + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; }; /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg @@ -339,6 +344,32 @@ static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; } +// struct TSFlags { +#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits +#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit +// } + +namespace AArch64 { + +enum ElementSizeType { + ElementSizeMask = TSFLAG_ELEMENT_SIZE_TYPE(0x7), + ElementSizeNone = TSFLAG_ELEMENT_SIZE_TYPE(0x0), + ElementSizeB = TSFLAG_ELEMENT_SIZE_TYPE(0x1), + ElementSizeH = TSFLAG_ELEMENT_SIZE_TYPE(0x2), + ElementSizeS = TSFLAG_ELEMENT_SIZE_TYPE(0x3), + ElementSizeD = TSFLAG_ELEMENT_SIZE_TYPE(0x4), +}; + +enum DestructiveInstType { + DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1), + NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0), + Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1), +}; + +#undef TSFLAG_ELEMENT_SIZE_TYPE +#undef TSFLAG_DESTRUCTIVE_INST_TYPE +} + } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 4d7ca2349ed1..b2b500320b5c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -21,6 +21,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -94,6 +95,10 @@ private: void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. + void materializeLargeCMVal(MachineInstr &I, const Value *V, + unsigned char OpFlags) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -655,6 +660,45 @@ bool AArch64InstructionSelector::selectVaStartDarwin( return true; } +void AArch64InstructionSelector::materializeLargeCMVal( + MachineInstr &I, const Value *V, unsigned char OpFlags) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineIRBuilder MIB(I); + + auto MovZ = MIB.buildInstr(AArch64::MOVZXi, &AArch64::GPR64RegClass); + MovZ->addOperand(MF, I.getOperand(1)); + MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | + AArch64II::MO_NC); + MovZ->addOperand(MF, MachineOperand::CreateImm(0)); + constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); + + auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset, + unsigned ForceDstReg) { + unsigned DstReg = ForceDstReg + ? 
ForceDstReg + : MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); + if (auto *GV = dyn_cast<GlobalValue>(V)) { + MovI->addOperand(MF, MachineOperand::CreateGA( + GV, MovZ->getOperand(1).getOffset(), Flags)); + } else { + MovI->addOperand( + MF, MachineOperand::CreateBA(cast<BlockAddress>(V), + MovZ->getOperand(1).getOffset(), Flags)); + } + MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); + constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); + return DstReg; + }; + unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), + AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); + BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + return; +} + bool AArch64InstructionSelector::select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const { assert(I.getParent() && "Instruction should be in a basic block!"); @@ -936,36 +980,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, I.getOperand(1).setTargetFlags(OpFlags); } else if (TM.getCodeModel() == CodeModel::Large) { // Materialize the global using movz/movk instructions. - unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - auto InsertPt = std::next(I.getIterator()); - auto MovZ = - BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) - .addDef(MovZDstReg); - MovZ->addOperand(MF, I.getOperand(1)); - MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | - AArch64II::MO_NC); - MovZ->addOperand(MF, MachineOperand::CreateImm(0)); - constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); - - auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, - unsigned Offset, unsigned ForceDstReg) { - unsigned DstReg = - ForceDstReg ? 
ForceDstReg - : MRI.createVirtualRegister(&AArch64::GPR64RegClass); - auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), - TII.get(AArch64::MOVKXi)) - .addDef(DstReg) - .addReg(SrcReg); - MovI->addOperand(MF, MachineOperand::CreateGA( - GV, MovZ->getOperand(1).getOffset(), Flags)); - MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); - constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); - return DstReg; - }; - unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), - AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); - DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); - BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + materializeLargeCMVal(I, GV, OpFlags); I.eraseFromParent(); return true; } else { @@ -1482,7 +1497,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, .addImm(1); I.eraseFromParent(); return true; - case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_IMPLICIT_DEF: { I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const unsigned DstReg = I.getOperand(0).getReg(); @@ -1492,6 +1507,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I, RBI.constrainGenericRegister(DstReg, *DstRC, MRI); return true; } + case TargetOpcode::G_BLOCK_ADDR: { + if (TM.getCodeModel() == CodeModel::Large) { + materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); + I.eraseFromParent(); + return true; + } else { + I.setDesc(TII.get(AArch64::MOVaddrBA)); + auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), + I.getOperand(0).getReg()) + .addBlockAddress(I.getOperand(1).getBlockAddress(), + /* Offset */ 0, AArch64II::MO_PAGE) + .addBlockAddress( + I.getOperand(1).getBlockAddress(), /* Offset */ 0, + AArch64II::MO_NC | AArch64II::MO_PAGEOFF); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); + } + } + } return false; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 9b8c0a34efba..327c758a7f8e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -293,6 +293,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); } + getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); + // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 
0 : 1; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 798340f8fed8..e42214d15699 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -146,7 +146,7 @@ public: Optional<bool> hasRedZone() const { return HasRedZone; } void setHasRedZone(bool s) { HasRedZone = s; } - + int getVarArgsStackIndex() const { return VarArgsStackIndex; } void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 7a653e117fd1..bbf401b474ca 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -764,18 +764,35 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>; def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>; } +// Enum describing the element size for destructive +// operations. +class ElementSizeEnum<bits<3> val> { + bits<3> Value = val; +} + +def ElementSizeNone : ElementSizeEnum<0>; +def ElementSizeB : ElementSizeEnum<1>; +def ElementSizeH : ElementSizeEnum<2>; +def ElementSizeS : ElementSizeEnum<3>; +def ElementSizeD : ElementSizeEnum<4>; +def ElementSizeQ : ElementSizeEnum<5>; // Unused class SVERegOp <string Suffix, AsmOperandClass C, + ElementSizeEnum Size, RegisterClass RC> : RegisterOperand<RC> { + ElementSizeEnum ElementSize; + + let ElementSize = Size; let PrintMethod = !if(!eq(Suffix, ""), "printSVERegOp<>", "printSVERegOp<'" # Suffix # "'>"); let ParserMatchClass = C; } -class PPRRegOp <string Suffix, AsmOperandClass C, - RegisterClass RC> : SVERegOp<Suffix, C, RC> {} -class ZPRRegOp <string Suffix, AsmOperandClass C, - RegisterClass RC> : SVERegOp<Suffix, C, RC> {} +class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} +class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} //****************************************************************************** @@ -805,11 +822,11 @@ def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; -def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>; -def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>; -def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>; -def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>; -def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>; +def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; @@ -817,11 +834,11 @@ def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>; -def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>; -def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>; -def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>; -def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>; -def PPR3b64 : PPRRegOp<"d", 
PPRAsmOp3b64, PPR_3b>; +def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; +def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>; +def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>; +def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>; +def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>; //****************************************************************************** @@ -874,28 +891,28 @@ def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; -def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ZPR>; -def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ZPR>; -def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ZPR>; -def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ZPR>; -def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ZPR>; -def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ZPR>; +def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>; +def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>; +def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>; +def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>; +def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>; +def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>; def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">; def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">; def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">; -def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ZPR_3b>; -def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ZPR_3b>; -def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ZPR_3b>; +def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>; +def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>; +def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>; def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">; def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">; def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">; -def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ZPR_4b>; -def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ZPR_4b>; -def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ZPR_4b>; +def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>; +def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>; +def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>; class FPRasZPR<int Width> : AsmOperandClass{ let Name = "FPR" # Width # "asZPR"; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 16e6ddda6398..0fde68011e86 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -220,10 +220,33 @@ let Predicates = [HasSVE] in { def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; + defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; + def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>; def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>; def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>; + def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">; + def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">; + def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">; + def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">; + + def BRKN_PPzP : sve_int_brkn<0b0, "brkn">; + def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">; + + defm BRKA_PPzP : 
sve_int_break_z<0b000, "brka">; + defm BRKA_PPmP : sve_int_break_m<0b001, "brka">; + defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">; + defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">; + defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">; + defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">; + + def PTEST_PP : sve_int_ptest<0b010000, "ptest">; + def PFALSE : sve_int_pfalse<0b000000, "pfalse">; + defm PFIRST : sve_int_pfirst<0b00000, "pfirst">; + defm PNEXT : sve_int_pnext<0b00110, "pnext">; + def AND_PPzPP : sve_int_pred_log<0b0000, "and">; def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">; def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">; @@ -731,6 +754,21 @@ let Predicates = [HasSVE] in { defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; + defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">; + defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">; + defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">; + + defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">; + defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">; + defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">; + + def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; + def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; + def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; + def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; + def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; @@ -854,40 +892,40 @@ let Predicates = [HasSVE] in { defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; - def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16>; - def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32>; - def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16>; - def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32>; - def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32>; - def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16>; - def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16>; - def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32>; - def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16>; - def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32>; - def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16>; - def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64>; - def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32>; - def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64>; - def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64>; - def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64>; - def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16>; - def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32>; - def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16>; - def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16>; - def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32>; - def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16>; - def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, 
"scvtf", ZPR64, ZPR64>; - def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64>; - def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32>; - def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32>; - def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64>; - def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32>; - def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64>; - def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32>; - def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64>; - def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64>; - def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64>; - def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64>; + def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; + def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; + def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; + def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; + def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; + def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; + def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; + def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; + def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; + def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; + def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; + def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; + def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; + def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; + def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; + def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; + def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; + def 
FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 01a997e5aed7..120d71381c67 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -255,6 +255,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, // AArch64 supports the MachineOutliner. setMachineOutliner(true); + + // AArch64 supports default outlining behaviour. + setSupportsDefaultOutlining(true); } AArch64TargetMachine::~AArch64TargetMachine() = default; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d75fef7b0171..96e751e86971 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -577,7 +577,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, unsigned NumVectorInstToHideOverhead = 10; int MaxMergeDistance = 64; - if (Ty->isVectorTy() && SE && + if (Ty->isVectorTy() && SE && !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index a51c41d70915..30a9a08f2346 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "MCTargetDesc/AArch64TargetStreamer.h" +#include "AArch64InstrInfo.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -79,6 +80,67 @@ private: // Map of register aliases registered via the .req directive. 
StringMap<std::pair<RegKind, unsigned>> RegisterReqs; + class PrefixInfo { + public: + static PrefixInfo CreateFromInst(const MCInst &Inst, uint64_t TSFlags) { + PrefixInfo Prefix; + switch (Inst.getOpcode()) { + case AArch64::MOVPRFX_ZZ: + Prefix.Active = true; + Prefix.Dst = Inst.getOperand(0).getReg(); + break; + case AArch64::MOVPRFX_ZPmZ_B: + case AArch64::MOVPRFX_ZPmZ_H: + case AArch64::MOVPRFX_ZPmZ_S: + case AArch64::MOVPRFX_ZPmZ_D: + Prefix.Active = true; + Prefix.Predicated = true; + Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask; + assert(Prefix.ElementSize != AArch64::ElementSizeNone && + "No destructive element size set for movprfx"); + Prefix.Dst = Inst.getOperand(0).getReg(); + Prefix.Pg = Inst.getOperand(2).getReg(); + break; + case AArch64::MOVPRFX_ZPzZ_B: + case AArch64::MOVPRFX_ZPzZ_H: + case AArch64::MOVPRFX_ZPzZ_S: + case AArch64::MOVPRFX_ZPzZ_D: + Prefix.Active = true; + Prefix.Predicated = true; + Prefix.ElementSize = TSFlags & AArch64::ElementSizeMask; + assert(Prefix.ElementSize != AArch64::ElementSizeNone && + "No destructive element size set for movprfx"); + Prefix.Dst = Inst.getOperand(0).getReg(); + Prefix.Pg = Inst.getOperand(1).getReg(); + break; + default: + break; + } + + return Prefix; + } + + PrefixInfo() : Active(false), Predicated(false) {} + bool isActive() const { return Active; } + bool isPredicated() const { return Predicated; } + unsigned getElementSize() const { + assert(Predicated); + return ElementSize; + } + unsigned getDstReg() const { return Dst; } + unsigned getPgReg() const { + assert(Predicated); + return Pg; + } + + private: + bool Active; + bool Predicated; + unsigned ElementSize; + unsigned Dst; + unsigned Pg; + } NextPrefix; + AArch64TargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<AArch64TargetStreamer &>(TS); @@ -113,7 +175,8 @@ private: bool parseDirectiveReq(StringRef Name, SMLoc L); bool parseDirectiveUnreq(SMLoc L); - bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); + bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, + SmallVectorImpl<SMLoc> &Loc); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -3665,12 +3728,89 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, return false; } +static inline bool isMatchingOrAlias(unsigned ZReg, unsigned Reg) { + assert((ZReg >= AArch64::Z0) && (ZReg <= AArch64::Z31)); + return (ZReg == ((Reg - AArch64::B0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::H0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::S0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::D0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::Q0) + AArch64::Z0)) || + (ZReg == ((Reg - AArch64::Z0) + AArch64::Z0)); +} + // FIXME: This entire function is a giant hack to provide us with decent // operand range validation/diagnostics until TableGen/MC can be extended // to support autogeneration of this kind of validation. -bool AArch64AsmParser::validateInstruction(MCInst &Inst, - SmallVectorImpl<SMLoc> &Loc) { +bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, + SmallVectorImpl<SMLoc> &Loc) { const MCRegisterInfo *RI = getContext().getRegisterInfo(); + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + + // A prefix only applies to the instruction following it. 
Here we extract + // prefix information for the next instruction before validating the current + // one so that in the case of failure we don't erroneously continue using the + // current prefix. + PrefixInfo Prefix = NextPrefix; + NextPrefix = PrefixInfo::CreateFromInst(Inst, MCID.TSFlags); + + // Before validating the instruction in isolation we run through the rules + // applicable when it follows a prefix instruction. + // NOTE: brk & hlt can be prefixed but require no additional validation. + if (Prefix.isActive() && + (Inst.getOpcode() != AArch64::BRK) && + (Inst.getOpcode() != AArch64::HLT)) { + + // Prefixed instructions must have a destructive operand. + if ((MCID.TSFlags & AArch64::DestructiveInstTypeMask) == + AArch64::NotDestructive) + return Error(IDLoc, "instruction is unpredictable when following a" + " movprfx, suggest replacing movprfx with mov"); + + // Destination operands must match. + if (Inst.getOperand(0).getReg() != Prefix.getDstReg()) + return Error(Loc[0], "instruction is unpredictable when following a" + " movprfx writing to a different destination"); + + // Destination operand must not be used in any other location. + for (unsigned i = 1; i < Inst.getNumOperands(); ++i) { + if (Inst.getOperand(i).isReg() && + (MCID.getOperandConstraint(i, MCOI::TIED_TO) == -1) && + isMatchingOrAlias(Prefix.getDstReg(), Inst.getOperand(i).getReg())) + return Error(Loc[0], "instruction is unpredictable when following a" + " movprfx and destination also used as non-destructive" + " source"); + } + + auto PPRRegClass = AArch64MCRegisterClasses[AArch64::PPRRegClassID]; + if (Prefix.isPredicated()) { + int PgIdx = -1; + + // Find the instruction's general predicate. + for (unsigned i = 1; i < Inst.getNumOperands(); ++i) + if (Inst.getOperand(i).isReg() && + PPRRegClass.contains(Inst.getOperand(i).getReg())) { + PgIdx = i; + break; + } + + // Instruction must be predicated if the movprfx is predicated. + if (PgIdx == -1 || + (MCID.TSFlags & AArch64::ElementSizeMask) == AArch64::ElementSizeNone) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx, suggest using unpredicated movprfx"); + + // Instruction must use the same general predicate as the movprfx. + if (Inst.getOperand(PgIdx).getReg() != Prefix.getPgReg()) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx using a different general predicate"); + + // Instruction element type must match the movprfx. + if ((MCID.TSFlags & AArch64::ElementSizeMask) != Prefix.getElementSize()) + return Error(IDLoc, "instruction is unpredictable when following a" + " predicated movprfx with a different element size"); + } + } + // Check for indexed addressing modes w/ the base register being the // same as a destination/source register or pair load where // the Rt == Rt2. All of those are undefined behaviour. 
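The movprfx checks above key entirely off the two SVE TSFlags bitfields introduced in AArch64InstrInfo.h earlier in this patch. A minimal sketch of that decoding, with illustrative helper names (only the masks and enumerators are the ones defined above):

  // Illustrative only: how the validator reads the SVE TSFlags fields.
  static bool isDestructiveInst(uint64_t TSFlags) {
    return (TSFlags & AArch64::DestructiveInstTypeMask) == AArch64::Destructive;
  }
  static unsigned sveElementSize(uint64_t TSFlags) {
    return TSFlags & AArch64::ElementSizeMask; // ElementSizeNone/B/H/S/D
  }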
@@ -4516,7 +4656,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, NumOperands = Operands.size(); for (unsigned i = 1; i < NumOperands; ++i) OperandLocs.push_back(Operands[i]->getStartLoc()); - if (validateInstruction(Inst, OperandLocs)) + if (validateInstruction(Inst, IDLoc, OperandLocs)) return true; Inst.setLoc(IDLoc); @@ -4719,7 +4859,6 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; - bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; StringRef IDVal = DirectiveID.getIdentifier(); SMLoc Loc = DirectiveID.getLoc(); @@ -4733,14 +4872,14 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveLtorg(Loc); else if (IDVal == ".unreq") parseDirectiveUnreq(Loc); - else if (!IsMachO && !IsCOFF) { - if (IDVal == ".inst") - parseDirectiveInst(Loc); + else if (IDVal == ".inst") + parseDirectiveInst(Loc); + else if (IsMachO) { + if (IDVal == MCLOHDirectiveName()) + parseDirectiveLOH(IDVal, Loc); else return true; - } else if (IDVal == MCLOHDirectiveName()) - parseDirectiveLOH(IDVal, Loc); - else + } else return true; return false; } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 1b949b54590c..dee964df2635 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -39,4 +39,16 @@ void AArch64TargetStreamer::emitCurrentConstantPool() { // finish() - write out any non-empty assembler constant pools. void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); } -void AArch64TargetStreamer::emitInst(uint32_t Inst) {} +void AArch64TargetStreamer::emitInst(uint32_t Inst) { + char Buffer[4]; + + // We can't just use EmitIntValue here, as that will swap the + // endianness on big-endian systems (instructions are always + // little-endian). 
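+  // For example, the NOP encoding 0xd503201f must be emitted as the byte
+  // sequence 1f 20 03 d5 even when targeting big-endian AArch64.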
+ for (unsigned I = 0; I < 4; ++I) { + Buffer[I] = uint8_t(Inst); + Inst >>= 8; + } + + getStreamer().EmitBytes(StringRef(Buffer, 4)); +} diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td index 17b3f6041279..7a8dd8bc5aee 100644 --- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -282,6 +282,79 @@ let Predicates = [HasSVE] in { //===----------------------------------------------------------------------===// +// SVE Predicate Misc Group +//===----------------------------------------------------------------------===// + +class sve_int_pfalse<bits<6> opc, string asm> +: I<(outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{3-1}; + let Inst{15-10} = 0b111001; + let Inst{9} = opc{0}; + let Inst{8-4} = 0b00000; + let Inst{3-0} = Pd; +} + +class sve_int_ptest<bits<6> opc, string asm> +: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), + asm, "\t$Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let Defs = [NZCV]; +} + +class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn), + asm, "\t$Pdn, $Pg, $_Pdn", + "", + []>, Sched<[]> { + bits<4> Pdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b11000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pdn; + + let Constraints = "$Pdn = $_Pdn"; + let Defs = [NZCV]; +} + +multiclass sve_int_pfirst<bits<5> opc, string asm> { + def : sve_int_pfirst_next<0b01, opc, asm, PPR8>; +} + +multiclass sve_int_pnext<bits<5> opc, string asm> { + def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>; + def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>; + def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>; + def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// // SVE Predicate Count Group //===----------------------------------------------------------------------===// @@ -348,6 +421,8 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_count_v<bits<5> opc, string asm> { @@ -433,6 +508,8 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> { @@ -738,6 +815,8 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_insrs<string asm> { @@ -762,6 +841,8 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = 
Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_insrv<string asm> { @@ -790,6 +871,8 @@ class sve_int_perm_extract_i<string asm> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } //===----------------------------------------------------------------------===// @@ -883,6 +966,8 @@ class sve_int_log_imm<bits<2> opc, string asm> let Constraints = "$Zdn = $_Zdn"; let DecoderMethod = "DecodeSVELogicalImmInstruction"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> { @@ -993,6 +1078,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> { @@ -1020,6 +1107,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> { @@ -1045,6 +1134,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_ftmad<string asm> { @@ -1106,6 +1197,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> { @@ -1135,6 +1228,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> { @@ -1163,6 +1258,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> { @@ -1253,6 +1350,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_fcmla<string asm> { @@ -1284,6 +1383,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_fp_fcmla_by_indexed_elem<string asm> { @@ -1325,6 +1426,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_fp_fcadd<string asm> { @@ -1405,7 +1508,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> { //===----------------------------------------------------------------------===// class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, - RegisterOperand o_zprtype> + RegisterOperand o_zprtype, ElementSizeEnum size> : I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), asm, "\t$Zd, $Pg/m, 
$Zn", "", @@ -1423,12 +1526,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = size; } multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> { - def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16>; - def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32>; - def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64>; + def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; + def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; + def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; } //===----------------------------------------------------------------------===// @@ -1480,6 +1585,8 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_log<bits<3> opc, string asm> { @@ -1541,6 +1648,8 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> { @@ -1571,6 +1680,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> { @@ -1601,6 +1712,8 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty1.ElementSize; } multiclass sve_intx_dot<bit opc, string asm> { @@ -1629,6 +1742,8 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, let Inst{4-0} = Zda; let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> { @@ -1670,6 +1785,8 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> { @@ -1800,6 +1917,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_arith_imm0<bits<3> opc, string asm> { @@ -1825,6 +1944,8 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> { @@ -1885,6 +2006,8 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_dup_fpimm_pred<string asm> { @@ -1917,6 +2040,9 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm, let Inst{13} = imm{8}; // sh let Inst{12-5} = imm{7-0}; // imm8 let Inst{4-0} = Zd; + + let DestructiveInstType = Destructive; + let ElementSize 
= zprty.ElementSize; } multiclass sve_int_dup_imm_pred_merge<string asm> { @@ -2083,6 +2209,65 @@ multiclass sve_int_ucmp_vi<bits<2> opc, string asm> { //===----------------------------------------------------------------------===// +// SVE Integer Compare - Scalars Group +//===----------------------------------------------------------------------===// + +class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt> +: I<(outs), (ins rt:$Rn, rt:$Rm), + asm, "\t$Rn, $Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-23} = 0b001001011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = opc; + let Inst{3-0} = 0b0000; + + let Defs = [NZCV]; +} + +class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm, + RegisterClass gprty, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), + asm, "\t$Pd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<4> Pd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc{3-1}; + let Inst{9-5} = Rn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_while4_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; +} + +multiclass sve_int_while8_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; +} + + +//===----------------------------------------------------------------------===// // SVE Floating Point Fast Reduction Group //===----------------------------------------------------------------------===// @@ -2312,9 +2497,9 @@ multiclass sve_int_index_rr<string asm> { //===----------------------------------------------------------------------===// // SVE Bitwise Shift - Predicated Group //===----------------------------------------------------------------------===// - class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty, Operand immtype> + ZPRRegOp zprty, Operand immtype, + ElementSizeEnum size> : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", "", @@ -2333,31 +2518,41 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = size; } multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, + ElementSizeH> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, + ElementSizeS> { let Inst{9-8} = imm{4-3}; } - def _D : 
sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, + ElementSizeD> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } } multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, + ElementSizeH> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, + ElementSizeS> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, + ElementSizeD> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } @@ -2383,6 +2578,8 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> { @@ -3017,6 +3214,8 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_clast_zz<bit ab, string asm> { @@ -3094,6 +3293,8 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; } multiclass sve_int_perm_splice<string asm> { @@ -3122,6 +3323,8 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_rev_rbit<string asm> { @@ -3163,6 +3366,8 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_cpy_r<string asm> { @@ -3198,6 +3403,8 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; } multiclass sve_int_perm_cpy_v<string asm> { @@ -4117,3 +4324,133 @@ multiclass sve_int_reduce_2<bits<3> opc, string asm> { def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>; def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>; } + +class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm, + ZPRRegOp zprty, string pg_suffix, dag iops> +: I<(outs zprty:$Zd), iops, + asm, "\t$Zd, $Pg"#pg_suffix#", $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_32; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> { +let Constraints = "$Zd = $_Zd" in { + 
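+  // Note: the merging (/m) forms take the destination as an extra tied input
+  // operand ($_Zd); the zeroing (/z) forms below do not read the destination.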
def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m", + (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m", + (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m", + (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m", + (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>; +} +} + +multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> { + def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z", + (ins PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z", + (ins PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z", + (ins PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z", + (ins PPR3bAny:$Pg, ZPR64:$Zn)>; +} + +//===----------------------------------------------------------------------===// +// SVE Propagate Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkp<bits<2> opc, string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), + asm, "\t$Pd, $Pg/z, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23} = 0b0; + let Inst{22} = opc{1}; + let Inst{21-20} = 0b00; + let Inst{19-16} = Pm; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); +} + + +//===----------------------------------------------------------------------===// +// SVE Partition Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkn<bit S, string asm> +: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm), + asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm", + "", + []>, Sched<[]> { + bits<4> Pdm; + bits<4> Pg; + bits<4> Pn; + let Inst{31-23} = 0b001001010; + let Inst{22} = S; + let Inst{21-14} = 0b01100001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pdm; + + let Constraints = "$Pdm = $_Pdm"; + let Defs = !if(!eq (S, 0b1), [NZCV], []); +} + +class sve_int_break<bits<3> opc, string asm, string suffix, dag iops> +: I<(outs PPR8:$Pd), iops, + asm, "\t$Pd, $Pg"#suffix#", $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{2-1}; + let Inst{21-14} = 0b01000001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", ""); + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); + +} + +multiclass sve_int_break_m<bits<3> opc, string asm> { + def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>; +} + +multiclass sve_int_break_z<bits<3> opc, string asm> { + def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>; +} + diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b201126c593b..21e44e9589d3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -554,6 +554,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FCANONICALIZE: case AMDGPUISD::RCP: 
case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RCP_IFLAG: @@ -907,6 +908,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( LLVMContext &Ctx = Fn.getParent()->getContext(); const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn); + CallingConv::ID CC = Fn.getCallingConv(); unsigned MaxAlign = 1; uint64_t ExplicitArgOffset = 0; @@ -940,16 +942,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( EVT ArgVT = ValueVTs[Value]; EVT MemVT = ArgVT; - MVT RegisterVT = - getRegisterTypeForCallingConv(Ctx, ArgVT); - unsigned NumRegs = - getNumRegistersForCallingConv(Ctx, ArgVT); - - if (!Subtarget->isAmdHsaOS() && - (ArgVT == MVT::i16 || ArgVT == MVT::i8 || ArgVT == MVT::f16)) { - // The ABI says the caller will extend these values to 32-bits. - MemVT = ArgVT.isInteger() ? MVT::i32 : MVT::f32; - } else if (NumRegs == 1) { + MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT); + unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT); + + if (NumRegs == 1) { // This argument is not split, so the IR type is the memory type. if (ArgVT.isExtended()) { // We have an extended type, like i24, so we should just use the @@ -3600,6 +3596,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, case ISD::FRINT: case ISD::FNEARBYINT: // XXX - Should fround be handled? case ISD::FSIN: + case ISD::FCANONICALIZE: case AMDGPUISD::RCP: case AMDGPUISD::RCP_LEGACY: case AMDGPUISD::RCP_IFLAG: diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 96b7568eec1f..7442a59e594f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -342,8 +342,9 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", - SDTypeProfile<1, 3, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, - SDTCisFP<0>, SDTCisVec<1>]>, + SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, + SDTCisFP<0>, SDTCisVec<1>, + SDTCisInt<4>]>, []>; def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 9426df399597..c9c932ef2f5f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -567,6 +567,7 @@ int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding int FP16_ONE = 0x3C00; +int FP16_NEG_ONE = 0xBC00; int V2FP16_ONE = 0x3C003C00; int FP32_ONE = 0x3f800000; int FP32_NEG_ONE = 0xbf800000; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 8cc7e38f7b29..c147830e12ed 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -100,16 +100,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { unsigned Size = DL.getTypeSizeInBits(ArgTy); unsigned AllocSize = DL.getTypeAllocSize(ArgTy); - - // Clover seems to always pad i8/i16 to i32, but doesn't properly align - // them? - // Make sure the struct elements have correct size and alignment for ext - // args. These seem to be padded up to 4-bytes but not correctly aligned. 
- bool IsExtArg = AllocSize < 32 && (Arg.hasZExtAttr() || Arg.hasSExtAttr()) && - !ST.isAmdHsaOS(); - if (IsExtArg) - AllocSize = 4; - uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize; @@ -164,8 +154,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { ArgPtr->getName() + ".cast"); } - assert((!IsExtArg || !IsV3) && "incompatible situation"); - if (IsV3 && Size >= 32) { V4Ty = VectorType::get(VT->getVectorElementType(), 4); // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads @@ -212,20 +200,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { // TODO: Convert noalias arg to !noalias if (Size < 32 && !ArgTy->isAggregateType()) { - if (IsExtArg && OffsetDiff == 0) { - Type *I32Ty = Builder.getInt32Ty(); - bool IsSext = Arg.hasSExtAttr(); - Metadata *LowAndHigh[] = { - ConstantAsMetadata::get( - ConstantInt::get(I32Ty, IsSext ? minIntN(Size) : 0)), - ConstantAsMetadata::get( - ConstantInt::get(I32Ty, - IsSext ? maxIntN(Size) + 1 : maxUIntN(Size) + 1)) - }; - - Load->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, LowAndHigh)); - } - Value *ExtractBits = OffsetDiff == 0 ? Load : Builder.CreateLShr(Load, OffsetDiff * 8); diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 1e0bc62c45a6..44c2d366e461 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -66,6 +66,22 @@ def MIMGDimInfoTable : GenericTable { let PrimaryKeyName = "getMIMGDimInfo"; } +class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> { + MIMGBaseOpcode L = l; + MIMGBaseOpcode LZ = lz; +} + +def MIMGLZMappingTable : GenericTable { + let FilterClass = "MIMGLZMapping"; + let CppTypeName = "MIMGLZMappingInfo"; + let Fields = ["L", "LZ"]; + GenericEnum TypeOf_L = MIMGBaseOpcode; + GenericEnum TypeOf_LZ = MIMGBaseOpcode; + + let PrimaryKey = ["L"]; + let PrimaryKeyName = "getMIMGLZMappingInfo"; +} + class mimg <bits<7> si, bits<7> vi = si> { field bits<7> SI = si; field bits<7> VI = vi; @@ -547,3 +563,13 @@ foreach intr = !listconcat(AMDGPUImageDimIntrinsics, AMDGPUImageDimAtomicIntrinsics) in { def : ImageDimIntrinsicInfo<intr>; } + +// L to LZ Optimization Mapping +def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>; +def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>; +def : MIMGLZMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_LZ_O>; +def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_LZ_O>; +def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>; +def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>; +def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>; +def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5b7fc2656a20..25007861fd15 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -694,6 +694,87 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const { return false; } +MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + // TODO: Consider splitting all arguments into 32-bit pieces. 
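+  // (Illustrative) e.g. a v4f32 argument in a non-kernel calling convention
+  // is passed as four f32 registers, and a v2i64 argument as four i32 pieces.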
+ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + EVT ScalarVT = VT.getScalarType(); + unsigned Size = ScalarVT.getSizeInBits(); + if (Size == 32) + return ScalarVT.getSimpleVT(); + + if (Size == 64) + return MVT::i32; + + if (Size == 16 && + Subtarget->has16BitInsts() && + isPowerOf2_32(VT.getVectorNumElements())) + return VT.isInteger() ? MVT::v2i16 : MVT::v2f16; + } + + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); +} + +unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + EVT ScalarVT = VT.getScalarType(); + unsigned Size = ScalarVT.getSizeInBits(); + + if (Size == 32) + return NumElts; + + if (Size == 64) + return 2 * NumElts; + + // FIXME: Fails to break down as we want with v3. + if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) + return VT.getVectorNumElements() / 2; + } + + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); +} + +unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, + EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + EVT ScalarVT = VT.getScalarType(); + unsigned Size = ScalarVT.getSizeInBits(); + if (Size == 32) { + RegisterVT = ScalarVT.getSimpleVT(); + IntermediateVT = RegisterVT; + NumIntermediates = NumElts; + return NumIntermediates; + } + + if (Size == 64) { + RegisterVT = MVT::i32; + IntermediateVT = RegisterVT; + NumIntermediates = 2 * NumElts; + return NumIntermediates; + } + + // FIXME: We should fix the ABI to be the same on targets without 16-bit + // support, but unless we can properly handle 3-vectors, it will still be + // inconsistent. + if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) { + RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16; + IntermediateVT = RegisterVT; + NumIntermediates = NumElts / 2; + return NumIntermediates; + } + } + + return TargetLowering::getVectorTypeBreakdownForCallingConv( + Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); +} + bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, MachineFunction &MF, @@ -1268,6 +1349,8 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits, for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) { const ISD::InputArg *Arg = &Ins[I]; + assert(!Arg->VT.isVector() && "vector type argument should have been split"); + // First check if it's a PS input addr. if (CallConv == CallingConv::AMDGPU_PS && !Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) { @@ -1301,25 +1384,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits, ++PSInputNum; } - // Second split vertices into their elements. - if (Arg->VT.isVector()) { - ISD::InputArg NewArg = *Arg; - NewArg.Flags.setSplit(); - NewArg.VT = Arg->VT.getVectorElementType(); - - // We REALLY want the ORIGINAL number of vertex elements here, e.g. a - // three or five element vertex only needs three or five registers, - // NOT four or eight. 
- Type *ParamType = FType->getParamType(Arg->getOrigArgIndex()); - unsigned NumElements = ParamType->getVectorNumElements(); - - for (unsigned J = 0; J != NumElements; ++J) { - Splits.push_back(NewArg); - NewArg.PartOffset += NewArg.VT.getStoreSize(); - } - } else { - Splits.push_back(*Arg); - } + Splits.push_back(*Arg); } } @@ -4490,6 +4555,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); + const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = + AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode); + unsigned IntrOpcode = Intr->BaseOpcode; SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end()); bool IsD16 = false; @@ -4575,6 +4643,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SmallVector<SDValue, 4> VAddrs; for (unsigned i = 0; i < NumVAddrs; ++i) VAddrs.push_back(Op.getOperand(AddrIdx + i)); + + // Optimize _L to _LZ when _L is zero + if (LZMappingInfo) { + if (auto ConstantLod = + dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) { + if (ConstantLod->isZero() || ConstantLod->isNegative()) { + IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l + VAddrs.pop_back(); // remove 'lod' + } + } + } + SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs); SDValue True = DAG.getTargetConstant(1, DL, MVT::i1); @@ -4634,10 +4714,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op, int Opcode = -1; if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8, + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8, NumVDataDwords, NumVAddrDwords); if (Opcode == -1) - Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6, + Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6, NumVDataDwords, NumVAddrDwords); assert(Opcode != -1); @@ -4945,7 +5025,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::amdgcn_fdot2: return DAG.getNode(AMDGPUISD::FDOT2, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); case Intrinsic::amdgcn_fmul_legacy: return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -6754,10 +6835,6 @@ static bool isCanonicalized(SelectionDAG &DAG, SDValue Op, return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 || ST->hasFP16Denormals(); - case ISD::FP16_TO_FP: - case ISD::FP_TO_FP16: - return ST->hasFP16Denormals(); - // It can/will be lowered or combined as a bit operation. // Need to check their input recursively to handle. 
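// Review note (not part of the patch): FP16_TO_FP / FP_TO_FP16 were removed from the list above, so conversions are no longer presumed canonical merely because the subtarget flushes fp16 denormals. The bit-operation cases that follow instead decide canonicality by recursing into their source operand, roughly (exact argument list assumed):
//   case ISD::FNEG:
//     return isCanonicalized(DAG, Op.getOperand(0), ST);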
case ISD::FNEG: @@ -6799,8 +6876,16 @@ SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); + SDValue N0 = N->getOperand(0); + // fcanonicalize undef -> qnan + if (N0.isUndef()) { + EVT VT = N->getValueType(0); + APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)); + return DAG.getConstantFP(QNaN, SDLoc(N), VT); + } + + ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0); if (!CFP) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType().getScalarType(); @@ -6853,7 +6938,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT); } - return N->getOperand(0); + return N0; } static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { @@ -7544,8 +7629,10 @@ SDValue SITargetLowering::performFMACombine(SDNode *N, return SDValue(); if ((Vec1 == Vec3 && Vec2 == Vec4) || - (Vec1 == Vec4 && Vec2 == Vec3)) - return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc); + (Vec1 == Vec4 && Vec2 == Vec3)) { + return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc, + DAG.getTargetConstant(0, SL, MVT::i1)); + } } return SDValue(); } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h index ad049f2a71c3..5b3d49b3d8e3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -25,6 +25,19 @@ class SITargetLowering final : public AMDGPUTargetLowering { private: const GCNSubtarget *Subtarget; +public: + MVT getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const override; + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const override; + + unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + +private: SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 61c8f359e168..dc9397cf7b85 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) return true; - // V_READFIRSTLANE/V_READLANE destination register may be used as operand - // by some SALU instruction. If exec mask is zero vector instruction - // defining the register that is used by the scalar one is not executed - // and scalar instruction will operate on undefined data. For - // V_READFIRSTLANE/V_READLANE we should avoid predicated execution. - if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) || - (I->getOpcode() == AMDGPU::V_READLANE_B32)) { + if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) return true; - } - - if (I->isInlineAsm()) { - const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); - const char *AsmStr = I->getOperand(0).getSymbolName(); - - // inlineasm length estimate is number of bytes assuming the longest - // instruction. 
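// Review note (not part of the patch): the inline-asm sizing logic removed here (and the V_READFIRSTLANE/V_READLANE check above it) is folded into the new SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty() hook, so the loop body reduces to:
//   if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
//     return true;
//   ++NumInstr;
// Net behavior change visible in this hunk: inline asm now conservatively forces the skip branch instead of contributing a byte-length estimate to the SkipThreshold count.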
- uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI); - NumInstr += MaxAsmSize / MAI->getMaxInstLength(); - } else { - ++NumInstr; - } + ++NumInstr; if (NumInstr >= SkipThreshold) return true; } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6c85c92454c3..f3745382a6f4 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, changesVGPRIndexingMode(MI); } +bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + + if (MI.mayStore() && isSMRD(MI)) + return true; // scalar store or atomic + + // These instructions cause shader I/O that may cause hardware lockups + // when executed with an empty EXEC mask. + // + // Note: exp with VM = DONE = 0 is automatically skipped by hardware when + // EXEC = 0, but checking for that case here seems not worth it + // given the typical code patterns. + if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || + Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE) + return true; + + if (MI.isInlineAsm()) + return true; // conservative assumption + + // These are like SALU instructions in terms of effects, so it's questionable + // whether we should return true for those. + // + // However, executing them with EXEC = 0 causes them to operate on undefined + // data, which we avoid by returning true here. + if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32) + return true; + + return false; +} + bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { switch (Imm.getBitWidth()) { case 32: diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 0a735257d34e..d681b926504e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -597,6 +597,9 @@ public: return !RI.isSGPRReg(MRI, Dest); } + /// Whether we must prevent this instruction from executing with EXEC = 0. 
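/// (Review note, not in the patch: per the implementation added above this
/// currently covers scalar memory stores, S_SENDMSG/S_SENDMSGHALT,
/// EXP/EXP_DONE, inline asm, and V_READFIRSTLANE/V_READLANE; callers such
/// as SIInsertSkips::shouldSkip() use it to decide when a branch around the
/// code is required rather than relying on EXEC-masked predication.)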
+ bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index c3f8bfb53ef4..5c10646161b3 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1387,6 +1387,11 @@ def : GCNPat< >; def : GCNPat< + (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), + (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0) +>; + +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) >; @@ -1411,6 +1416,11 @@ def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0) >; + +def : GCNPat< + (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))), + (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0) +>; } let OtherPredicates = [FP32Denormals] in { diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 3fd3c75874a3..4eba19382315 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -110,6 +110,7 @@ struct MIMGInfo { #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL +#define GET_MIMGLZMappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 70681c271697..5b7af8268cda 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -42,6 +42,7 @@ namespace AMDGPU { #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL +#define GET_MIMGLZMapping_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -211,6 +212,14 @@ struct MIMGDimInfo { LLVM_READONLY const MIMGDimInfo *getMIMGDimInfo(unsigned Dim); +struct MIMGLZMappingInfo { + MIMGBaseOpcode L; + MIMGBaseOpcode LZ; +}; + +LLVM_READONLY +const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); + LLVM_READONLY int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords); diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 5c78ada3211e..b51828b54679 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -167,13 +167,30 @@ defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>; let SubtargetPredicate = HasDLInsts in { -def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>, AMDGPUfdot2>; -def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2>; -def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2>; -def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4>; -def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, 
VOP3_PACKED>, int_amdgcn_udot4>; -def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8>; -def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8>; +def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>; +def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>; +def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>; +def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; +def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; +def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; +def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>; + +multiclass DotPats<SDPatternOperator dot_op, + VOP3PInst dot_inst> { + def : GCNPat < + (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), + (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)), + (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp), + (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>; +} + +defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>; +defm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>; +defm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>; +defm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>; +defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>; +defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>; +defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>; } // End SubtargetPredicate = HasDLInsts diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 2196f9b47f3b..b227eaed8d61 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -117,7 +117,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // globals from all functions in PromotedGlobals. for (auto *GV : AFI->getGlobalsPromotedToConstantPool()) PromotedGlobals.insert(GV); - + // Calculate this function's optimization goal. 
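// Review note (not part of the patch): the VOP3P hunk above unbinds the dot-product intrinsics from their instruction definitions and re-attaches them through the DotPats multiclass, so source modifiers and the clamp bit are matched explicitly; this pairs with the SIISelLowering changes that now forward the intrinsic's clamp operand (Op.getOperand(4)) to AMDGPUISD::FDOT2 and pass an explicit zero clamp in performFMACombine.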
unsigned OptimizationGoal; if (F.hasFnAttribute(Attribute::OptimizeNone)) @@ -367,8 +367,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); unsigned RC; - InlineAsm::hasRegClassConstraint(Flags, RC); - if (RC == ARM::GPRPairRegClassID) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + if (InlineAsm::hasRegClassConstraint(Flags, RC) && + ARM::GPRPairRegClass.hasSubClassEq(TRI->getRegClass(RC))) { if (NumVals != 1) return true; const MachineOperand &MO = MI->getOperand(OpNum); @@ -990,7 +991,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, if (Subtarget->isThumb1Only()) EmitAlignment(2); - + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 43e8b7d66c62..5342e6e2cd13 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -584,7 +584,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); - if (TFI->hasFP(MF) && + if (TFI->hasFP(MF) && !((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset)) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index 63bf48abb7ac..543165de38d0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -269,14 +269,15 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, for (auto Reg : RegList) State.AllocateReg(Reg); + // After the first item has been allocated, the rest are packed as tightly as + // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll + // be allocating a bunch of i32 slots). + unsigned RestAlign = std::min(Align, Size); + for (auto &It : PendingMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); - - // After the first item has been allocated, the rest are packed as tightly - // as possible. (E.g. an incoming i64 would have starting Align of 8, but - // we'll be allocating a bunch of i32 slots). 
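// Review note (not part of the patch): the in-loop assignment removed below ("Align = Size") could raise the alignment of later stack slots whenever Size exceeded the incoming Align. Hoisting
//   unsigned RestAlign = std::min(Align, Size);
// before the loop keeps the first slot at the original alignment and packs the remaining slots at no more than that, which is what the relocated comment about i64 arguments split into i32 slots intends.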
- Align = Size; + Align = RestAlign; } // All pending members have now been allocated diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index de08eb8c6985..2c4738d3cb74 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -2128,7 +2128,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { unsigned DeadSize = 0; bool CanDeleteLEA = false; bool BaseRegKill = false; - + unsigned IdxReg = ~0U; bool IdxRegKill = true; if (isThumb2) { diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 5139a18f9263..55194ed94532 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -113,7 +113,7 @@ public: bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; } - + int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 26d4aaa12acf..a66cd7053c0a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2116,7 +2116,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index af983ce2606a..a8c75702d7b5 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -372,7 +372,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc dl; - + unsigned FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 081d4ff033bd..9592dd53c347 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2539,7 +2539,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); } }; - + if (Range->second == 0) { // 1. 
Mask includes the LSB -> Simply shift the top N bits off NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); @@ -2633,7 +2633,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { MachineMemOperand::MOLoad, 4, 4); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1); - + ReplaceNode(N, ResNode); return; } @@ -2920,7 +2920,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { assert(N3.getOpcode() == ISD::Register); unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); - + if (InFlag.getOpcode() == ARMISD::CMPZ) { bool SwitchEQNEToPLMI; SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); @@ -3023,7 +3023,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } - + case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 47222a66f798..ede276dd91bb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3096,7 +3096,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). const Function &F = DAG.getMachineFunction().getFunction(); - + // We rely on this decision to inline being idemopotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll // inline it elsewhere too (and reuse the constant pool entry). Fast-isel @@ -5162,7 +5162,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, return SDValue(); // SoftFP: read half-precision arguments: // - // t2: i32,ch = ... + // t2: i32,ch = ... // t7: i16 = truncate t2 <~~~~ Op // t8: f16 = bitcast t7 <~~~~ N // @@ -5173,7 +5173,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, return SDValue(); } - // Half-precision return values + // Half-precision return values if (SrcVT == MVT::f16 && DstVT == MVT::i16) { if (!HasFullFP16) return SDValue(); @@ -13461,13 +13461,13 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1)); if (!RHS || RHS->getZExtValue() != 4) return false; - + Offset = Op->getOperand(1); Base = Op->getOperand(0); AM = ISD::POST_INC; return true; } - + bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 901138dbdfd5..db5f28480e90 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1275,7 +1275,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // we're minimizing code size. if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill) return false; - + bool HighRegsUsed = false; for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).getReg() >= ARM::R8) { diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 816116772995..91310e81e398 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -126,7 +126,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// The amount the literal pool has been increasedby due to promoted globals. 
int PromotedGlobalsIncrease = 0; - + public: ARMFunctionInfo() = default; diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index d4fbf76f299f..4d685158e258 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -49,7 +49,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( case RTLIB::MEMMOVE: AEABILibcall = AEABI_MEMMOVE; break; - case RTLIB::MEMSET: + case RTLIB::MEMSET: AEABILibcall = AEABI_MEMSET; if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) if (ConstantSrc->getZExtValue() == 0) @@ -93,14 +93,14 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( else if (Src.getValueType().bitsLT(MVT::i32)) Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Entry.Node = Src; + Entry.Node = Src; Entry.Ty = Type::getInt32Ty(*DAG.getContext()); Entry.IsSExt = false; Args.push_back(Entry); } else { Entry.Node = Src; Args.push_back(Entry); - + Entry.Node = Size; Args.push_back(Entry); } @@ -121,7 +121,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); - + return CallResult.second; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index f8cae31641ff..94f9cefe429c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -389,7 +389,7 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, unsigned NumVectorInstToHideOverhead = 10; int MaxMergeDistance = 64; - if (Ty->isVectorTy() && SE && + if (Ty->isVectorTy() && SE && !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index cd9fa0709020..e0cd2d8e26a6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -153,7 +153,7 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getAddressComputationCost(Type *Val, ScalarEvolution *SE, + int getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr); int getArithmeticInstrCost( diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 807d62547337..a5fbbbf26be9 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -969,7 +969,7 @@ public: // checks whether this operand is a memory operand computed as an offset // applied to PC. the offset may have 8 bits of magnitude and is represented - // with two bits of shift. textually it may be either [pc, #imm], #imm or + // with two bits of shift. textually it may be either [pc, #imm], #imm or // relocable expression... bool isThumbMemPC() const { int64_t Val = 0; @@ -2284,7 +2284,7 @@ public: } const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); - + assert(SR && "Unknown value type!"); Inst.addOperand(MCOperand::createExpr(SR)); return; @@ -2326,7 +2326,7 @@ public: assert(isImm() && "Not an immediate!"); // If we have an immediate that's not a constant, treat it as a label - // reference needing a fixup. + // reference needing a fixup. 
if (!isa<MCConstantExpr>(getImm())) { Inst.addOperand(MCOperand::createExpr(getImm())); return; @@ -3419,7 +3419,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) - return -1; + return -1; std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) @@ -4311,7 +4311,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Identifier)) + if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; StringRef IFlagsStr = Tok.getString(); @@ -4353,7 +4353,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { return MatchOperand_NoMatch; } unsigned SYSmvalue = Val & 0xFF; - Parser.Lex(); + Parser.Lex(); Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S)); return MatchOperand_Success; } @@ -4996,7 +4996,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, // first decide whether or not the branch should be conditional // by looking at it's location relative to an IT block if(inITBlock()) { - // inside an IT block we cannot have any conditional branches. any + // inside an IT block we cannot have any conditional branches. any // such instructions needs to be converted to unconditional form switch(Inst.getOpcode()) { case ARM::tBcc: Inst.setOpcode(ARM::tB); break; @@ -5008,11 +5008,11 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode(); switch(Inst.getOpcode()) { case ARM::tB: - case ARM::tBcc: - Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc); + case ARM::tBcc: + Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc); break; case ARM::t2B: - case ARM::t2Bcc: + case ARM::t2Bcc: Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc); break; } @@ -8882,7 +8882,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, case ARM::MOVsi: { ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); // rrx shifts and asr/lsr of #32 is encoded as 0 - if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr) + if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr) return false; if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) { // Shifting by zero is accepted as a vanilla 'MOVr' @@ -9371,6 +9371,12 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure. 
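// Review note (not part of the patch): the hunk below hoists .inst/.inst.n/.inst.w out of the ELF-only branch into the common directive dispatch, so the directives are now accepted when targeting MachO and COFF as well; the guarded block keeps only the ELF build-attribute and unwinding directives (.arch, .fpu, .fnstart, .object_arch, ...). Usage is unchanged, e.g.:
//   .inst.n 0x4770    @ bx lr, explicitly narrow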
else if (IDVal == ".thumb_set") parseDirectiveThumbSet(DirectiveID.getLoc()); + else if (IDVal == ".inst") + parseDirectiveInst(DirectiveID.getLoc()); + else if (IDVal == ".inst.n") + parseDirectiveInst(DirectiveID.getLoc(), 'n'); + else if (IDVal == ".inst.w") + parseDirectiveInst(DirectiveID.getLoc(), 'w'); else if (!IsMachO && !IsCOFF) { if (IDVal == ".arch") parseDirectiveArch(DirectiveID.getLoc()); @@ -9382,12 +9388,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveFPU(DirectiveID.getLoc()); else if (IDVal == ".fnstart") parseDirectiveFnStart(DirectiveID.getLoc()); - else if (IDVal == ".inst") - parseDirectiveInst(DirectiveID.getLoc()); - else if (IDVal == ".inst.n") - parseDirectiveInst(DirectiveID.getLoc(), 'n'); - else if (IDVal == ".inst.w") - parseDirectiveInst(DirectiveID.getLoc(), 'w'); else if (IDVal == ".object_arch") parseDirectiveObjectArch(DirectiveID.getLoc()); else if (IDVal == ".tlsdescseq") @@ -10012,8 +10012,8 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { case 'w': break; default: - return Error(Loc, "cannot determine Thumb instruction size, " - "use inst.n/inst.w instead"); + Width = 0; + break; } } else { if (Suffix) @@ -10029,6 +10029,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { return Error(Loc, "expected constant expression"); } + char CurSuffix = Suffix; switch (Width) { case 2: if (Value->getValue() > 0xffff) @@ -10039,11 +10040,21 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") + " operand is too big"); break; + case 0: + // Thumb mode, no width indicated. Guess from the opcode, if possible. + if (Value->getValue() < 0xe800) + CurSuffix = 'n'; + else if (Value->getValue() >= 0xe8000000) + CurSuffix = 'w'; + else + return Error(Loc, "cannot determine Thumb instruction size, " + "use inst.n/inst.w instead"); + break; default: llvm_unreachable("only supported widths are 2 and 4"); } - getTargetStreamer().emitInst(Value->getValue(), Suffix); + getTargetStreamer().emitInst(Value->getValue(), CurSuffix); return false; }; diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4733cf49827e..61bec04678dd 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -620,7 +620,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { // assume a predicate of AL. 
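// Review note (not part of the patch): the parseDirectiveInst change above also makes a bare Thumb ".inst" work by inferring the width from the encoding: a value below 0xe800 can only begin a 16-bit instruction (treated as .inst.n), a value of 0xe8000000 or above must be a full 32-bit encoding (treated as .inst.w), and anything in between is still rejected with the old "use inst.n/inst.w instead" error. For example:
//   .inst 0x4770        @ inferred narrow
//   .inst 0xf3af8000    @ inferred wide
//   .inst 0xe800        @ error: width is ambiguous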
unsigned CC; CC = ITBlock.getITCC(); - if (CC == 0xF) + if (CC == 0xF) CC = ARMCC::AL; if (ITBlock.instrInITBlock()) ITBlock.advanceITState(); @@ -888,7 +888,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - if (RegNo == 15) + if (RegNo == 15) S = MCDisassembler::SoftFail; Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); @@ -2171,7 +2171,7 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); - if (!FeatureBits[ARM::HasV8_1aOps] || + if (!FeatureBits[ARM::HasV8_1aOps] || !FeatureBits[ARM::HasV8Ops]) return MCDisassembler::Fail; @@ -4467,7 +4467,7 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, index = fieldFromInstruction(Insn, 7, 1); switch (fieldFromInstruction(Insn, 4, 2)) { - case 0: + case 0: align = 0; break; case 3: align = 4; break; @@ -5279,7 +5279,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, return MCDisassembler::Fail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; - if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) + if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 75ed40c18fa2..bfc32073ba18 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -834,7 +834,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, return; } - O << SYSm; + O << SYSm; return; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index dfa339091a7b..7d04c73fb3f2 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -64,7 +64,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, } } -// Need to examine the Fixup when determining whether to +// Need to examine the Fixup when determining whether to // emit the relocation as an explicit symbol or as a section relative // offset unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 0dab789505d5..b37b8073548f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -740,7 +740,7 @@ getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) { if (HasConditionalBranch(MI)) - return ::getBranchTargetOpValue(MI, OpIdx, + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_condbl, Fixups, STI); return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI); } @@ -766,10 +766,10 @@ uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue( const MCSubtargetInfo &STI) const { unsigned Val = 0; const MCOperand MO = MI.getOperand(OpIdx); - + if(MO.isExpr()) return 
::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI); - else + else Val = MO.getImm() >> 1; bool I = (Val & 0x800000); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 42371736fef4..63aa9735e8a4 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -13,6 +13,8 @@ #include "ARMTargetMachine.h" #include "llvm/MC/ConstantPools.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -47,6 +49,41 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); } // reset() - Reset any state void ARMTargetStreamer::reset() {} +void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) { + unsigned Size; + char Buffer[4]; + const bool LittleEndian = getStreamer().getContext().getAsmInfo()->isLittleEndian(); + + switch (Suffix) { + case '\0': + Size = 4; + + for (unsigned II = 0, IE = Size; II != IE; II++) { + const unsigned I = LittleEndian ? (Size - II - 1) : II; + Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT); + } + + break; + case 'n': + case 'w': + Size = (Suffix == 'n' ? 2 : 4); + + // Thumb wide instructions are emitted as a pair of 16-bit words of the + // appropriate endianness. + for (unsigned II = 0, IE = Size; II != IE; II = II + 2) { + const unsigned I0 = LittleEndian ? II + 0 : II + 1; + const unsigned I1 = LittleEndian ? II + 1 : II + 0; + Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT); + Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT); + } + + break; + default: + llvm_unreachable("Invalid Suffix"); + } + getStreamer().EmitBytes(StringRef(Buffer, Size)); +} + // The remaining callbacks should be handled separately by each // streamer. void ARMTargetStreamer::emitFnStart() {} @@ -76,7 +113,6 @@ void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {} void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {} void ARMTargetStreamer::emitFPU(unsigned FPU) {} void ARMTargetStreamer::finishAttributeSection() {} -void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp index 637e4a44c428..7f03e1463c1d 100644 --- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -233,7 +233,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { // On Swift, we mostly care about hazards from multiplication instructions // writing the accumulator and the pipelining of loop iterations by out-of- - // order execution. + // order execution. if (isSwift) return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index a65e22fd86e8..5c745e112b2e 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -127,7 +127,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. 
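// Review note (not part of the patch): ARMTargetStreamer now provides a concrete emitInst() (the empty virtual at the bottom of that hunk is deleted), writing the encoding with the target's endianness; Thumb wide instructions are emitted as two 16-bit words, so on little-endian emitInst(0xf3af8000, 'w') produces the bytes
//   af f3 00 80
// i.e. each halfword is byte-swapped independently rather than the whole 32-bit word.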
DebugLoc dl; - + unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp index c1515571aae5..1b412a9c6813 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -63,6 +63,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setTruncStoreAction(MVT::i16, MVT::i8, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::ADDC, VT, Legal); + setOperationAction(ISD::SUBC, VT, Legal); + setOperationAction(ISD::ADDE, VT, Legal); + setOperationAction(ISD::SUBE, VT, Legal); + } + // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types // revert into a sub since we don't have an add with immediate instruction. setOperationAction(ISD::ADD, MVT::i32, Custom); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 4791b067aa8d..ba255d30fede 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1777,6 +1777,7 @@ namespace { const BitTracker::RegisterCell &RC); bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, const RegisterSet &AVs); + bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD); // Cache of created instructions to avoid creating duplicates. // XXX Currently only used by genBitSplit. @@ -2567,6 +2568,127 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, return Changed; } +bool BitSimplification::simplifyRCmp0(MachineInstr *MI, + BitTracker::RegisterRef RD) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi) + return false; + MachineOperand &CmpOp = MI->getOperand(2); + if (!CmpOp.isImm() || CmpOp.getImm() != 0) + return false; + + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) + return false; + assert(RD.Sub == 0); + + MachineBasicBlock &B = *MI->getParent(); + const DebugLoc &DL = MI->getDebugLoc(); + auto At = MI->isPHI() ? 
B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + bool KnownZ = true; + bool KnownNZ = false; + + BitTracker::RegisterRef SR = MI->getOperand(1); + if (!BT.has(SR.Reg)) + return false; + const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg); + unsigned F, W; + if (!HBS::getSubregMask(SR, F, W, MRI)) + return false; + + for (uint16_t I = F; I != F+W; ++I) { + const BitTracker::BitValue &V = SC[I]; + if (!V.is(0)) + KnownZ = false; + if (V.is(1)) + KnownNZ = true; + } + + auto ReplaceWithConst = [&] (int C) { + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR) + .addImm(C); + HBS::replaceReg(RD.Reg, NewR, MRI); + BitTracker::RegisterCell NewRC(W); + for (uint16_t I = 0; I != W; ++I) { + NewRC[I] = BitTracker::BitValue(C & 1); + C = unsigned(C) >> 1; + } + BT.put(BitTracker::RegisterRef(NewR), NewRC); + return true; + }; + + auto IsNonZero = [] (const MachineOperand &Op) { + if (Op.isGlobal() || Op.isBlockAddress()) + return true; + if (Op.isImm()) + return Op.getImm() != 0; + if (Op.isCImm()) + return !Op.getCImm()->isZero(); + if (Op.isFPImm()) + return !Op.getFPImm()->isZero(); + return false; + }; + + auto IsZero = [] (const MachineOperand &Op) { + if (Op.isGlobal() || Op.isBlockAddress()) + return false; + if (Op.isImm()) + return Op.getImm() == 0; + if (Op.isCImm()) + return Op.getCImm()->isZero(); + if (Op.isFPImm()) + return Op.getFPImm()->isZero(); + return false; + }; + + // If the source register is known to be 0 or non-0, the comparison can + // be folded to a load of a constant. + if (KnownZ || KnownNZ) { + assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0"); + return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi)); + } + + // Special case: if the compare comes from a C2_muxii, then we know the + // two possible constants that can be the source value. + MachineInstr *InpDef = MRI.getVRegDef(SR.Reg); + if (!InpDef) + return false; + if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) { + MachineOperand &Src1 = InpDef->getOperand(2); + MachineOperand &Src2 = InpDef->getOperand(3); + // Check if both are non-zero. + bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2); + if (KnownNZ1 && KnownNZ2) + return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi); + // Check if both are zero. + bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2); + if (KnownZ1 && KnownZ2) + return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi); + + // If for both operands we know that they are either 0 or non-0, + // replace the comparison with a C2_muxii, using the same predicate + // register, but with operands substituted with 0/1 accordingly. + if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) { + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR) + .addReg(InpDef->getOperand(1).getReg()) + .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi)) + .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi)); + HBS::replaceReg(RD.Reg, NewR, MRI); + // Create a new cell with only the least significant bit unknown. 
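// Review note (not part of the patch): worked example for this C2_muxii case of simplifyRCmp0. Given
//   %v = C2_muxii %p, 0, 7
//   %r = A4_rcmpeqi %v, 0
// src1 is known zero and src2 known non-zero, so the compare folds to
//   %r = C2_muxii %p, 1, 0
// and the cell recorded below marks every bit of %r as zero except bit 0, which stays unknown because it depends on %p at run time.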
+ BitTracker::RegisterCell NewRC(W); + NewRC[0] = BitTracker::BitValue::self(); + NewRC.fill(1, W, BitTracker::BitValue::Zero); + BT.put(BitTracker::RegisterRef(NewR), NewRC); + return true; + } + } + + return false; +} + bool BitSimplification::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { if (!BT.reached(&B)) @@ -2615,6 +2737,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B, T = T || genExtractHalf(MI, RD, RC); T = T || genCombineHalf(MI, RD, RC); T = T || genExtractLow(MI, RD, RC); + T = T || simplifyRCmp0(MI, RD); Changed |= T; continue; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index e13cfd3f655a..94aacbed6af6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -347,9 +347,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, return rr0(RC, Outputs); } case C2_tfrrp: { - RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); - W0 = 8; // XXX Pred size - return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs); + uint16_t RW = W0; + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + RegisterCell RC = RegisterCell::self(Reg[0].Reg, RW); + RC.fill(PW, RW, BT::BitValue::Zero); + return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs); } // Arithmetic: @@ -950,6 +952,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } default: + // For instructions that define a single predicate registers, store + // the low 8 bits of the register only. + if (unsigned DefR = getUniqueDefVReg(MI)) { + if (MRI.getRegClass(DefR) == &Hexagon::PredRegsRegClass) { + BT::RegisterRef PD(DefR, 0); + uint16_t RW = getRegBitWidth(PD); + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + RegisterCell RC = RegisterCell::self(DefR, RW); + RC.fill(PW, RW, BT::BitValue::Zero); + putCell(PD, RC, Outputs); + return true; + } + } return MachineEvaluator::evaluate(MI, Inputs, Outputs); } #undef im @@ -1016,6 +1031,21 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, return true; } +unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const { + unsigned DefReg = 0; + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (DefReg != 0) + return 0; + DefReg = R; + } + return DefReg; +} + bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h index d9dd04e1b088..f0b7c9d91950 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -49,6 +49,7 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator { const HexagonInstrInfo &TII; private: + unsigned getUniqueDefVReg(const MachineInstr &MI) const; bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const; bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs, diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index 183dee36a047..de486ec4b7bd 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -2,7 +2,7 @@ // // The LLVM 
Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 2acf701b43cb..ce7db657f5e9 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -7371,7 +7371,7 @@ bool MipsAsmParser::parseDirectiveGpWord() { getParser().getStreamer().EmitGPRel32Value(Value); if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(getLexer().getLoc(), + return Error(getLexer().getLoc(), "unexpected token, expected end of statement"); Parser.Lex(); // Eat EndOfStatement token. return false; @@ -7506,7 +7506,7 @@ bool MipsAsmParser::parseDirectiveOption() { } // Unknown option. - Warning(Parser.getTok().getLoc(), + Warning(Parser.getTok().getLoc(), "unknown option, expected 'pic0' or 'pic2'"); Parser.eatToEndOfStatement(); return false; @@ -8193,7 +8193,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".abicalls") { getTargetStreamer().emitDirectiveAbiCalls(); if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { - Error(Parser.getTok().getLoc(), + Error(Parser.getTok().getLoc(), "unexpected token, expected end of statement"); } return false; diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index fdb560f3c72f..d7f6cf91db73 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -114,7 +114,7 @@ namespace Mips { // resulting in - R_MIPS_GOT_DISP fixup_Mips_GOT_DISP, - // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER + // resulting in - R_MIPS_HIGHER/R_MICROMIPS_HIGHER fixup_Mips_HIGHER, fixup_MICROMIPS_HIGHER, diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 8ffc0731abcb..2e0c25de2bc8 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1094,7 +1094,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { // ALIGN // B .tmpN // 11 NOP instructions (44 bytes) - // ADDIU T9, T9, 52 + // ADDIU T9, T9, 52 // .tmpN // // We need the 44 bytes (11 instructions) because at runtime, we'd diff --git a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp index e82f62260b3f..a705ebb6b193 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -418,7 +418,8 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( for (auto &Arg : Args) { EVT VT = TLI.getValueType(DL, Arg.Ty); - MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(), VT); + MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(), + F.getCallingConv(), VT); ISD::ArgFlagsTy Flags = Arg.Flags; Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL)); diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 9eb13a68e561..744523cc6cb9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ 
b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This pass is used to make Pc relative loads of constants. -// For now, only Mips16 will use this. +// For now, only Mips16 will use this. // // Loading constants inline is expensive on Mips16 and it's in general better // to place the constant nearby in code space and then it can be loaded with a @@ -1171,7 +1171,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// findAvailableWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. /// Returns true if found, false if not. If it returns true, WaterIter -/// is set to the WaterList entry. +/// is set to the WaterList entry. /// To ensure that this pass /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and @@ -1231,7 +1231,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; // If the block does not end in an unconditional branch already, and if the - // end of the block is within range, make new water there. + // end of the block is within range, make new water there. if (BBHasFallthrough(UserMBB)) { // Size of branch to insert. unsigned Delta = 2; @@ -1258,7 +1258,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, } } - // What a big block. Find a place within the block to split it. + // What a big block. Find a place within the block to split it. // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to @@ -1582,7 +1582,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); unsigned OppositeBranchOpcode = TII->getOppositeBranchOpc(Opcode); - + ++NumCBrFixed; if (BMI != MI) { if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) && @@ -1595,7 +1595,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { // bnez L2 // b L1 unsigned BMITargetOperand = branchTargetOperand(BMI); - MachineBasicBlock *NewDest = + MachineBasicBlock *NewDest = BMI->getOperand(BMITargetOperand).getMBB(); if (isBBInRange(MI, NewDest, Br.MaxDisp)) { LLVM_DEBUG( diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp index 7b39507812ed..19b30a44e86a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1662,7 +1662,7 @@ bool MipsFastISel::selectRet(const Instruction *I) { return false; SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index 9ffc38356b76..0677d378a115 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -111,6 +111,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { // The MIPS MSA ABI passes vector arguments in the integer register set. 
// The number of integer registers used is dependent on the ABI used. MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { if (VT.isVector()) { if (Subtarget.isABI_O32()) { @@ -123,6 +124,7 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, } unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { if (VT.isVector()) return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)), @@ -131,10 +133,10 @@ unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, } unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( - LLVMContext &Context, EVT VT, EVT &IntermediateVT, + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { // Break down vector types to either 2 i64s or 4 i32s. - RegisterVT = getRegisterTypeForCallingConv(Context, VT) ; + RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT); IntermediateVT = RegisterVT; NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() ? VT.getVectorNumElements() diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index b58d92c370d8..5a0de45c44f3 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -288,17 +288,18 @@ class TargetRegisterClass; /// Return the register type for a given MVT, ensuring vectors are treated /// as a series of gpr sized integers. - MVT getRegisterTypeForCallingConv(LLVMContext &Context, + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override; /// Return the number of registers for a given MVT, ensuring vectors are /// treated as a series of gpr sized integers. unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const override; /// Break down vectors to the correct number of gpr sized integers. unsigned getVectorTypeBreakdownForCallingConv( - LLVMContext &Context, EVT VT, EVT &IntermediateVT, + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override; /// Return the correct alignment for the current calling convention.
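// Review note (not part of the patch): the Mips hunks above are the target-side fallout of threading CallingConv::ID through the getRegisterTypeForCallingConv / getNumRegistersForCallingConv / getVectorTypeBreakdownForCallingConv interface; Mips accepts and ignores the new parameter, while callers now pass it through, e.g. in MipsCallLowering:
//   MVT RegisterVT = TLI.getRegisterTypeForCallingConv(F.getContext(),
//                                                      F.getCallingConv(), VT);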
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp index af0ac006bc9e..6c5b83021f74 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsInstructionSelector.cpp @@ -166,6 +166,33 @@ bool MipsInstructionSelector::select(MachineInstr &I, I.eraseFromParent(); return true; } + case G_GLOBAL_VALUE: { + if (MF.getTarget().isPositionIndependent()) + return false; + + const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal(); + unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *LUi, *ADDiu; + + LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi)) + .addDef(LUiReg) + .addGlobalAddress(GVal); + LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI); + + ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu)) + .addDef(I.getOperand(0).getReg()) + .addUse(LUiReg) + .addGlobalAddress(GVal); + ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO); + + if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI)) + return false; + if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; + } default: return false; diff --git a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index da6f9dabdaaf..fb259516be09 100644 --- a/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -36,6 +36,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_FRAME_INDEX) .legalFor({p0}); + getActionDefinitionsBuilder(G_GLOBAL_VALUE) + .legalFor({p0}); + computeTables(); verify(*ST.getInstrInfo()); } diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index cef21f447205..351135079217 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -88,6 +88,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; case G_CONSTANT: case G_FRAME_INDEX: + case G_GLOBAL_VALUE: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr}); break; diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 676d702ba63e..896dd0eb0a5e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -163,7 +163,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasEVA -- supports EVA ASE. bool HasEVA; - + // nomadd4 - disables generation of 4-operand madd.s, madd.d and // related instructions. bool DisableMadd4; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 3b042c74b26c..efe98003b1c8 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -248,7 +248,7 @@ protected: private: bool GlobalsEmitted; - + // This is specific per MachineFunction. 
const MachineRegisterInfo *MRI; // The contents are specific for each diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index f12ed81b6d9f..ad1d7cbb52fc 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -2,7 +2,7 @@ // // The LLVM Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 10f1135ad841..5a9115f6f7f1 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -2,7 +2,7 @@ // // The LLVM Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index ea709a73ebf2..fd7f81591426 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -175,7 +175,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, + raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index f000fbb98110..351ccefa2da2 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -35,11 +35,11 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; - + // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - + bool printAliasInstr(const MCInst *MI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 8ac461b96b88..fb7bf23509c7 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -61,7 +61,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { CommentString = "#"; // Uses '.section' before '.bss' directive - UsesELFSectionDirectiveForBSS = true; + UsesELFSectionDirectiveForBSS = true; // Debug Information SupportsDebugInformation = true; @@ -73,7 +73,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - + ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? 
"\t.quad\t" : nullptr; AssemblerDialect = 1; // New-Style mnemonics. diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2b948ca60028..57bda1403c62 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -102,7 +102,7 @@ public: unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - + // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -138,7 +138,7 @@ public: default: llvm_unreachable("Invalid instruction size"); } - + ++MCNumEmitted; // Keep track of the # of mi's emitted. } @@ -147,7 +147,7 @@ private: void verifyInstructionPredicates(const MCInst &MI, uint64_t AvailableFeatures) const; }; - + } // end anonymous namespace MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, @@ -162,7 +162,7 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); - + // Add a fixup for the branch target. Fixups.push_back(MCFixup::create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); @@ -212,7 +212,7 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); - + // Add a fixup for the immediate field. Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); @@ -226,11 +226,11 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16; - + const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits; - + // Add a fixup for the displacement field. Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); @@ -244,11 +244,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14; - + const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits; - + // Add a fixup for the displacement field. Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16ds)); @@ -320,7 +320,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI); - + // Add a fixup for the TLS register, which simply provides a relocation // hint to the linker that this statement is part of a relocation sequence. // Return the thread-pointer register's encoding. 
@@ -373,7 +373,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return Encode; } - + assert(MO.isImm() && "Relocation required in an instruction that we cannot encode!"); return MO.getImm(); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index fe7e7aeeb182..481ba3f09cc7 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -58,7 +58,7 @@ namespace PPC { PRED_BIT_SET = 1024, PRED_BIT_UNSET = 1025 }; - + // Bit for branch taken (plus) or not-taken (minus) hint enum BranchHintBit { BR_NO_HINT = 0x0, diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index dfdec246e868..bfc613af3dc0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -66,7 +66,7 @@ namespace llvm { extern char &PPCVSXFMAMutateID; namespace PPCII { - + /// Target Operand Flag enum. enum TOF { //===------------------------------------------------------------------===// @@ -111,7 +111,7 @@ namespace llvm { MO_TLS = 8 << 4 }; } // end namespace PPCII - + } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index 64b8f1168beb..0d1bb9297bcb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { BlockSizes[MBB->getNumber()].first = BlockSize; FuncSize += BlockSize; } - + // If the entire function is smaller than the displacement of a branch field, // we know we don't need to shrink any branches in this function. This is a // common case. @@ -138,7 +138,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { BlockSizes.clear(); return false; } - + // For each conditional branch, if the offset to its destination is larger // than the offset field allows, transform it into a long branch sequence // like this: @@ -153,7 +153,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { while (MadeChange) { // Iteratively expand branches until we reach a fixed point. MadeChange = false; - + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { MachineBasicBlock &MBB = *MFI; @@ -175,7 +175,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { MBBStartOffset += TII->getInstSizeInBytes(*I); continue; } - + // Determine the offset from the current branch to the destination // block. int BranchSize; @@ -184,7 +184,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // start of this block to this branch, plus the sizes of all blocks // from this block to the dest. BranchSize = MBBStartOffset; - + for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) BranchSize += BlockSizes[i].first; } else { @@ -213,7 +213,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // 2. Target MBB PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); unsigned CRReg = I->getOperand(1).getReg(); - + // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); @@ -234,7 +234,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { } else { llvm_unreachable("Unhandled branch type!"); } - + // Uncond branch to the real destination. 
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); @@ -277,7 +277,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { EverMadeChange |= MadeChange; } - + BlockSizes.clear(); return true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index ed5e496b32fd..ac931f7d0ec0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -73,7 +73,7 @@ protected: if ((*PI)->empty()) continue; - + for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) { if (J == (*PI)->end()) break; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp index b00655b50229..f212894035db 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -1697,7 +1697,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index f0000c5bafd7..84dacf396462 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -174,7 +174,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( {PPC::V22, -160}, {PPC::V21, -176}, {PPC::V20, -192}, - + // SPE register save area (overlaps Vector save area). 
{PPC::S31, -8}, {PPC::S30, -16}, @@ -1229,7 +1229,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, if (MBBI != MBB.end()) dl = MBBI->getDebugLoc(); - + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -1315,7 +1315,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); - + if (IsReturnBlock) { unsigned RetOpcode = MBBI->getOpcode(); bool UsesTCRet = RetOpcode == PPC::TCRETURNri || diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 551220466901..793a4dd7f624 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -50,7 +50,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { return true; } - return false; + return false; } bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { @@ -76,7 +76,7 @@ bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { return true; } - return false; + return false; } // FIXME: Remove this when we don't need this: diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1e3e14c71144..51ff8a5cf77e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1224,6 +1224,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, } unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv:: ID CC, EVT VT) const { if (Subtarget.hasSPE() && VT == MVT::f64) return 2; @@ -1231,6 +1232,7 @@ unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, } MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv:: ID CC, EVT VT) const { if (Subtarget.hasSPE() && VT == MVT::f64) return MVT::i32; @@ -13102,8 +13104,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, - SelectionDAG &DAG, - std::vector<SDNode *> *Created) const { + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) @@ -13120,13 +13122,11 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); - if (Created) - Created->push_back(Op.getNode()); + Created.push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); - if (Created) - Created->push_back(Op.getNode()); + Created.push_back(Op.getNode()); } return Op; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 9b8d6435515b..f174943a8004 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -665,7 +665,7 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector<SDNode *> *Created) const override; + SmallVectorImpl<SDNode *> &Created) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; @@ -872,9 +872,11 
@@ namespace llvm { MCContext &Ctx) const override; unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv:: ID CC, EVT VT) const override; MVT getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv:: ID CC, EVT VT) const override; private: @@ -1141,7 +1143,7 @@ namespace llvm { ISD::ArgFlagsTy &ArgFlags, CCState &State); - bool + bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 4669719744bc..0930f7d3b8d7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -316,11 +316,11 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, } // For opcodes with the ReMaterializable flag set, this function is called to -// verify the instruction is really rematable. +// verify the instruction is really rematable. bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const { switch (MI.getOpcode()) { - default: + default: // This function should only be called for opcodes with the ReMaterializable // flag set. llvm_unreachable("Unknown rematerializable operation!"); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 2217fa4693ce..0b57dd9b618d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -360,7 +360,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // generate direct offsets from both the pre-incremented and // post-incremented pointer values. Thus, we'll pick the first non-prefetch // instruction in each bucket, and adjust the recurrence and other offsets - // accordingly. + // accordingly. for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr)) if (II->getIntrinsicID() == Intrinsic::prefetch) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 62a612feb55c..e731c0bc0c23 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -75,7 +75,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } return Sym; } - + return Sym; } @@ -130,7 +130,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Subtract off the PIC base if required. 
if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - + const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); Expr = MCBinaryExpr::createSub(Expr, PB, Ctx); } @@ -151,7 +151,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin) { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MCOperand MCOp; if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index dbe1fe37ddf8..0068df19f0c8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -891,7 +891,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, auto BII = BB.getFirstInstrTerminator(); // We optimize BBs ending with a conditional branch. // We check only for BCC here, not BCCLR, because BCCLR - // will be formed only later in the pipeline. + // will be formed only later in the pipeline. if (BB.succ_size() == 2 && BII != BB.instr_end() && (*BII).getOpcode() == PPC::BCC && diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index b14bbad2039a..8a3f50aa9565 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -29,7 +29,7 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// stored. Also used as an anchor for instructions that need to be altered /// when using frame pointers (dyna_add, dyna_sub.) int FramePointerSaveIndex = 0; - + /// ReturnAddrSaveIndex - Frame index of where the return address is stored. /// int ReturnAddrSaveIndex = 0; @@ -128,7 +128,7 @@ public: int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } - + int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 6647ceace5eb..96923a97a82c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -979,7 +979,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, SReg = MF.getRegInfo().createVirtualRegister(RC); // Insert a set of rA with the full offset value before the ld, st, or add - if (isInt<16>(Offset)) + if (isInt<16>(Offset)) BuildMI(MBB, II, dl, TII.get(is64Bit ? 
PPC::LI8 : PPC::LI), SReg) .addImm(Offset); else { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 226c75f704f4..b0da9b5a6d70 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -201,7 +201,7 @@ unsigned PPCTTIImpl::getUserCost(const User *U, std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType()); return LT.first * BaseT::getUserCost(U, Operands); } - + return BaseT::getUserCost(U, Operands); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 1e8a1750ec3b..1be193e08c01 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -443,7 +443,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX, // by adding special handling for narrowing copies as well as // widening ones. However, I've experimented with this, and in - // practice we currently do not appear to use STXSDX fed by + // practice we currently do not appear to use STXSDX fed by // a narrowing copy from a full vector register. Since I can't // generate any useful test cases, I've left this alone for now. case PPC::STXSDX: diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index c7a5a1e8e6ee..35f52f7d279b 100644 --- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -190,7 +190,7 @@ public: Sparc::C8_C9, Sparc::C10_C11, Sparc::C12_C13, Sparc::C14_C15, Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23, Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31}; - + namespace { /// SparcOperand - Instances of this class represent a parsed Sparc machine @@ -459,7 +459,7 @@ public: Op.Reg.Kind = rk_CoprocPairReg; return true; } - + static std::unique_ptr<SparcOperand> MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) { unsigned offsetReg = Op->getReg(); @@ -1000,7 +1000,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo, RegKind = SparcOperand::rk_Special; return true; } - + if (name.equals("wim")) { RegNo = Sparc::WIM; RegKind = SparcOperand::rk_Special; @@ -1093,7 +1093,7 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo, RegKind = SparcOperand::rk_CoprocReg; return true; } - + if (name.equals("tpc")) { RegNo = Sparc::TPC; RegKind = SparcOperand::rk_Special; diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index 8e298e8316da..3e30dae1537f 100644 --- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -350,18 +350,18 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return MCDisassembler::Fail; // Calling the auto-generated decoder function. 
- + if (STI.getFeatureBits()[Sparc::FeatureV9]) { Result = decodeInstruction(DecoderTableSparcV932, Instr, Insn, Address, this, STI); } else { - Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableSparcV832, Instr, Insn, Address, this, STI); } if (Result != MCDisassembler::Fail) return Result; - + Result = decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI); @@ -662,7 +662,7 @@ static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address, if (status != MCDisassembler::Success) return status; } - + // Decode CC MI.addOperand(MCOperand::createImm(cc)); diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 4981deae6af6..c1512cbdc44f 100644 --- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -118,9 +118,9 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, if (MO.isImm()) { switch (MI->getOpcode()) { default: - O << (int)MO.getImm(); + O << (int)MO.getImm(); return; - + case SP::TICCri: // Fall through case SP::TICCrr: // Fall through case SP::TRAPri: // Fall through @@ -128,7 +128,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, case SP::TXCCri: // Fall through case SP::TXCCrr: // Fall through // Only seven-bit values up to 127. - O << ((int) MO.getImm() & 0x7f); + O << ((int) MO.getImm() & 0x7f); return; } } diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm/lib/Target/Sparc/Sparc.h index 4135e4e1b61d..0cea53b359eb 100644 --- a/contrib/llvm/lib/Target/Sparc/Sparc.h +++ b/contrib/llvm/lib/Target/Sparc/Sparc.h @@ -73,7 +73,7 @@ namespace llvm { FCC_LE = 13+16, // Less or Equal FCC_ULE = 14+16, // Unordered or Less or Equal FCC_O = 15+16, // Ordered - + CPCC_A = 8+32, // Always CPCC_N = 0+32, // Never CPCC_3 = 7+32, diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h index bf700d6a99d8..0cbbda787881 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -59,9 +59,9 @@ namespace llvm { public: SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - + bool useSoftFloat() const override; - + /// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
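The SparcInstPrinter hunk above notes that trap immediates only encode seven bits, hence the & 0x7f before printing. A tiny standalone demo of the masking:

    #include <cstdio>

    int main() {
      // SPARC trap numbers are 7-bit, so the printer masks the immediate
      // down to the range 0..127 before printing it.
      int Imm = 200;
      std::printf("%d -> %d\n", Imm, Imm & 0x7f); // 200 -> 72
    }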
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 6750763d8ee5..47b42444b94d 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -115,7 +115,7 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) case SPCC::FCC_UE: return SPCC::FCC_LG; case SPCC::FCC_NE: return SPCC::FCC_E; case SPCC::FCC_E: return SPCC::FCC_NE; - + case SPCC::CPCC_A: return SPCC::CPCC_N; case SPCC::CPCC_N: return SPCC::CPCC_A; case SPCC::CPCC_3: LLVM_FALLTHROUGH; diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index a0d40653fd9b..07f9e7250bd9 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -100,7 +100,7 @@ SparcTargetMachine::SparcTargetMachine( SparcTargetMachine::~SparcTargetMachine() {} -const SparcSubtarget * +const SparcSubtarget * SparcTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); @@ -119,7 +119,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const { F.hasFnAttribute("use-soft-float") && F.getFnAttribute("use-soft-float").getValueAsString() == "true"; - if (softFloat) + if (softFloat) FS += FS.empty() ? "+soft-float" : ",+soft-float"; auto &I = SubtargetMap[CPU + FS]; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index d300d1d88abc..b9e5788cf018 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -55,7 +55,7 @@ getNumDecoderSlots(SUnit *SU) const { else return 3; // Expanded/group-alone instruction } - + return 1; // Normal instruction } @@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) { void SystemZHazardRecognizer::Reset() { CurrGroupSize = 0; + CurrGroupHas4RegOps = false; clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; @@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { if (SC->BeginGroup) return (CurrGroupSize == 0); + // An instruction with 4 register operands will not fit in last slot. + assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) && + "Current decoder group is already full!"); + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return false; + // Since a full group is handled immediately in EmitInstruction(), // SU should fit into current group. NumSlots should be 1 or 0, // since it is not a cracked or expanded instruction. 
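The fitsIntoCurrentGroup change above introduces the rule that an instruction with four register operands cannot take the last decoder slot of a z13 group. A simplified model of that bookkeeping, ignoring cracked/expanded instructions for brevity (a sketch only, not the recognizer's code):

    #include <cstdio>

    // Toy model: a decoder group normally holds three slots, but once it
    // contains a four-register-operand instruction it closes after two,
    // and such an instruction is never accepted into the third slot.
    struct DecoderGroup {
      unsigned Size = 0;
      bool Has4RegOps = false;

      unsigned limit() const { return Has4RegOps ? 2 : 3; }

      bool fits(bool Is4RegOp) const {
        if (Size == 2 && Is4RegOp)
          return false; // would land in the last slot
        return Size < limit();
      }

      void emit(bool Is4RegOp) {
        Size += 1;
        Has4RegOps |= Is4RegOp;
      }
    };

    int main() {
      DecoderGroup G;
      G.emit(false);                                      // slot 0: ordinary op
      std::printf("%s\n", G.fits(true) ? "fits" : "no");  // fits (slot 1)
      G.emit(true);                                       // slot 1: 4-reg op
      std::printf("%s\n", G.fits(false) ? "fits" : "no"); // no: group closes at 2
    }

The has4RegOps hunk that follows implements the operand counting itself: defs and tied operands past the def list are skipped, and only genuine register operands contribute to the count.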
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { return true; } +bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + const MCInstrDesc &MID = MI->getDesc(); + unsigned Count = 0; + for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); + if (RC == nullptr) + continue; + if (OpIdx >= MID.getNumDefs() && + MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + continue; + Count++; + } + return Count >= 4; +} + void SystemZHazardRecognizer::nextGroup() { if (CurrGroupSize == 0) return; @@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() { // Reset counter for next group. CurrGroupSize = 0; + CurrGroupHas4RegOps = false; // Decrease counters for execution units by one. for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) @@ -142,7 +167,7 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return; - + for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "/EndsGroup"; if (SU->isUnbuffered) OS << "/Unbuffered"; + if (has4RegOps(SU->getInstr())) + OS << "/4RegOps"; } void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { @@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { dbgs() << "{ " << CurGroupDbg << " }"; dbgs() << " (" << CurrGroupSize << " decoder slot" << (CurrGroupSize > 1 ? "s":"") + << (CurrGroupHas4RegOps ? ", 4RegOps" : "") << ")\n"; } } @@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) { // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); - assert (CurrGroupSize <= 3); + CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); + unsigned GroupLim = + ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3); + assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!"); // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == GroupLim || SC->EndGroup) nextGroup(); } @@ -306,7 +337,7 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { const MCSchedClassDesc *SC = getSchedClass(SU); if (!SC->isValid()) return 0; - + // If SU begins new group, it can either break a current group early // or fit naturally if current group is empty (negative cost). if (SC->BeginGroup) { @@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { return -1; } + // An instruction with 4 register operands will not fit in last slot. + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return 1; + // Most instructions can be placed in any decoder slot. 
return 0; } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h index 40cb3acc7009..6292feefbfea 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -45,15 +45,17 @@ namespace llvm { /// SystemZHazardRecognizer maintains the state for one MBB during scheduling. class SystemZHazardRecognizer : public ScheduleHazardRecognizer { -#ifndef NDEBUG const SystemZInstrInfo *TII; -#endif const TargetSchedModel *SchedModel; /// Keep track of the number of decoder slots used in the current /// decoder group. unsigned CurrGroupSize; + /// True if an instruction with four reg operands have been scheduled into + /// the current decoder group. + bool CurrGroupHas4RegOps; + /// The tracking of resources here are quite similar to the common /// code use of a critical resource. However, z13 differs in the way /// that it has two processor sides which may be interesting to @@ -73,6 +75,9 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer { /// Return true if MI fits into current decoder group. bool fitsIntoCurrentGroup(SUnit *SU) const; + /// Return true if this instruction has four register operands. + bool has4RegOps(const MachineInstr *MI) const; + /// Two decoder groups per cycle are formed (for z13), meaning 2x3 /// instructions. This function returns a number between 0 and 5, /// representing the current decoder slot of the current cycle. If an SU @@ -105,11 +110,7 @@ class SystemZHazardRecognizer : public ScheduleHazardRecognizer { public: SystemZHazardRecognizer(const SystemZInstrInfo *tii, const TargetSchedModel *SM) - : -#ifndef NDEBUG - TII(tii), -#endif - SchedModel(SM) { + : TII(tii), SchedModel(SM) { Reset(); } @@ -134,7 +135,7 @@ public: /// new decoder group, this is negative if this fits the schedule or /// positive if it would mean ending a group prematurely. For normal /// instructions this returns 0. - int groupingCost(SUnit *SU) const; + int groupingCost(SUnit *SU) const; /// Return the cost of SU in regards to processor resources usage. /// A positive value means it would be better to wait with SU, while diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 302c7883f97b..e76fa71dacd7 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::BSWAP); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::ROTL); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -609,7 +605,7 @@ struct AddressingMode { // True if use of index register is supported. bool IndexReg; - + AddressingMode(bool LongDispl, bool IdxReg) : LongDisplacement(LongDispl), IndexReg(IdxReg) {} }; @@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP( return SDValue(); } -SDValue SystemZTargetLowering::combineSHIFTROT( - SDNode *N, DAGCombinerInfo &DCI) const { - - SelectionDAG &DAG = DCI.DAG; - - // Shift/rotate instructions only use the last 6 bits of the second operand - // register. 
If the second operand is the result of an AND with an immediate - // value that has its last 6 bits set, we can safely remove the AND operation. - // - // If the AND operation doesn't have the last 6 bits set, we can't remove it - // entirely, but we can still truncate it to a 16-bit value. This prevents - // us from ending up with a NILL with a signed operand, which will cause the - // instruction printer to abort. - SDValue N1 = N->getOperand(1); - if (N1.getOpcode() == ISD::AND) { - SDValue AndMaskOp = N1->getOperand(1); - auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp); - - // The AND mask is constant - if (AndMask) { - auto AmtVal = AndMask->getZExtValue(); - - // Bottom 6 bits are set - if ((AmtVal & 0x3f) == 0x3f) { - SDValue AndOp = N1->getOperand(0); - - // This is the only use, so remove the node - if (N1.hasOneUse()) { - // Combine the AND away - DCI.CombineTo(N1.getNode(), AndOp); - - // Return N so it isn't rechecked - return SDValue(N, 0); - - // The node will be reused, so create a new node for this one use - } else { - SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0), N->getOperand(0), - AndOp); - DCI.AddToWorklist(Replace.getNode()); - - return Replace; - } - - // We can't remove the AND, but we can use NILL here (normally we would - // use NILF). Only keep the last 16 bits of the mask. The actual - // transformation will be handled by .td definitions. - } else if (AmtVal >> 16 != 0) { - SDValue AndOp = N1->getOperand(0); - - auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff, - SDLoc(AndMaskOp), - AndMaskOp.getValueType()); - - auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(), - AndOp, NewMask); - - SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0), N->getOperand(0), - NewAnd); - DCI.AddToWorklist(Replace.getNode()); - - return Replace; - } - } - } - - return SDValue(); -} - static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, @@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: return combineSHIFTROT(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 0ca93a38a016..267e31a85216 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -602,7 +602,6 @@ private: SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td 
b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 9d7312269957..bb5b7aae883b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), //===----------------------------------------------------------------------===// // Logical shift left. -defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; -def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; +defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>; +def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>; def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; // Arithmetic shift left. @@ -1364,20 +1364,20 @@ let Defs = [CC] in { } // Logical shift right. -defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; -def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; +defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>; +def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>; def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; - def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; + defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>; + def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>; def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>; } // Rotate left. -def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; -def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; +def RLL : BinaryRSY<"rll", 0xEB1D, shiftop<rotl>, GR32>; +def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>; // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and @@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), // Complexity is added so that we match this before we match NILF on the AND // operation alone. 
let AddedComplexity = 4 in { - def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)), - (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; - def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)), - (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>; + def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)), + (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>; } // Peepholes for turning scalar operations into block operations. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index fcbf4c4b5fe4..98e761ef87fe 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -129,7 +129,7 @@ SystemZPostRASchedStrategy:: SystemZPostRASchedStrategy(const MachineSchedContext *C) : MLI(C->MLI), TII(static_cast<const SystemZInstrInfo *> - (C->MF->getSubtarget().getInstrInfo())), + (C->MF->getSubtarget().getInstrInfo())), MBB(nullptr), HazardRec(nullptr) { const TargetSubtargetInfo *ST = &C->MF->getSubtarget(); SchedModel.init(ST); @@ -169,8 +169,7 @@ SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) { return *Available.begin(); } - // All nodes that are possible to schedule are stored by in the - // Available set. + // All nodes that are possible to schedule are stored in the Available set. 
LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec);); Candidate Best; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h index cb0304825966..ab820e5d3e63 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -26,7 +26,7 @@ using namespace llvm; namespace llvm { - + /// A MachineSchedStrategy implementation for SystemZ post RA scheduling. class SystemZPostRASchedStrategy : public MachineSchedStrategy { @@ -37,7 +37,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy { // non-scheduled instructions, so it would not always be possible to call // DAG->getSchedClass(SU). TargetSchedModel SchedModel; - + /// A candidate during instruction evaluation. struct Candidate { SUnit *SU = nullptr; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td index da682cb4e5ab..7bf32bf19a4a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -357,6 +357,7 @@ def imm32zx16 : Immediate<i32, [{ }], UIMM16, "U16Imm">; def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">; +def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">; // Full 32-bit immediates. we need both signed and unsigned versions // because the assembler is picky. E.g. AFI requires signed operands diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td index 3cfe23aec417..5103867e2d9a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -697,6 +697,16 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store> : PatFrag<(ops node:$addr), (store (operator), node:$addr)>; +// Create a shift operator that optionally ignores an AND of the +// shift count with an immediate if the bottom 6 bits are all set. +def imm32bottom6set : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() & 0x3f) == 0x3f; +}]>; +class shiftop<SDPatternOperator operator> + : PatFrags<(ops node:$val, node:$count), + [(operator node:$val, node:$count), + (operator node:$val, (and node:$count, imm32bottom6set))]>; + // Vector representation of all-zeros and all-ones. def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index e2a3efda5c5e..c5cdc22f2099 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -329,7 +329,7 @@ bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { } int SystemZTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, + unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, @@ -469,7 +469,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, assert (Tp->isVectorTy()); assert (ST->hasVector() && "getShuffleCost() called."); unsigned NumVectors = getNumberOfParts(Tp); - + // TODO: Since fp32 is expanded, the shuffle cost should always be 0. 
// FP128 values are always in scalar registers, so there is no work @@ -647,7 +647,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Cost; } } - + if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP || Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) { // TODO: Fix base implementation which could simplify things a bit here @@ -704,7 +704,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/); - + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && Src->isIntegerTy(1)) { // This should be extension of a compare i1 result, which is done with diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index 42d92622d6c8..f23ea72eb513 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the common infrastructure (including C bindings) for +// This file implements the common infrastructure (including C bindings) for // libLLVMTarget.a, which implements target information. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp index 907ecf46e8ff..6bcf60fafc3e 100644 --- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -92,10 +92,10 @@ static bool IsNullTerminatedString(const Constant *C) { if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { unsigned NumElts = CDS->getNumElements(); assert(NumElts != 0 && "Can't have an empty CDS"); - + if (CDS->getElementAsInteger(NumElts-1) != 0) return false; // Not null terminated. - + // Verify that the null doesn't occur anywhere else in the string. 
for (unsigned i = 0; i != NumElts-1; ++i) if (CDS->getElementAsInteger(i) == 0) diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index b84c2d31a63e..fafbed0bd935 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2603,11 +2603,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, bool HadVerifyError = false; // Append default arguments to "ins[bwld]" - if (Name.startswith("ins") && + if (Name.startswith("ins") && (Operands.size() == 1 || Operands.size() == 3) && (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" || Name == "ins")) { - + AddDefaultSrcDestOperands(TmpOperands, X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), DefaultMemDIOperand(NameLoc)); @@ -2615,7 +2615,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } // Append default arguments to "outs[bwld]" - if (Name.startswith("outs") && + if (Name.startswith("outs") && (Operands.size() == 1 || Operands.size() == 3) && (Name == "outsb" || Name == "outsw" || Name == "outsl" || Name == "outsd" || Name == "outs")) { diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 82e82fe1efd9..0e861d5ddbc9 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -92,7 +92,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // the hex value of the immediate operand when it isn't in the range // [-256,255]. if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) { - // Don't print unnecessary hex sign bits. + // Don't print unnecessary hex sign bits. if (Imm == (int16_t)(Imm)) *CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm); else if (Imm == (int32_t)(Imm)) diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index d030f26d98de..f1d15e66918b 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -307,10 +307,84 @@ class X86MCInstrAnalysis : public MCInstrAnalysis { public: X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {} + bool isDependencyBreaking(const MCSubtargetInfo &STI, + const MCInst &Inst) const override; bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Mask) const override; }; +bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI, + const MCInst &Inst) const { + if (STI.getCPU() == "btver2") { + // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and + // Jaguar pipeline", subsection 8 "Dependency-breaking instructions". 
+ switch (Inst.getOpcode()) { + default: + return false; + case X86::SUB32rr: + case X86::SUB64rr: + case X86::SBB32rr: + case X86::SBB64rr: + case X86::XOR32rr: + case X86::XOR64rr: + case X86::XORPSrr: + case X86::XORPDrr: + case X86::VXORPSrr: + case X86::VXORPDrr: + case X86::ANDNPSrr: + case X86::VANDNPSrr: + case X86::ANDNPDrr: + case X86::VANDNPDrr: + case X86::PXORrr: + case X86::VPXORrr: + case X86::PANDNrr: + case X86::VPANDNrr: + case X86::PSUBBrr: + case X86::PSUBWrr: + case X86::PSUBDrr: + case X86::PSUBQrr: + case X86::VPSUBBrr: + case X86::VPSUBWrr: + case X86::VPSUBDrr: + case X86::VPSUBQrr: + case X86::PCMPEQBrr: + case X86::PCMPEQWrr: + case X86::PCMPEQDrr: + case X86::PCMPEQQrr: + case X86::VPCMPEQBrr: + case X86::VPCMPEQWrr: + case X86::VPCMPEQDrr: + case X86::VPCMPEQQrr: + case X86::PCMPGTBrr: + case X86::PCMPGTWrr: + case X86::PCMPGTDrr: + case X86::PCMPGTQrr: + case X86::VPCMPGTBrr: + case X86::VPCMPGTWrr: + case X86::VPCMPGTDrr: + case X86::VPCMPGTQrr: + case X86::MMX_PXORirr: + case X86::MMX_PANDNirr: + case X86::MMX_PSUBBirr: + case X86::MMX_PSUBDirr: + case X86::MMX_PSUBQirr: + case X86::MMX_PSUBWirr: + case X86::MMX_PCMPGTBirr: + case X86::MMX_PCMPGTDirr: + case X86::MMX_PCMPGTWirr: + case X86::MMX_PCMPEQBirr: + case X86::MMX_PCMPEQDirr: + case X86::MMX_PCMPEQWirr: + return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg(); + case X86::CMP32rr: + case X86::CMP64rr: + return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg(); + } + } + + return false; +} + bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Mask) const { diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h index c49a6838fa44..d0fcbd313312 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.h +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h @@ -66,7 +66,7 @@ inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, // not to split i64 and double between a register and stack static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]); - + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); // If this is the first part of an double/i64/i128, or if we're already diff --git a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp index f73455cc31b8..1c5f110d8c60 100644 --- a/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/contrib/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -622,7 +622,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // If the CMOV group is not packed, e.g., there are debug instructions between // first CMOV and last CMOV, then pack the group and make the CMOV instruction - // consecutive by moving the debug instructions to after the last CMOV. + // consecutive by moving the debug instructions to after the last CMOV. 
packCmovGroup(Group.front(), Group.back()); // To convert a CMOVcc instruction, we actually have to insert the diamond diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index de8b40f28a86..35a15577fe09 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1195,7 +1195,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2649,7 +2649,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::VMOVPDI2DIrr), ResultReg) .addReg(InputReg, RegState::Kill); - + // The result value is in the lower 16-bits of ResultReg. unsigned RegIdx = X86::sub_16bit; ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx); @@ -3687,7 +3687,7 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { unsigned Reg = getRegForValue(I->getOperand(0)); if (Reg == 0) return false; - + // No instruction is needed for conversion. Reuse the register used by // the fist operand. updateValueMap(I, Reg); diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp index d85389a0a7f1..f3f7f6a37360 100644 --- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -578,7 +578,7 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF, continue; if (OptLEA) { - if (MF.getSubtarget<X86Subtarget>().isSLM()) + if (MF.getSubtarget<X86Subtarget>().slowLEA()) processInstructionForSLM(I, MFI); else { diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index 1ba08d39c595..c17c51a7aeac 100644 --- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -730,9 +730,12 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode()); - if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() && - TRI->isVirtualRegister(MI.getOperand(0).getReg())) + if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && + TRI->isVirtualRegister(MI.getOperand(0).getReg())) { + assert(MI.getOperand(0).isDef() && + "A non-storing SETcc should always define a register!"); CondRegs[Cond] = MI.getOperand(0).getReg(); + } // Stop scanning when we see the first definition of the EFLAGS as prior to // this we would potentially capture the wrong flag state. 
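The tightened SETcc check in the X86FlagsCopyLowering hunk above matters because SETcc also exists in a store form, which writes memory and defines no register that later code could reuse. A simplified standalone model of the reverse scan, illustrative only; the types and fields are invented for the sketch:

#include <cassert>
#include <map>
#include <vector>

enum Cond { COND_E, COND_NE, COND_INVALID };

struct ToyMI {
  Cond SetccCond;     // COND_INVALID when this is not a SETcc
  bool MayStore;      // true for the SETcc-to-memory forms
  bool DefinesEFLAGS;
  int DefReg;         // register defined by a non-storing SETcc
};

// Walk back from the test point (newest instruction first), remember which
// register holds each materialized condition, and stop at the first
// redefinition of EFLAGS: anything older captured a different flag state.
static std::map<Cond, int> collectCondsInRegs(const std::vector<ToyMI> &Rev) {
  std::map<Cond, int> CondRegs;
  for (const ToyMI &MI : Rev) {
    if (MI.SetccCond != COND_INVALID && !MI.MayStore)
      CondRegs[MI.SetccCond] = MI.DefReg;
    if (MI.DefinesEFLAGS)
      break;
  }
  return CondRegs;
}

int main() {
  std::vector<ToyMI> Rev = {
      {COND_E, false, false, 1},      // sete %r1
      {COND_E, true, false, 0},       // sete (mem): nothing to reuse
      {COND_INVALID, false, true, 0}, // redefines EFLAGS, scan stops here
      {COND_NE, false, false, 9},     // stale, must be ignored
  };
  auto Regs = collectCondsInRegs(Rev);
  assert(Regs.count(COND_E) && Regs[COND_E] == 1);
  assert(!Regs.count(COND_NE));
}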
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index ae748901164a..f330acff61a1 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -347,12 +347,12 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { LiveBundle &Bundle = LiveBundles[Bundles->getBundle(Entry->getNumber(), false)]; - + // In regcall convention, some FP registers may not be passed through // the stack, so they will need to be assigned to the stack first if ((Entry->getParent()->getFunction().getCallingConv() == CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) { - // In the register calling convention, up to one FP argument could be + // In the register calling convention, up to one FP argument could be // saved in the first FP register. // If bundle.mask is non-zero and Bundle.FixCount is zero, it means // that the FP registers contain arguments. @@ -991,7 +991,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) { assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); // Reset the FP Stack - It is required because of possible leftovers from - // passed arguments. The caller should assume that the FP stack is + // passed arguments. The caller should assume that the FP stack is // returned empty (unless the callee returns values on FP stack). while (StackTop > 0) popReg(); diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index a257ec41f75b..e207c343fac8 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -68,7 +68,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { // needsFrameIndexResolution - Do we need to perform FI resolution for // this function. Normally, this is required only when the function // has any stack objects. However, FI resolution actually has another job, -// not apparent from the title - it resolves callframesetup/destroy +// not apparent from the title - it resolves callframesetup/destroy // that were not simplified earlier. // So, this is required for x86 functions that have push sequences even // when there are no stack objects. @@ -607,8 +607,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, int64_t RCXShadowSlot = 0; int64_t RDXShadowSlot = 0; - // If inlining in the prolog, save RCX and RDX. - // Future optimization: don't save or restore if not live in. + // If inlining in the prolog, save RCX and RDX. if (InProlog) { // Compute the offsets. We need to account for things already // pushed onto the stack at this point: return address, frame @@ -616,15 +615,30 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); const bool HasFP = hasFP(MF); - RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); - RDXShadowSlot = RCXShadowSlot + 8; - // Emit the saves. - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RCXShadowSlot) - .addReg(X86::RCX); - addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, - RDXShadowSlot) - .addReg(X86::RDX); + + // Check if we need to spill RCX and/or RDX. + // Here we assume that no earlier prologue instruction changes RCX and/or + // RDX, so checking the block live-ins is enough. 
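The slot assignment that follows, and the matching conditional restores further down, can be modeled standalone. Note that the restores are keyed on a non-zero slot offset, which is sound because a real shadow slot always sits at offset 8 or more. A sketch under those assumptions, illustrative only:

#include <cassert>
#include <cstdint>

// Offsets are relative to RSP; 0 doubles as "not spilled".
static void assignShadowSlots(bool IsRCXLiveIn, bool IsRDXLiveIn,
                              int64_t CalleeSaveSize, bool HasFP,
                              int64_t &RCXSlot, int64_t &RDXSlot) {
  RCXSlot = RDXSlot = 0;
  // Skip the return address, the callee saves, and the saved frame pointer.
  int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
  if (IsRCXLiveIn)
    RCXSlot = InitSlot;
  if (IsRDXLiveIn)
    RDXSlot = InitSlot;
  if (IsRCXLiveIn && IsRDXLiveIn)
    RDXSlot += 8; // both live: RDX takes the next 8-byte slot
}

int main() {
  int64_t RCX, RDX;
  assignShadowSlots(true, true, /*CalleeSaveSize=*/16, /*HasFP=*/true, RCX, RDX);
  assert(RCX == 32 && RDX == 40);
  assignShadowSlots(false, true, /*CalleeSaveSize=*/0, /*HasFP=*/false, RCX, RDX);
  assert(RCX == 0 && RDX == 8); // only RDX is spilled and later restored
}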
+ const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); + const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); + int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); + // Assign the initial slot to both registers, then change RDX's slot if both + // need to be spilled. + if (IsRCXLiveIn) + RCXShadowSlot = InitSlot; + if (IsRDXLiveIn) + RDXShadowSlot = InitSlot; + if (IsRDXLiveIn && IsRCXLiveIn) + RDXShadowSlot += 8; + // Emit the saves if needed. + if (IsRCXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RCXShadowSlot) + .addReg(X86::RCX); + if (IsRDXLiveIn) + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RDXShadowSlot) + .addReg(X86::RDX); } else { // Not in the prolog. Copy RAX to a virtual reg. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); @@ -661,6 +675,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB); // Add code to roundMBB to round the final stack pointer to a page boundary. + RoundMBB->addLiveIn(FinalReg); BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) .addReg(FinalReg) .addImm(PageMask); @@ -677,6 +692,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, .addMBB(LoopMBB); } + LoopMBB->addLiveIn(JoinReg); addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, false, -PageSize); @@ -688,6 +704,8 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, .addImm(0) .addReg(0) .addImm(0); + + LoopMBB->addLiveIn(RoundedReg); BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) .addReg(RoundedReg) .addReg(ProbeReg); @@ -697,16 +715,19 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, // If in prolog, restore RDX and RCX. if (InProlog) { - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RCX), - X86::RSP, false, RCXShadowSlot); - addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), - X86::RDX), - X86::RSP, false, RDXShadowSlot); + if (RCXShadowSlot) // It means we spilled RCX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RCX), + X86::RSP, false, RCXShadowSlot); + if (RDXShadowSlot) // It means we spilled RDX in the prologue. + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, + TII.get(X86::MOV64rm), X86::RDX), + X86::RSP, false, RDXShadowSlot); } // Now that the probing is done, add code to continueMBB to update // the stack pointer for real. + ContinueMBB->addLiveIn(SizeReg); BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) .addReg(X86::RSP) .addReg(SizeReg); @@ -734,8 +755,6 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, CMBBI->setFlag(MachineInstr::FrameSetup); } } - - // Possible TODO: physreg liveness for InProlog case. } void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, @@ -2694,7 +2713,7 @@ bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, Regs[FoundRegs++] = Regs[0]; for (int i = 0; i < NumPops; ++i) - BuildMI(MBB, MBBI, DL, + BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); return true; @@ -2984,7 +3003,7 @@ struct X86FrameSortingComparator { // in general. Something to keep in mind, though. if (DensityAScaled == DensityBScaled) return A.ObjectAlignment < B.ObjectAlignment; - + return DensityAScaled < DensityBScaled; } }; @@ -3020,7 +3039,7 @@ void X86FrameLowering::orderFrameObjects( if (ObjectSize == 0) // Variable size. 
Just use 4. SortingObjects[Obj].ObjectSize = 4; - else + else SortingObjects[Obj].ObjectSize = ObjectSize; } diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 7dcdb7967058..2820004cfc6d 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1800,17 +1800,19 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const { } MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return MVT::v32i8; - return TargetLowering::getRegisterTypeForCallingConv(Context, VT); + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return 1; - return TargetLowering::getNumRegistersForCallingConv(Context, VT); + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, @@ -23366,7 +23368,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, return DAG.getBuildVector(VT, dl, Elts); } - // If the target doesn't support variable shifts, use either FP conversion + // If the target doesn't support variable shifts, use either FP conversion // or integer multiplication to avoid shifting each element individually. if (VT == MVT::v4i32) { Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); @@ -23509,6 +23511,24 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG)) return DAG.getNode(ISD::MUL, dl, VT, R, Scale); + // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we + // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt). + // TODO: Improve support for the shift by zero special case. + if (Op.getOpcode() == ISD::SRL && ConstantAmt && + ((Subtarget.hasSSE41() && VT == MVT::v8i16) || + DAG.isKnownNeverZero(Amt)) && + (VT == MVT::v16i8 || VT == MVT::v8i16 || + ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) { + SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT); + SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); + if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); + SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); + return DAG.getSelect(dl, VT, ZAmt, R, Res); + } + } + // v4i32 Non Uniform Shifts. // If the shift amount is constant we can shift each lane using the SSE2 // immediate shifts, else we need to zero-extend each lane to the lower i64 @@ -33425,33 +33445,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, } } - // Handle (CMOV C-1, (ADD (CTTZ X), C), (X != 0)) -> - // (ADD (CMOV (CTTZ X), -1, (X != 0)), C) or - // (CMOV (ADD (CTTZ X), C), C-1, (X == 0)) -> - // (ADD (CMOV C-1, (CTTZ X), (X == 0)), C) - if (CC == X86::COND_NE || CC == X86::COND_E) { - auto *Cnst = CC == X86::COND_E ? dyn_cast<ConstantSDNode>(TrueOp) - : dyn_cast<ConstantSDNode>(FalseOp); - SDValue Add = CC == X86::COND_E ? 
FalseOp : TrueOp; - - if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) { - auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1)); - SDValue AddOp2 = Add.getOperand(0); - if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF || - AddOp2.getOpcode() == ISD::CTTZ)) { - APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue(); - if (CC == X86::COND_E) { - Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2, - DAG.getConstant(Diff, DL, Add.getValueType()), - DAG.getConstant(CC, DL, MVT::i8), Cond); - } else { - Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), - DAG.getConstant(Diff, DL, Add.getValueType()), - AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond); - } - return DAG.getNode(X86ISD::ADD, DL, Add.getValueType(), Add, - SDValue(AddOp1, 0)); - } + // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) -> + // (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2) + // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) -> + // (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2) + if ((CC == X86::COND_NE || CC == X86::COND_E) && + Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) { + SDValue Add = TrueOp; + SDValue Const = FalseOp; + // Canonicalize the condition code for easier matching and output. + if (CC == X86::COND_E) { + std::swap(Add, Const); + CC = X86::COND_NE; + } + + // Ok, now make sure that Add is (add (cttz X), C2) and Const is a constant. + if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD && + Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) && + (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF || + Add.getOperand(0).getOpcode() == ISD::CTTZ) && + Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) { + EVT VT = N->getValueType(0); + // This should constant fold. + SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); + SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), + DAG.getConstant(CC, DL, MVT::i8), Cond); + return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); } } @@ -33873,31 +33892,42 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!C) return SDValue(); - uint64_t MulAmt = C->getZExtValue(); - if (isPowerOf2_64(MulAmt)) + if (isPowerOf2_64(C->getZExtValue())) return SDValue(); + int64_t SignMulAmt = C->getSExtValue(); + assert(SignMulAmt != INT64_MIN && "Int min should have been handled!"); + uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt; + SDLoc DL(N); - if (MulAmt == 3 || MulAmt == 5 || MulAmt == 9) - return DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), - N->getOperand(1)); + if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) { + SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(AbsMulAmt, DL, VT)); + if (SignMulAmt < 0) + NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + NewMul); + + return NewMul; + } uint64_t MulAmt1 = 0; uint64_t MulAmt2 = 0; - if ((MulAmt % 9) == 0) { + if ((AbsMulAmt % 9) == 0) { MulAmt1 = 9; - MulAmt2 = MulAmt / 9; - } else if ((MulAmt % 5) == 0) { + MulAmt2 = AbsMulAmt / 9; + } else if ((AbsMulAmt % 5) == 0) { MulAmt1 = 5; - MulAmt2 = MulAmt / 5; - } else if ((MulAmt % 3) == 0) { + MulAmt2 = AbsMulAmt / 5; + } else if ((AbsMulAmt % 3) == 0) { MulAmt1 = 3; - MulAmt2 = MulAmt / 3; + MulAmt2 = AbsMulAmt / 3; } SDValue NewMul; + // For negative multiply amounts, only allow MulAmt2 to be a power of 2. 
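Two of the rewrites above rest on small scalar identities that can be checked in isolation. First, the constant vXi16 SRL-to-MULHU combine: for a shift amount 1 <= s <= 15, x >> s equals the high 16 bits of x * 2^(16-s), which is also why shift-by-zero needs the separate select on Amt == 0. An exhaustive standalone check, illustrative only:

#include <cassert>
#include <cstdint>

static uint16_t mulhu16(uint16_t A, uint16_t B) {
  return uint16_t((uint32_t(A) * uint32_t(B)) >> 16); // high half of product
}

int main() {
  for (unsigned S = 1; S <= 15; ++S) {
    uint16_t Scale = uint16_t(1u << (16 - S)); // 2^(16-s); fits for s >= 1
    for (uint32_t X = 0; X <= 0xFFFF; ++X)
      assert((uint16_t(X) >> S) == mulhu16(uint16_t(X), Scale));
  }
}

Second, the reworked CTTZ/CMOV fold: hoisting the ADD past the CMOV preserves the value because on the X == 0 path the folded constant C1 - C2 plus C2 restores C1. A separate standalone check, illustrative only; it deliberately relies on unsigned wraparound:

#include <cassert>
#include <cstdint>

static uint32_t cttz32(uint32_t X) { // callers guarantee X != 0
  uint32_t N = 0;
  while (!(X & 1)) {
    X >>= 1;
    ++N;
  }
  return N;
}

// Before: CMOV selects between C1 and (CTTZ X) + C2 on X != 0.
static uint32_t foldBefore(uint32_t X, uint32_t C1, uint32_t C2) {
  return X != 0 ? cttz32(X) + C2 : C1;
}

// After: CMOV selects between CTTZ X and C1 - C2, then one shared ADD of C2.
static uint32_t foldAfter(uint32_t X, uint32_t C1, uint32_t C2) {
  return (X != 0 ? cttz32(X) : C1 - C2) + C2;
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 96u, 0x80000000u})
    for (uint32_t C1 : {0u, 5u, 64u})
      assert(foldBefore(X, C1, 7u) == foldAfter(X, C1, 7u));
}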
if (MulAmt2 && - (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ + (isPowerOf2_64(MulAmt2) || + (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) { if (isPowerOf2_64(MulAmt2) && !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) @@ -33919,17 +33949,19 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); + + // Negate the result. + if (SignMulAmt < 0) + NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + NewMul); } else if (!Subtarget.slowLEA()) - NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL); + NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); if (!NewMul) { - assert(MulAmt != 0 && - MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && + assert(C->getZExtValue() != 0 && + C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && "Both cases that could cause potential overflows should have " "already been handled."); - int64_t SignMulAmt = C->getSExtValue(); - assert(SignMulAmt != INT64_MIN && "Int min should have been handled!"); - uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt; if (isPowerOf2_64(AbsMulAmt - 1)) { // (mul x, 2^N + 1) => (add (shl x, N), x) NewMul = DAG.getNode( @@ -36738,6 +36770,145 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, return DAG.getNode(Opc, DL, VT, LHS, RHS); } +// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes +// from one vector with signed bytes from another vector, adds together +// adjacent pairs of 16-bit products, and saturates the result before +// truncating to 16-bits. +// +// Which looks something like this: +// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))), +// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B)))))))) +static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + const SDLoc &DL) { + if (!VT.isVector() || !Subtarget.hasSSSE3()) + return SDValue(); + + unsigned NumElems = VT.getVectorNumElements(); + EVT ScalarVT = VT.getVectorElementType(); + if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems)) + return SDValue(); + + SDValue SSatVal = detectSSatPattern(In, VT); + if (!SSatVal || SSatVal.getOpcode() != ISD::ADD) + return SDValue(); + + // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs + // of multiplies from even/odd elements. + SDValue N0 = SSatVal.getOperand(0); + SDValue N1 = SSatVal.getOperand(1); + + if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + // TODO: Handle constant vectors and use knownbits/computenumsignbits? + // Canonicalize zero_extend to LHS. + if (N01.getOpcode() == ISD::ZERO_EXTEND) + std::swap(N00, N01); + if (N11.getOpcode() == ISD::ZERO_EXTEND) + std::swap(N10, N11); + + // Ensure we have a zero_extend and a sign_extend. + if (N00.getOpcode() != ISD::ZERO_EXTEND || + N01.getOpcode() != ISD::SIGN_EXTEND || + N10.getOpcode() != ISD::ZERO_EXTEND || + N11.getOpcode() != ISD::SIGN_EXTEND) + return SDValue(); + + // Peek through the extends. + N00 = N00.getOperand(0); + N01 = N01.getOperand(0); + N10 = N10.getOperand(0); + N11 = N11.getOperand(0); + + // Ensure the extend is from vXi8. 
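The combineMul change above handles a negative constant amount by decomposing its absolute value into x3/x5/x9 LEA scales and power-of-2 shifts, then subtracting the product from zero. One instance of that decomposition, checked standalone in wraparound arithmetic, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t V : {0, 1, -7, 1000, -123456}) {
    uint32_t X = uint32_t(V);
    // x * -18: AbsMulAmt = 18 = 9 * 2, and 2 is the power-of-2 MulAmt2
    // that remains allowed when the sign is negative.
    uint32_t By9 = (X << 3) + X;           // the x9 LEA scale
    uint32_t By18 = By9 << 1;              // times the power-of-2 factor
    uint32_t Negated = uint32_t(0) - By18; // the final SUB 0, NewMul
    assert(Negated == X * uint32_t(-18));
  }
}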
+ if (N00.getValueType().getVectorElementType() != MVT::i8 || + N01.getValueType().getVectorElementType() != MVT::i8 || + N10.getValueType().getVectorElementType() != MVT::i8 || + N11.getValueType().getVectorElementType() != MVT::i8) + return SDValue(); + + // All inputs should be build_vectors. + if (N00.getOpcode() != ISD::BUILD_VECTOR || + N01.getOpcode() != ISD::BUILD_VECTOR || + N10.getOpcode() != ISD::BUILD_VECTOR || + N11.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + // N00/N10 are zero extended. N01/N11 are sign extended. + + // For each element, we need to ensure we have an odd element from one vector + // multiplied by the odd element of another vector and the even element from + // one of the same vectors being multiplied by the even element from the + // other vector. So we need to make sure for each element i, this operator + // is being performed: + // A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1] + SDValue ZExtIn, SExtIn; + for (unsigned i = 0; i != NumElems; ++i) { + SDValue N00Elt = N00.getOperand(i); + SDValue N01Elt = N01.getOperand(i); + SDValue N10Elt = N10.getOperand(i); + SDValue N11Elt = N11.getOperand(i); + // TODO: Be more tolerant to undefs. + if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1)); + auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1)); + auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1)); + auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1)); + if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt) + return SDValue(); + unsigned IdxN00 = ConstN00Elt->getZExtValue(); + unsigned IdxN01 = ConstN01Elt->getZExtValue(); + unsigned IdxN10 = ConstN10Elt->getZExtValue(); + unsigned IdxN11 = ConstN11Elt->getZExtValue(); + // Add is commutative so indices can be reordered. + if (IdxN00 > IdxN10) { + std::swap(IdxN00, IdxN10); + std::swap(IdxN01, IdxN11); + } + // N0 indices must be the even element. N1 indices must be the next odd element. + if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || + IdxN01 != 2 * i || IdxN11 != 2 * i + 1) + return SDValue(); + SDValue N00In = N00Elt.getOperand(0); + SDValue N01In = N01Elt.getOperand(0); + SDValue N10In = N10Elt.getOperand(0); + SDValue N11In = N11Elt.getOperand(0); + // First time we find an input, capture it. + if (!ZExtIn) { + ZExtIn = N00In; + SExtIn = N01In; + } + if (ZExtIn != N00In || SExtIn != N01In || + ZExtIn != N10In || SExtIn != N11In) + return SDValue(); + } + + auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + // Shrink by adding truncate nodes and let DAGCombine fold with the + // sources.
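For reference, the node being matched here has these per-lane scalar semantics: one input supplies unsigned bytes, the other signed bytes, the two adjacent products are summed, and the sum is signed-saturated to 16 bits. A standalone reference for one output lane, illustrative only:

#include <algorithm>
#include <cassert>
#include <cstdint>

static int16_t pmaddubswLane(uint8_t A0, uint8_t A1, int8_t B0, int8_t B1) {
  int32_t Sum = int32_t(A0) * B0 + int32_t(A1) * B1;
  // Signed saturation to the i16 range.
  return int16_t(std::min<int32_t>(std::max<int32_t>(Sum, -32768), 32767));
}

int main() {
  assert(pmaddubswLane(255, 255, 127, 127) == 32767);    // clamps high
  assert(pmaddubswLane(255, 255, -128, -128) == -32768); // clamps low
  assert(pmaddubswLane(2, 3, 4, -5) == 2 * 4 + 3 * -5);  // no saturation
}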
+ EVT InVT = Ops[0].getValueType(); + assert(InVT.getScalarType() == MVT::i8 && + "Unexpected scalar element type"); + assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, + InVT.getVectorNumElements() / 2); + return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]); + }; + return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, + PMADDBuilder); +} + static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); @@ -36752,6 +36923,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; + // Try to detect PMADD + if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) + return PMAdd; + // Try to combine truncation with signed/unsigned saturation. if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) return Val; @@ -36793,38 +36968,14 @@ static SDValue isFNEG(SDNode *N) { if (!Op1.getValueType().isFloatingPoint()) return SDValue(); - SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); - - unsigned EltBits = Op1.getScalarValueSizeInBits(); - auto isSignMask = [&](const ConstantFP *C) { - return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits); - }; - - // There is more than one way to represent the same constant on - // the different X86 targets. The type of the node may also depend on size. - // - load scalar value and broadcast - // - BUILD_VECTOR node - // - load from a constant pool. - // We check all variants here. - if (Op1.getOpcode() == X86ISD::VBROADCAST) { - if (auto *C = getTargetConstantFromNode(Op1.getOperand(0))) - if (isSignMask(cast<ConstantFP>(C))) - return Op0; - - } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) { - if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode()) - if (isSignMask(CN->getConstantFPValue())) - return Op0; + // Extract constant bits and see if they are all sign bit masks. + APInt UndefElts; + SmallVector<APInt, 16> EltBits; + if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(), + UndefElts, EltBits, false, false)) + if (llvm::all_of(EltBits, [](APInt &I) { return I.isSignMask(); })) + return peekThroughBitcasts(Op.getOperand(0)); - } else if (auto *C = getTargetConstantFromNode(Op1)) { - if (C->getType()->isVectorTy()) { - if (auto *SplatV = C->getSplatValue()) - if (isSignMask(cast<ConstantFP>(SplatV))) - return Op0; - } else if (auto *FPConst = dyn_cast<ConstantFP>(C)) - if (isSignMask(FPConst)) - return Op0; - } return SDValue(); } @@ -37777,8 +37928,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, // Look through extract_vector_elts. If it comes from an FNEG, create a // new extract from the FNEG input. if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa<ConstantSDNode>(V.getOperand(1)) && - cast<ConstantSDNode>(V.getOperand(1))->getZExtValue() == 0) { + isNullConstant(V.getOperand(1))) { if (SDValue NegVal = isFNEG(V.getOperand(0).getNode())) { NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), @@ -38896,7 +39046,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, std::swap(IdxN00, IdxN10); std::swap(IdxN01, IdxN11); } - // N0 indices be the even elemtn. N1 indices must be the next odd element. + // N0 indices be the even element. N1 indices must be the next odd element. 
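The isFNEG rewrite above replaces several ad hoc constant matchers with one query for the constant's bits, accepting any operand whose elements are all sign-bit masks. That is sufficient because XOR with the sign mask is exactly floating-point negation, as this small standalone check illustrates:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  Bits ^= 0x80000000u; // the f32 sign mask, i.e. APInt::getSignMask(32)
  std::memcpy(&F, &Bits, sizeof(F));
  assert(F == -1.5f);
}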
if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || IdxN01 != 2 * i || IdxN11 != 2 * i + 1) return SDValue(); @@ -39322,8 +39472,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) { - auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2)); - if (Idx2 && Idx2->getZExtValue() == 0) { + if (isNullConstant(Vec.getOperand(2))) { SDValue SubVec2 = Vec.getOperand(1); // If needed, look through bitcasts to get to the load. if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 32215b170a8c..ff5006d208e5 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -1097,10 +1097,11 @@ namespace llvm { /// Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; - MVT getRegisterTypeForCallingConv(LLVMContext &Context, + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override; unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; @@ -1125,8 +1126,8 @@ namespace llvm { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; - SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, - SDValue Addr, SelectionDAG &DAG) + SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, + SDValue Addr, SelectionDAG &DAG) const override; protected: diff --git a/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp index 5d8400595bfa..7d31cfab4137 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -1576,7 +1576,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE }, { X86::SUBSSrr, X86::SUBSSrm, 0 }, { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE }, - // FIXME: TEST*rr -> swapped operand of TEST *mr. + // FIXME: TEST*rr -> swapped operand of TEST *mr. { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 1b61accfb42b..96db8b4e7585 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7725,7 +7725,7 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, if (C.CallConstructionID == MachineOutlinerTailCall) { // Yes, just insert a JMP. It = MBB.insert(It, - BuildMI(MF, DebugLoc(), get(X86::JMP_1)) + BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64)) .addGlobalAddress(M.getNamedValue(MF.getName()))); } else { // No, insert a call. diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 7509b312c100..bc7afd32d494 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags // Bit tests instructions: BT, BTS, BTR, BTC. 
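The scheduling change below moves the BT/BTS/BTR/BTC register forms off WriteALU and onto the new dedicated WriteBitTest class. As a reminder of what this family computes, a standalone scalar model of the 32-bit register forms, illustrative only; the index is masked to the operand width:

#include <cassert>
#include <cstdint>

struct BTResult {
  uint32_t Dst; // unchanged for BT, updated for BTS/BTR/BTC
  bool CF;      // the tested bit, reported in EFLAGS.CF
};

static BTResult bt32(uint32_t Src, unsigned Idx) {
  Idx &= 31;
  return {Src, ((Src >> Idx) & 1) != 0};
}
static BTResult bts32(uint32_t Src, unsigned Idx) {
  Idx &= 31;
  return {Src | (1u << Idx), ((Src >> Idx) & 1) != 0};
}
static BTResult btr32(uint32_t Src, unsigned Idx) {
  Idx &= 31;
  return {Src & ~(1u << Idx), ((Src >> Idx) & 1) != 0};
}
static BTResult btc32(uint32_t Src, unsigned Idx) {
  Idx &= 31;
  return {Src ^ (1u << Idx), ((Src >> Idx) & 1) != 0};
}

int main() {
  assert(bt32(4, 2).CF && bt32(4, 2).Dst == 4); // BT only reads
  assert(bts32(0, 5).Dst == 32 && !bts32(0, 5).CF);
  assert(btr32(32, 5).Dst == 0 && btr32(32, 5).CF);
  assert(btc32(32, 37).Dst == 0); // 37 & 31 == 5, so the bit toggles off
}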
let Defs = [EFLAGS] in { -let SchedRW = [WriteALU] in { +let SchedRW = [WriteBitTest] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, @@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { []>, TB, NotMemoryFoldable; } -let SchedRW = [WriteALU] in { +let SchedRW = [WriteBitTest] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, @@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), } // SchedRW let hasSideEffects = 0 in { -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), @@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), Requires<[In64BitMode]>; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; @@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), Requires<[In64BitMode]>; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td index ee3b01159174..023137634df1 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -650,9 +650,9 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), // Double shift instructions 
(generalizations of rotate) //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst", SchedRW = [WriteShiftDouble] in { +let Constraints = "$src1 = $dst" in { -let Uses = [CL] in { +let Uses = [CL], SchedRW = [WriteSHDrrcl] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", @@ -683,9 +683,9 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, TB; -} +} // SchedRW -let isCommutable = 1 in { // These instructions commute to each other. +let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, u8imm:$src3), @@ -728,11 +728,10 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, (i8 imm:$src3)))]>, TB; -} -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" -let SchedRW = [WriteShiftDoubleLd, WriteRMW] in { -let Uses = [CL] in { +let Uses = [CL], SchedRW = [WriteSHDmrcl] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), @@ -759,8 +758,9 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), addr:$dst)]>, TB; -} +} // SchedRW +let SchedRW = [WriteSHDmri] in { def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", diff --git a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td index c7713fea70fa..6334d9e89a60 100755 --- a/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/contrib/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -119,8 +119,8 @@ defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>; -defm : BWWriteResPair<WriteBSWAP32,[BWPort15], 1>; // -defm : BWWriteResPair<WriteBSWAP64,[BWPort06, BWPort15], 2, [1, 1], 2>; // +defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>; defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. @@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> { let NumMicroOps = 3; } def : WriteRes<WriteLAHFSAHF, [BWPort06]>; +def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs // Bit counts. defm : BWWriteResPair<WriteBSF, [BWPort1], 3>; @@ -148,8 +149,11 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>; // Integer shifts and rotates. defm : BWWriteResPair<WriteShift, [BWPort06], 1>; -// Double shift instructions. -defm : BWWriteResPair<WriteShiftDouble, [BWPort06], 1>; +// SHLD/SHRD. 
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>; +defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>; +defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>; +defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>; // BMI1 BEXTR, BMI2 BZHI defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>; @@ -600,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> { let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>; -def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { let Latency = 1; @@ -746,8 +742,6 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> { def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", "(V?)CVTDQ2PS(Y?)rr")>; def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { @@ -1055,14 +1049,6 @@ def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> { def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>; def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>; -def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1307,14 +1293,6 @@ def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm", "VPBROADCASTW(Y?)rm")>; -def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; - def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { let Latency = 9; let NumMicroOps = 5; @@ -1380,14 +1358,6 @@ def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>; -def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,1,2]; -} -def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; - def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td index 189dd4183839..876c3e4162cf 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td @@ -118,17 +118,26 @@ defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>; defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>; def : WriteRes<WriteZero, []>; +// Arithmetic. 
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>; -defm : HWWriteResPair<WriteADC, [HWPort06,HWPort0156], 2, [1,1], 2>; +defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>; defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>; -defm : HWWriteResPair<WriteBSWAP32,[HWPort15], 1>; -defm : HWWriteResPair<WriteBSWAP64,[HWPort06, HWPort15], 2, [1,1], 2>; +defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } + +// Integer shifts and rotates. defm : HWWriteResPair<WriteShift, [HWPort06], 1>; -defm : HWWriteResPair<WriteShiftDouble, [HWPort06], 1>; + +// SHLD/SHRD. +defm : X86WriteRes<WriteSHDrri, [HWPort1], 3, [1], 1>; +defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4>; +defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>; +defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>; + defm : HWWriteResPair<WriteJump, [HWPort06], 1>; defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>; @@ -141,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> { let NumMicroOps = 3; } def : WriteRes<WriteLAHFSAHF, [HWPort06]>; +def : WriteRes<WriteBitTest,[HWPort06]>; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -886,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> { let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>; -def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { let Latency = 1; @@ -1240,8 +1242,6 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> { def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", "(V?)CVTDQ2PS(Y?)rr")>; def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { @@ -1513,14 +1513,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>; -def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { - let Latency = 10; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; - def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> { let Latency = 9; let NumMicroOps = 5; @@ -1638,14 +1630,6 @@ def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { } def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>; -def HWWriteResGroup105 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1660,14 +1644,6 @@ def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup108], (instrs STD)>; -def HWWriteResGroup109 : 
SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> { - let Latency = 12; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,1,2]; -} -def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; - def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> { let Latency = 7; let NumMicroOps = 7; diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td index 3b543c680ef4..6b7bbdea860a 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -106,13 +106,14 @@ def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; } def : WriteRes<WriteMove, [SBPort015]>; def : WriteRes<WriteZero, []>; +// Arithmetic. defm : SBWriteResPair<WriteALU, [SBPort015], 1>; defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>; defm : SBWriteResPair<WriteIMul, [SBPort1], 3>; defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>; -defm : SBWriteResPair<WriteBSWAP32,[SBPort1], 1>; -defm : SBWriteResPair<WriteBSWAP64,[SBPort1,SBPort05], 2, [1,1], 2>; +defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>; defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>; defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>; @@ -125,8 +126,13 @@ defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } +// SHLD/SHRD. +defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>; +defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>; +defm : X86WriteRes<WriteSHDmri, [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>; +defm : X86WriteRes<WriteSHDmrcl,[SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>; + defm : SBWriteResPair<WriteShift, [SBPort05], 1>; -defm : SBWriteResPair<WriteShiftDouble, [SBPort05], 1>; defm : SBWriteResPair<WriteJump, [SBPort5], 1>; defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>; @@ -139,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> { let NumMicroOps = 3; } def : WriteRes<WriteLAHFSAHF, [SBPort05]>; +def : WriteRes<WriteBitTest,[SBPort05]>; // This is for simple LEAs with one or two input operands. 
// The complex ones can only execute on port 1, and they require two cycles on @@ -564,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>; -def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { let Latency = 1; @@ -630,14 +629,6 @@ def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>; def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>; -def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; - def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { let Latency = 3; let NumMicroOps = 1; @@ -728,14 +719,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; -} -def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { let Latency = 5; let NumMicroOps = 1; @@ -1027,14 +1010,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { } def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>; -def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; - def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1130,14 +1105,6 @@ def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> { def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m")>; -def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { - let Latency = 10; - let NumMicroOps = 7; - let ResourceCycles = [1,2,3,1]; -} -def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; - def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { let Latency = 11; let NumMicroOps = 2; diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 1417799d76be..bda088e1512f 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -110,8 +110,8 @@ defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication. defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication. 
-defm : SKLWriteResPair<WriteBSWAP32,[SKLPort15], 1>; // -defm : SKLWriteResPair<WriteBSWAP64,[SKLPort06, SKLPort15], 2, [1,1], 2>; // +defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>; defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; @@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> { let NumMicroOps = 3; } def : WriteRes<WriteLAHFSAHF, [SKLPort06]>; +def : WriteRes<WriteBitTest,[SKLPort06]>; // // Bit counts. defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>; @@ -147,8 +148,11 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>; // Integer shifts and rotates. defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>; -// Double shift instructions. -defm : SKLWriteResPair<WriteShiftDouble, [SKLPort06], 1>; +// SHLD/SHRD. +defm : X86WriteRes<WriteSHDrri, [SKLPort1], 3, [1], 1>; +defm : X86WriteRes<WriteSHDrrcl,[SKLPort1,SKLPort06,SKLPort0156], 6, [1, 2, 1], 4>; +defm : X86WriteRes<WriteSHDmri, [SKLPort1,SKLPort23,SKLPort237,SKLPort0156], 9, [1, 1, 1, 1], 4>; +defm : X86WriteRes<WriteSHDmrcl,[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156], 11, [1, 1, 1, 2, 1], 6>; // BMI1 BEXTR, BMI2 BZHI defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>; @@ -602,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; -def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { let Latency = 1; @@ -743,9 +739,7 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", - "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; + "PEXT(32|64)rr")>; def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> { let Latency = 4; @@ -1096,14 +1090,6 @@ def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01]> { } def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>; -def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1392,14 +1378,6 @@ def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm", "(V?)PHSUBSWrm")>; -def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; - def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { let Latency = 9; let NumMicroOps = 5; @@ -1519,14 +1497,6 @@ def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm", "CVT(T?)PD2DQrm", "MMX_CVT(T?)PD2PIirm")>; -def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let 
ResourceCycles = [1,1,1,2,1]; -} -def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; - def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 7095ec081bd9..9d5f8555c505 100755 --- a/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/contrib/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -110,8 +110,8 @@ defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication. defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication. -defm : SKXWriteResPair<WriteBSWAP32,[SKXPort15], 1>; // -defm : SKXWriteResPair<WriteBSWAP64,[SKXPort06, SKXPort15], 2, [1,1], 2>; // +defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>; defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; @@ -136,12 +136,16 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> { let NumMicroOps = 3; } def : WriteRes<WriteLAHFSAHF, [SKXPort06]>; +def : WriteRes<WriteBitTest,[SKXPort06]>; // // Integer shifts and rotates. defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>; -// Double shift instructions. -defm : SKXWriteResPair<WriteShiftDouble, [SKXPort06], 1>; +// SHLD/SHRD. +defm : X86WriteRes<WriteSHDrri, [SKXPort1], 3, [1], 1>; +defm : X86WriteRes<WriteSHDrrcl,[SKXPort1,SKXPort06,SKXPort0156], 6, [1, 2, 1], 4>; +defm : X86WriteRes<WriteSHDmri, [SKXPort1,SKXPort23,SKXPort237,SKXPort0156], 9, [1, 1, 1, 1], 4>; +defm : X86WriteRes<WriteSHDmrcl,[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156], 11, [1, 1, 1, 2, 1], 6>; // Bit counts. 
defm : SKXWriteResPair<WriteBSF, [SKXPort1], 3>; @@ -615,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; -def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { let Latency = 1; @@ -783,9 +779,7 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", - "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; + "PEXT(32|64)rr")>; def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> { let Latency = 4; @@ -1270,14 +1264,6 @@ def: InstRW<[SKXWriteResGroup82], (instregex "(V?)CVTSI642SSrr", "VCVTSI642SSZrr", "VCVTUSI642SSZrr")>; -def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1830,14 +1816,6 @@ def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm", "(V?)PHSUBSWrm")>; -def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; - def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { let Latency = 9; let NumMicroOps = 5; @@ -2033,14 +2011,6 @@ def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { } def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; -def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; -} -def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; - def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/contrib/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm/lib/Target/X86/X86Schedule.td index d0167753ccd4..ef9ce94706df 100644 --- a/contrib/llvm/lib/Target/X86/X86Schedule.td +++ b/contrib/llvm/lib/Target/X86/X86Schedule.td @@ -118,8 +118,8 @@ defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication. def WriteIMulH : SchedWrite; // Integer multiplication, high part. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. -defm WriteBSWAP32: X86SchedWritePair; // Byte Order (Endiannes) Swap -defm WriteBSWAP64: X86SchedWritePair; // Byte Order (Endiannes) Swap +def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap. +def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap. // Integer division. defm WriteDiv8 : X86SchedWritePair; @@ -142,11 +142,15 @@ def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH. 
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support // Integer shifts and rotates. defm WriteShift : X86SchedWritePair; // Double shift instructions. -defm WriteShiftDouble : X86SchedWritePair; +def WriteSHDrri : SchedWrite; +def WriteSHDrrcl : SchedWrite; +def WriteSHDmri : SchedWrite; +def WriteSHDmrcl : SchedWrite; // BMI1 BEXTR, BMI2 BZHI defm WriteBEXTR : X86SchedWritePair; diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td index d1e902e6c43f..a7f461c456bd 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -81,8 +81,8 @@ defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>; defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>; defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>; -defm : AtomWriteResPair<WriteBSWAP32, [AtomPort0], [AtomPort0]>; -defm : AtomWriteResPair<WriteBSWAP64, [AtomPort0], [AtomPort0]>; +defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>; defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>; defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>; @@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> { let Latency = 2; let ResourceCycles = [2]; } +def : WriteRes<WriteBitTest,[AtomPort01]>; defm : X86WriteResUnsupported<WriteIMulH>; @@ -150,11 +151,10 @@ defm : X86WriteResPairUnsupported<WriteBZHI>; defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>; -//////////////////////////////////////////////////////////////////////////////// -// Double shift instructions. -//////////////////////////////////////////////////////////////////////////////// - -defm : AtomWriteResPair<WriteShiftDouble, [AtomPort0], [AtomPort0]>; +defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>; +defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>; +defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>; +defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>; //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. 
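WriteBitTest, defined above with a TODO for memory folding, covers the register forms of BT/BTC/BTR/BTS that the Skylake files previously matched with instregex. A hedged C++ sketch of those semantics (helper names invented; each returns the updated value plus the carry flag the instruction sets):

```cpp
#include <cstdint>
#include <utility>

// Register-form bit tests mask the index modulo the operand width.
// BT reads bit i; BTC/BTR/BTS also complement/reset/set it.
using BT = std::pair<uint32_t, bool>; // {new value, carry flag}

static BT bt (uint32_t v, uint32_t i) { return {v, ((v >> (i & 31)) & 1) != 0}; }
static BT btc(uint32_t v, uint32_t i) { return {v ^  (1u << (i & 31)), ((v >> (i & 31)) & 1) != 0}; }
static BT btr(uint32_t v, uint32_t i) { return {v & ~(1u << (i & 31)), ((v >> (i & 31)) & 1) != 0}; }
static BT bts(uint32_t v, uint32_t i) { return {v |  (1u << (i & 31)), ((v >> (i & 31)) & 1) != 0}; }
```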
@@ -562,9 +562,7 @@ def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> { def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r, PUSH16rmm, PUSH32rmm, PUSH64rmm, LODSB, LODSL, LODSQ, LODSW, - SCASB, SCASL, SCASQ, SCASW, - SHLD32rrCL, SHRD32rrCL, - SHLD32rri8, SHRD32rri8)>; + SCASB, SCASL, SCASQ, SCASW)>; def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8", "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)", "XADD(8|16|32|64)rr", @@ -598,8 +596,6 @@ def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO, JCXZ, JECXZ, JRCXZ, - SHLD32mrCL, SHRD32mrCL, - SHLD32mri8, SHRD32mri8, LD_F80m)>; def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm", "(MMX_)?PEXTRWrr(_REV)?")>; diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d78c343ebd5c..719e71cd25e5 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -168,8 +168,8 @@ defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>; -defm : JWriteResIntPair<WriteBSWAP32,[JALU01], 1>; -defm : JWriteResIntPair<WriteBSWAP64,[JALU01], 1>; +defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>; defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>; defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>; @@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m def : WriteRes<WriteSETCC, [JALU01]>; // Setcc. def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>; def : WriteRes<WriteLAHFSAHF, [JALU01]>; +def : WriteRes<WriteBitTest,[JALU01]>; // This is for simple LEAs with one or two input operands. def : WriteRes<WriteLEA, [JALU01]>; @@ -209,33 +210,11 @@ defm : X86WriteResPairUnsupported<WriteBZHI>; defm : JWriteResIntPair<WriteShift, [JALU01], 1>; -defm : JWriteResIntPair<WriteShiftDouble, [JALU01], 1>; - -def JWriteSHLDrri : SchedWriteRes<[JALU01]> { - let Latency = 3; - let ResourceCycles = [6]; - let NumMicroOps = 6; -} -def: InstRW<[JWriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8, - SHRD16rri8, SHRD32rri8, SHRD64rri8)>; - -def JWriteSHLDrrCL : SchedWriteRes<[JALU01]> { - let Latency = 4; - let ResourceCycles = [8]; - let NumMicroOps = 7; -} -def: InstRW<[JWriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL, - SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>; - -def JWriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 9; - let ResourceCycles = [1, 22]; - let NumMicroOps = 8; -} -def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8, - SHLD16mrCL, SHLD32mrCL, SHLD64mrCL, - SHRD16mri8, SHRD32mri8, SHRD64mri8, - SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>; +// SHLD/SHRD. +defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>; +defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>; +defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>; +defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>; //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. 
diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td index c938a4a8939e..b1e843013707 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -98,11 +98,16 @@ defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>; defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>; defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>; -defm : SLMWriteResPair<WriteBSWAP32,[SLM_IEC_RSV01], 1>; -defm : SLMWriteResPair<WriteBSWAP64,[SLM_IEC_RSV01], 1>; +defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>; +defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>; defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>; -defm : SLMWriteResPair<WriteShiftDouble, [SLM_IEC_RSV0], 1>; + +defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>; +defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>; +defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>; +defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>; + defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>; defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>; @@ -115,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> { let ResourceCycles = [2,1]; } def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>; +def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on diff --git a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td index d28d58580752..7184b850a195 100644 --- a/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/contrib/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -180,11 +180,16 @@ defm : ZnWriteResPair<WriteADC, [ZnALU], 1>; defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>; defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; -defm : ZnWriteResPair<WriteBSWAP32,[ZnALU], 1, [4]>; -defm : ZnWriteResPair<WriteBSWAP64,[ZnALU], 1, [4]>; +defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>; +defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>; defm : ZnWriteResPair<WriteShift, [ZnALU], 1>; -defm : ZnWriteResPair<WriteShiftDouble, [ZnALU], 1>; + +defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>; +defm : X86WriteResUnsupported<WriteSHDrrcl>; +defm : X86WriteResUnsupported<WriteSHDmri>; +defm : X86WriteResUnsupported<WriteSHDmrcl>; + defm : ZnWriteResPair<WriteJump, [ZnALU], 1>; defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>; @@ -193,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>; def : WriteRes<WriteSETCC, [ZnALU]>; def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>; defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>; +def : WriteRes<WriteBitTest,[ZnALU]>; // Bit counts. 
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index fedb13f89e19..85e8256a6e94 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -51,7 +51,7 @@ enum Style { } // end namespace PICStyles class X86Subtarget final : public X86GenSubtargetInfo { -public: +public: enum X86ProcFamilyEnum { Others, IntelAtom, diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index bae2ef80c365..865462622627 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2274,8 +2274,8 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Sign-extend all constants to a multiple of 64-bit. APInt ImmVal = Imm; - if (BitSize & 0x3f) - ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); + if (BitSize % 64 != 0) + ImmVal = Imm.sext(alignTo(BitSize, 64)); // Split the constant into 64-bit chunks and calculate the cost for each // chunk. @@ -2332,9 +2332,15 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, // immediates here as the normal path expects bit 31 to be sign extended. if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) return TTI::TCC_Free; - LLVM_FALLTHROUGH; + ImmIdx = 1; + break; case Instruction::Add: case Instruction::Sub: + // For add/sub, we can use the opposite instruction for INT32_MIN. + if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000) + return TTI::TCC_Free; + ImmIdx = 1; + break; case Instruction::Mul: case Instruction::UDiv: case Instruction::SDiv: @@ -2366,7 +2372,7 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, } if (Idx == ImmIdx) { - int NumConstants = (BitSize + 63) / 64; + int NumConstants = divideCeil(BitSize, 64); int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) ? static_cast<int>(TTI::TCC_Free) diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 8f7c8a82380a..916bca6392de 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -146,7 +146,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } EmitAlignment(Align > 2 ? Align : 2, GV); - + if (GV->isThreadLocal()) { report_fatal_error("TLS is not supported by this target!"); } @@ -162,7 +162,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // are padded to 32 bits. if (Size < 4) OutStreamer->EmitZeros(4 - Size); - + // Mark the end of the global getTargetStreamer().emitCCBottomData(GVSym->getName()); } @@ -295,6 +295,6 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Force static initialization. 
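Two of the X86TTIImpl::getIntImmCost changes above are worth unpacking: the bit-twiddled rounding is replaced by the alignTo/divideCeil helpers, and a 64-bit add/sub of 0x80000000 becomes TCC_Free because the opposite instruction can encode it (add's imm32 would sign-extend to the wrong value, while sub's sign-extended imm32 is exactly the right one). A small self-contained check of both claims:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // alignTo(Bits, 64) and divideCeil(Bits, 64) match the old forms
  // (Bits + 63) & ~0x3f and (Bits + 63) / 64 for all widths.
  for (uint32_t bits = 1; bits <= 1024; ++bits)
    assert(((bits + 63) & ~0x3fu) == ((bits + 63) / 64) * 64);

  // x + 0x80000000 == x - 0xFFFFFFFF80000000 (mod 2^64), and the
  // subtrahend is the sign-extension of the imm32 0x80000000, so
  // the constant is free when the opposite instruction is used.
  uint64_t x = 0x123456789abcdef0ull;
  assert(x + 0x80000000ull == x - 0xFFFFFFFF80000000ull);
  return 0;
}
```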
-extern "C" void LLVMInitializeXCoreAsmPrinter() { +extern "C" void LLVMInitializeXCoreAsmPrinter() { RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget()); } diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index d5e276788f71..b0de048672df 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -63,7 +63,7 @@ static bool isZeroImm(const MachineOperand &op) { unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { int Opcode = MI.getOpcode(); - if (Opcode == XCore::LDWFI) + if (Opcode == XCore::LDWFI) { if ((MI.getOperand(1).isFI()) && // is a stack slot (MI.getOperand(2).isImm()) && // the imm is zero @@ -74,7 +74,7 @@ unsigned XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, } return 0; } - + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -129,9 +129,9 @@ static inline bool IsBR_JT(unsigned BrOpc) { || BrOpc == XCore::BR_JT32; } -/// GetCondFromBranchOpc - Return the XCore CC that matches +/// GetCondFromBranchOpc - Return the XCore CC that matches /// the correspondent Branch instruction opcode. -static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) +static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) { if (IsBRT(BrOpc)) { return XCore::COND_TRUE; @@ -144,7 +144,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) /// GetCondBranchFromCond - Return the Branch instruction /// opcode that matches the cc. -static inline unsigned GetCondBranchFromCond(XCore::CondCode CC) +static inline unsigned GetCondBranchFromCond(XCore::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); @@ -153,7 +153,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC) } } -/// GetOppositeBranchCondition - Return the inverse of the specified +/// GetOppositeBranchCondition - Return the inverse of the specified /// condition, e.g. turning COND_E to COND_NE. static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC) { @@ -209,11 +209,11 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB, TBB = LastInst->getOperand(0).getMBB(); return false; } - + XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); if (BranchCode == XCore::COND_INVALID) return true; // Can't handle indirect branch. - + // Conditional branch // Block ends with fall-through condbranch. @@ -222,17 +222,17 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB, Cond.push_back(LastInst->getOperand(0)); return false; } - + // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = &*I; // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) return true; - + unsigned SecondLastOpc = SecondLastInst->getOpcode(); XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); - + // If the block ends with conditional branch followed by unconditional, // handle it. if (BranchCode != XCore::COND_INVALID @@ -245,10 +245,10 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB, FBB = LastInst->getOperand(0).getMBB(); return false; } - + // If the block ends with two unconditional branches, handle it. The second // one is not executed, so remove it. 
- if (IsBRU(SecondLastInst->getOpcode()) && + if (IsBRU(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; @@ -293,7 +293,7 @@ unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB, } return 1; } - + // Two-way Conditional branch. assert(Cond.size() == 2 && "Unexpected number of components!"); unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm()); @@ -313,17 +313,17 @@ XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode())) return 0; - + // Remove the branch. I->eraseFromParent(); - + I = MBB.end(); if (I == MBB.begin()) return 1; --I; if (!IsCondBranch(I->getOpcode())) return 1; - + // Remove the branch. I->eraseFromParent(); return 2; @@ -342,7 +342,7 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0); return; } - + if (GRDest && SrcReg == XCore::SP) { BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0); return; diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h index cf469ec3cf1a..6c05ab3f10df 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -43,11 +43,11 @@ class XCoreFunctionInfo : public MachineFunctionInfo { public: XCoreFunctionInfo() = default; - + explicit XCoreFunctionInfo(MachineFunction &MF) {} - + ~XCoreFunctionInfo() override = default; - + void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; } int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp index 1915aaedc35d..e119d9555f9d 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -296,12 +296,12 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // fold constant into offset. 
Offset += MI.getOperand(FIOperandNum + 1).getImm(); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); - + assert(Offset%4 == 0 && "Misaligned stack offset"); LLVM_DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); Offset/=4; - + unsigned Reg = MI.getOperand(0).getReg(); assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h index c31f5d5a7c44..9451a05d8d58 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h @@ -32,7 +32,7 @@ public: const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; - + bool enableMultipleCopyHints() const override { return true; } bool requiresRegisterScavenging(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h index 140ddba68aab..ed9936ebf2b8 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h +++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.h @@ -43,7 +43,7 @@ public: XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM); - /// ParseSubtargetFeatures - Parses features string setting specified + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 31e771da3bd3..cd2bd734eb26 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -56,7 +56,7 @@ using namespace llvm; STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); -STATISTIC(NumArgumentsReplacedWithUndef, +STATISTIC(NumArgumentsReplacedWithUndef, "Number of unread args replaced with undef"); namespace { @@ -109,7 +109,7 @@ namespace { char DAH::ID = 0; -INITIALIZE_PASS(DAH, "deadarghaX0r", +INITIALIZE_PASS(DAH, "deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false, false) @@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { return true; } -/// RemoveDeadArgumentsFromCallers - Checks if the given function has any +/// RemoveDeadArgumentsFromCallers - Checks if the given function has any /// arguments that are unused, and changes the caller parameters to be undefined /// instead. bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { @@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { Result = Live; } else { // See what the effect of this use is (recording any uses that cause - // MaybeLive in MaybeLiveArgUses). + // MaybeLive in MaybeLiveArgUses). Result = SurveyUses(&*AI, MaybeLiveArgUses); } @@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // argument. // 2) Retain the 'returned' attribute and treat the return value (but not the // entire function) as live so that it is not eliminated. 
- // + // It's not clear in the general case which option is more profitable because, // even in the absence of explicit uses of the return value, code generation // is free to use the 'returned' attribute to do things like eliding diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 2797da6c0abd..010b0a29807d 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) { if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; } - + return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1af7e6894777..1761d7faff57 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, return Changed; } +static bool isSafeSROAElementUse(Value *V); + +/// Return true if the specified GEP is a safe user of a derived +/// expression from a global that we want to SROA. +static bool isSafeSROAGEP(User *U) { + // Check to see if this ConstantExpr GEP is SRA'able. In particular, we + // don't like < 3 operand CE's, and we don't like non-constant integer + // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some + // value of C. + if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || + !cast<Constant>(U->getOperand(1))->isNullValue()) + return false; + + gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); + ++GEPI; // Skip over the pointer index. + + // For all other levels we require that the indices are constant and in range. + // In particular, consider: A[0][i]. We cannot know that the user isn't doing + // invalid things like allowing i to index an out-of-range subscript that + // accesses A[1]. This can also happen between different members of a struct + // in LLVM IR. + for (; GEPI != E; ++GEPI) { + if (GEPI.isStruct()) + continue; + + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); + if (!IdxVal || (GEPI.isBoundedSequential() && + IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) + return false; + } + + return llvm::all_of(U->users(), + [](User *UU) { return isSafeSROAElementUse(UU); }); +} + /// Return true if the specified instruction is a safe user of a derived /// expression from a global that we want to SROA. static bool isSafeSROAElementUse(Value *V) { @@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getOperand(0) != V; - // Otherwise, it must be a GEP. - GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); - if (!GEPI) return false; - - if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || - !cast<Constant>(GEPI->getOperand(1))->isNullValue()) - return false; - - for (User *U : GEPI->users()) - if (!isSafeSROAElementUse(U)) - return false; - return true; -} - -/// U is a direct user of the specified global value. Look at it and its uses -/// and decide whether it is safe to SROA this global. -static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { - // The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) && - (!isa<ConstantExpr>(U) || - cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) - return false; - - // Check to see if this ConstantExpr GEP is SRA'able. In particular, we - // don't like < 3 operand CE's, and we don't like non-constant integer - // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some - // value of C. - if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || - !cast<Constant>(U->getOperand(1))->isNullValue() || - !isa<ConstantInt>(U->getOperand(2))) - return false; - - gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); - ++GEPI; // Skip over the pointer index. - - // If this is a use of an array allocation, do a bit more checking for sanity. - if (GEPI.isSequential()) { - ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); - - // Check to make sure that index falls within the array. If not, - // something funny is going on, so we won't do the optimization. - // - if (GEPI.isBoundedSequential() && - Idx->getZExtValue() >= GEPI.getSequentialNumElements()) - return false; - - // We cannot scalar repl this level of the array unless any array - // sub-indices are in-range constants. In particular, consider: - // A[0][i]. We cannot know that the user isn't doing invalid things like - // allowing i to index an out-of-range subscript that accesses A[1]. - // - // Scalar replacing *just* the outer index of the array is probably not - // going to be a win anyway, so just give up. - for (++GEPI; // Skip array index. - GEPI != E; - ++GEPI) { - if (GEPI.isStruct()) - continue; - - ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); - if (!IdxVal || - (GEPI.isBoundedSequential() && - IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) - return false; - } - } - - return llvm::all_of(U->users(), - [](User *UU) { return isSafeSROAElementUse(UU); }); + // Otherwise, it must be a GEP. Check it and its users are safe to SRA. + return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I); } /// Look at all uses of the global and decide whether it is safe for us to /// perform this transformation. static bool GlobalUsersSafeToSRA(GlobalValue *GV) { - for (User *U : GV->users()) - if (!IsUserOfGlobalSafeForSRA(U, GV)) + for (User *U : GV->users()) { + // The user of the global must be a GEP Inst or a ConstantExpr GEP. + if (!isa<GetElementPtrInst>(U) && + (!isa<ConstantExpr>(U) || + cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) return false; + // Check the GEP and its users are safe to SRA. + if (!isSafeSROAGEP(U)) + return false; + } + return true; } diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index f79b61037f1d..7d55ebecbf92 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) { User *UR = U.getUser(); // Ignore blockaddress uses. if (isa<BlockAddress>(UR)) continue; - + // Used by a non-instruction, or not the callee of a function, do not // transform.
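The restructured SROA safety check above keeps the old rule: every use chain must be a GEP of the form gep GV, 0, C, ... with constant, in-range indices all the way down. The A[0][i] case from the comment is exactly what gets rejected; an illustrative C analogue, not code from the pass:

```cpp
// A global the pass might want to split into two rows A0[4], A1[4].
static int A[2][4];

int readRow0(int i) {
  // With a variable inner index, A[0][i] at i == 5 lands in the
  // storage of A[1][1] in the flat layout (formally out of bounds,
  // but exactly the aliasing SROA has to assume). After splitting,
  // there is no A0[5], so the transform would change behaviour;
  // isSafeSROAGEP() therefore rejects this use.
  return A[0][i];
}
```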
if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR)) return false; - + CallSite CS(cast<Instruction>(UR)); if (!CS.isCallee(&U)) return false; @@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) { Function::arg_iterator Arg = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI, ++Arg) { - + // If this argument is known non-constant, ignore it. if (ArgumentConstants[i].second) continue; - + Constant *C = dyn_cast<Constant>(*AI); if (C && ArgumentConstants[i].first == nullptr) { ArgumentConstants[i].first = C; // First constant seen. @@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) { if (ArgumentConstants[i].second || AI->use_empty() || AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; - + Value *V = ArgumentConstants[i].first; if (!V) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); @@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) { SmallVector<Value *,4> RetVals; StructType *STy = dyn_cast<StructType>(F.getReturnType()); if (STy) - for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) + for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) RetVals.push_back(UndefValue::get(STy->getElementType(i))); else RetVals.push_back(UndefValue::get(F.getReturnType())); @@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) { // Ignore undefs, we can change them into anything if (isa<UndefValue>(V)) continue; - + // Try to see if all the rets return the same constant or argument. if (isa<Constant>(V) || isa<Argument>(V)) { if (isa<UndefValue>(RV)) { @@ -206,7 +206,7 @@ static bool PropagateConstantReturn(Function &F) { // directly? if (!Call || !CS.isCallee(&U)) continue; - + // Call result not used? if (Call->use_empty()) continue; diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 139941127dee..3bebb96c6d35 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -27,7 +27,7 @@ // -- We define Function* container class with custom "operator<" (FunctionPtr). // -- "FunctionPtr" instances are stored in std::set collection, so every // std::set::insert operation will give you result in log(N) time. -// +// // As an optimization, a hash of the function structure is calculated first, and // two functions are only compared if they have the same hash. 
This hash is // cheap to compute, and has the property that if function F == G according to @@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) { for (Function &Func : M) { if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) { HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); - } + } } std::stable_sort( @@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) { Deferred.push_back(WeakTrackingVH(I->second)); } } - + do { std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); @@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN, Function *F = FN.getFunc(); assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 && "The two functions must be equal"); - + auto I = FNodesInTree.find(F); assert(I != FNodesInTree.end() && "F should be in FNodesInTree"); assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G"); - + FnTreeType::iterator IterToFNInFnTree = I->second; assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree."); // Remove F -> FN and insert G -> FN diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index 27d791857314..2be654258aa8 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. - // Definitions that are weak and not declared non-throwing might be + // Definitions that are weak and not declared non-throwing might be // overridden at linktime with something that throws, so assume that. // If this SCC includes the unwind instruction, we KNOW it throws, so // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); if (!F) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index aa31e0d850dd..83054588a9aa 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) { if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add)) return NV; - Value *X; + Value *X, *Y; + + // add (sub X, Y), -1 --> add (not Y), X + if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) && + match(Op1, m_AllOnes())) + return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X); + // zext(bool) + C -> bool ? 
C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) @@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y)))) return BinaryOperator::CreateSub(Y, X); + // (X + -1) - Y --> ~Y + X + if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes())))) + return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X); + + // Y - (X + 1) --> ~X + Y + if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One())))) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0); + if (Constant *C = dyn_cast<Constant>(Op0)) { bool IsNegate = match(C, m_ZeroInt()); Value *X; @@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Constant *C; if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1)) return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I); - + // X - (-Y) --> X + Y if (match(Op1, m_FNeg(m_Value(Y)))) return BinaryOperator::CreateFAddFMF(Op0, Y, &I); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 372bc41f780e..3d758e2fe7c9 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1550,31 +1550,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return DeMorgan; { - Value *A = nullptr, *B = nullptr, *C = nullptr; - // A&(A^B) => A & ~B - { - Value *tmpOp0 = Op0; - Value *tmpOp1 = Op1; - if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) { - if (A == Op1 || B == Op1 ) { - tmpOp1 = Op0; - tmpOp0 = Op1; - // Simplify below - } - } - - if (match(tmpOp1, m_OneUse(m_Xor(m_Value(A), m_Value(B))))) { - if (B == tmpOp0) { - std::swap(A, B); - } - // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if - // A is originally -1 (or a vector of -1 and undefs), then we enter - // an endless loop. By checking that A is non-constant we ensure that - // we will never get to the loop. - if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder.CreateNot(B)); - } - } + Value *A, *B, *C; + // A & (A ^ B) --> A & ~B + if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B))))) + return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B)); + // (A ^ B) & A --> A & ~B + if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B))))) + return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B)); // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index e8ea7396a96a..fd59c3a7c0c3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); + // Casting pointers between the same type, but with different address spaces + // is an addrspace cast rather than a bitcast. + if ((DstElTy == SrcElTy) && + (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace())) + return new AddrSpaceCastInst(Src, DestTy); + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. 
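The three InstCombine folds added above (add (sub X, Y), -1 in foldAddWithConstant, plus the two visitSub rewrites) are all instances of the two's-complement identity ~V == -V - 1. A quick wrapping-arithmetic check, with the function name invented:

```cpp
#include <cstdint>

// add (sub X, Y), -1 --> add (not Y), X
// (X + -1) - Y       --> ~Y + X
// Y - (X + 1)        --> ~X + Y
// Each side reduces to X - Y - 1 (resp. Y - X - 1) via ~V == -V - 1.
bool checkNotFolds(uint32_t x, uint32_t y) {
  bool f0 = (x - y) + 0xFFFFFFFFu == ~y + x;
  bool f1 = (x + 0xFFFFFFFFu) - y == ~y + x;
  bool f2 = y - (x + 1u) == ~x + y;
  return f0 && f1 && f2;
}
```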
// There is no need to modify malloc calls because it is their bitcast that diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 742caf649007..62769f077b47 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) { assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) && "can't fold an atomic store of requested type"); - + Value *Ptr = SI.getPointerOperand(); unsigned AS = SI.getPointerAddressSpace(); SmallVector<std::pair<unsigned, MDNode *>, 8> MD; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 4867808478a3..796b4021d273 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder, return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } +/// Fold +/// %A = icmp eq/ne i8 %x, 0 +/// %B = op i8 %x, %z +/// %C = select i1 %A, i8 %B, i8 %y +/// To +/// %C = select i1 %A, i8 %z, i8 %y +/// OP: binop with an identity constant +/// TODO: support for non-commutative and FP opcodes +static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) { + + Value *Cond = Sel.getCondition(); + Value *X, *Z; + Constant *C; + CmpInst::Predicate Pred; + if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) || + !ICmpInst::isEquality(Pred)) + return nullptr; + + bool IsEq = Pred == ICmpInst::ICMP_EQ; + auto *BO = + dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue()); + // TODO: support for undefs + if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) && + ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) { + Sel.setOperand(IsEq ? 1 : 2, Z); + return &Sel; + } + return nullptr; +} + /// This folds: /// select (icmp eq (and X, C1)), TC, FC /// iff C1 is a power 2 and the difference between TC and FC is a power-of-2. 
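foldSelectBinOpIdentity, added above, hinges on one fact: on the arm guarded by an equality against the binop's identity constant, X is known to equal that identity, so op X, Z is just Z. In C terms (illustrative only, shown for the icmp eq / add case):

```cpp
// select (X == 0), (X + Z), Y  -->  select (X == 0), Z, Y
// 0 is the identity of '+', and on the true arm X == 0 holds.
// The icmp ne form rewrites the false arm instead.
int before(int x, int y, int z) { return x == 0 ? x + z : y; }
int after (int x, int y, int z) { return x == 0 ? z : y; }
```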
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *Select = foldSelectCmpXchg(SI)) return Select; + if (Instruction *Select = foldSelectBinOpIdentity(SI)) + return Select; + return nullptr; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 34f8037e519f..1ca75f3989d4 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_OneUse(m_BinOp(FBO))))) { const APInt *C; if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal && - match(FBO->getOperand(1), m_APInt(C)) && + match(FBO->getOperand(1), m_APInt(C)) && canShiftBinOpWithConstantRHS(I, FBO, *C)) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), cast<Constant>(FBO->getOperand(1)), Op1); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 2560feb37d66..1c2de6352fa5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) { return nullptr; Value *SplatVal = InsElt.getOperand(1); - InsertElementInst *CurrIE = &InsElt; + InsertElementInst *CurrIE = &InsElt; SmallVector<bool, 16> ElementPresent(NumElements, false); InsertElementInst *FirstIE = nullptr; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 12fcc8752ea9..cff0d5447290 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) { bool ConstOp1 = isa<Constant>(Inst.getOperand(1)); if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1); - + // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) Value *NewLHS = isa<Constant>(LHS) ? NewC : V1; diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b3f659194558..6af44354225c 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) { // If needed, insert __asan_init before checking for SanitizeAddress attr. // This function needs to be called even if the function body is not - // instrumented. + // instrumented. if (maybeInsertAsanInitAtFunctionEntry(F)) FunctionModified = true; - + // Leave if the function doesn't need instrumentation. 
if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified; diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index acd27c2e226f..132e8089fe3b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -148,7 +148,7 @@ public: } StringRef getPassName() const override { return "GCOV Profiler"; } - bool runOnModule(Module &M) override { + bool runOnModule(Module &M) override { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return Profiler.runOnModule(M, TLI); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 22076f04d6ad..4d5dfb0aa66b 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() { IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); for (Value *Data : UsedVars) - if (Data != NamesVar) + if (Data != NamesVar && !isa<Function>(Data)) IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); if (NamesVar) { diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index fa7bcec677f7..0830ff5dd042 100644 --- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, return false; // Sign extend the offset to 64 bits (so that it is like all of the other - // expressions). + // expressions). unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); if (OffSCEVBits < 64) OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 3a675b979017..55759e8b1661 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, this->TTI = &TTI; this->DT = &DT; this->BFI = BFI; - this->Entry = &Entry; + this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ea148b728a10..2f2d7f620a29 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { // relatively expensive analysis for constants which are obviously either // null or non-null to start with. 
if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && - !isa<Constant>(V) && + !isa<Constant>(V) && LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) @@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { Value *Op0 = C->getOperand(0); Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); if (!Op1) return nullptr; - + LazyValueInfo::Tristate Result = LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At); if (Result == LazyValueInfo::Unknown) return nullptr; - + return (Result == LazyValueInfo::True) ? ConstantInt::getTrue(C->getContext()) : ConstantInt::getFalse(C->getContext()); @@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, if (auto *C = getConstantAt(RetVal, RI, LVI)) { ++NumReturns; RI->replaceUsesOfWith(RetVal, C); - BBChanged = true; + BBChanged = true; } } } diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index dd1a2a6adb82..9a7405e98e7d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I, /// returns true, this function and getLocForRead completely describe the memory /// operations for this instruction. static MemoryLocation getLocForWrite(Instruction *Inst) { - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return MemoryLocation::get(SI); diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 565745d12e99..533d16e088c8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -384,7 +384,7 @@ public: LoadMapAllocator>; LoadHTType AvailableLoads; - + // A scoped hash table mapping memory locations (represented as typed // addresses) to generation numbers at which that memory location became // (henceforth indefinitely) invariant. @@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // start a scope in the current generaton which is true for all future // generations. Also, we dont need to consume the last store since the // semantics of invariant.start allow us to perform DSE of the last - // store, if there was a store following invariant.start. Consider: + // store, if there was a store following invariant.start. Consider: // // store 30, i8* p // invariant.start(p) @@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // We can DSE the store to 30, since the store 40 to invariant location p // causes undefined behaviour. if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) { - // If there are any uses, the scope might end. + // If there are any uses, the scope might end. 
if (!Inst->use_empty()) continue; auto *CI = cast<CallInst>(Inst); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp index 28c5940db1e0..8959038de596 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -568,7 +568,7 @@ public: ReversePostOrderTraversal<Function*> RPOT(&F); for (auto *N : RPOT) NumSunk += sinkBB(N); - + return NumSunk > 0; } diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp index ad1598d7b8bf..055fcbc8436f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -43,6 +43,7 @@ #include <functional> #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/PostDominators.h" @@ -61,6 +62,8 @@ using namespace llvm; #define DEBUG_TYPE "guard-widening" +STATISTIC(GuardsEliminated, "Number of eliminated guards"); + namespace { class GuardWideningImpl { @@ -75,21 +78,33 @@ class GuardWideningImpl { /// The set of guards whose conditions have been widened into dominating /// guards. - SmallVector<IntrinsicInst *, 16> EliminatedGuards; + SmallVector<Instruction *, 16> EliminatedGuards; /// The set of guards which have been widened to include conditions to other /// guards. - DenseSet<IntrinsicInst *> WidenedGuards; + DenseSet<Instruction *> WidenedGuards; /// Try to eliminate guard \p Guard by widening it into an earlier dominating /// guard. \p DFSI is the DFS iterator on the dominator tree that is /// currently visiting the block containing \p Guard, and \p GuardsPerBlock /// maps BasicBlocks to the set of guards seen in that block. bool eliminateGuardViaWidening( - IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI, - const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> & + Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI, + const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & GuardsPerBlock); + // Get the condition from \p GuardInst. + Value *getGuardCondition(Instruction *GuardInst); + + // Set the condition for \p GuardInst. + void setGuardCondition(Instruction *GuardInst, Value *NewCond); + + // Whether or not the particular instruction is a guard. + bool isGuard(const Instruction *I); + + // Eliminates the guard instruction properly. + void eliminateGuard(Instruction *GuardInst); + /// Used to keep track of which widening potential is more effective. enum WideningScore { /// Don't widen. @@ -113,9 +128,9 @@ class GuardWideningImpl { /// Compute the score for widening the condition in \p DominatedGuard /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in /// \p DominatingGuardLoop). - WideningScore computeWideningScore(IntrinsicInst *DominatedGuard, + WideningScore computeWideningScore(Instruction *DominatedGuard, Loop *DominatedGuardLoop, - IntrinsicInst *DominatingGuard, + Instruction *DominatingGuard, Loop *DominatingGuardLoop); /// Helper to check if \p V can be hoisted to \p InsertPos. @@ -206,10 +221,10 @@ class GuardWideningImpl { /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to /// whatever it is already checking). 
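The GuardWidening changes above and in the hunks that follow generalize the pass from IntrinsicInst to plain Instruction, funnelling all representation-specific work through getGuardCondition/setGuardCondition/isGuard/eliminateGuard so that a second guard representation can later be added in one place (the new GuardsEliminated statistic counts erasures). The runtime effect of widening itself is unchanged; a hedged model, valid when the second condition is available at the first guard, which is what isAvailableAt checks:

```cpp
#include <cstdio>
#include <cstdlib>

// Model of guard semantics: deoptimize when the condition fails.
static void guard(bool cond) {
  if (!cond) { std::puts("deoptimize"); std::exit(0); }
}

void beforeWidening(bool a, bool b) {
  guard(a);
  /* ... */
  guard(b);
}

void afterWidening(bool a, bool b) {
  guard(a && b); // dominating guard absorbs the dominated condition
  /* ... */
  guard(true);   // now trivial; eliminateGuard() erases it
}
```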
- void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) { + void widenGuard(Instruction *ToWiden, Value *NewCondition) { Value *Result; - widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result); - ToWiden->setArgOperand(0, Result); + widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result); + setGuardCondition(ToWiden, Result); } public: @@ -225,9 +240,7 @@ public: } bool GuardWideningImpl::run() { - using namespace llvm::PatternMatch; - - DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock; + DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; bool Changed = false; for (auto DFI = df_begin(Root), DFE = df_end(Root); @@ -239,8 +252,8 @@ bool GuardWideningImpl::run() { auto &CurrentList = GuardsInBlock[BB]; for (auto &I : *BB) - if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>())) - CurrentList.push_back(cast<IntrinsicInst>(&I)); + if (isGuard(&I)) + CurrentList.push_back(cast<Instruction>(&I)); for (auto *II : CurrentList) Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock); @@ -249,16 +262,16 @@ bool GuardWideningImpl::run() { assert(EliminatedGuards.empty() || Changed); for (auto *II : EliminatedGuards) if (!WidenedGuards.count(II)) - II->eraseFromParent(); + eliminateGuard(II); return Changed; } bool GuardWideningImpl::eliminateGuardViaWidening( - IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI, - const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> & + Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI, + const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & GuardsInBlock) { - IntrinsicInst *BestSoFar = nullptr; + Instruction *BestSoFar = nullptr; auto BestScoreSoFar = WS_IllegalOrNegative; auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent()); @@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening( for (auto *Candidate : make_range(I, E)) { auto Score = computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop); - LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0) - << " and " << *Candidate->getArgOperand(0) << " is " + LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst) + << " and " << *getGuardCondition(Candidate) << " is " << scoreTypeToString(Score) << "\n"); if (Score > BestScoreSoFar) { BestScoreSoFar = Score; @@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening( LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar << " with score " << scoreTypeToString(BestScoreSoFar) << "\n"); - widenGuard(BestSoFar, GuardInst->getArgOperand(0)); - GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext())); + widenGuard(BestSoFar, getGuardCondition(GuardInst)); + setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext())); EliminatedGuards.push_back(GuardInst); WidenedGuards.insert(BestSoFar); return true; } +Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) { + IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst); + assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && + "Bad guard intrinsic?"); + return GI->getArgOperand(0); +} + +void GuardWideningImpl::setGuardCondition(Instruction *GuardInst, + Value *NewCond) { + IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst); + assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && + "Bad guard intrinsic?"); + GI->setArgOperand(0, NewCond); +} + +bool GuardWideningImpl::isGuard(const Instruction* I) { + using namespace llvm::PatternMatch; + 
return match(I, m_Intrinsic<Intrinsic::experimental_guard>()); +} + +void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) { + GuardInst->eraseFromParent(); + ++GuardsEliminated; +} + GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( - IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop, - IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) { + Instruction *DominatedGuard, Loop *DominatedGuardLoop, + Instruction *DominatingGuard, Loop *DominatingGuardLoop) { bool HoistingOutOfLoop = false; if (DominatingGuardLoop != DominatedGuardLoop) { @@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( HoistingOutOfLoop = true; } - if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard)) + if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard)) return WS_IllegalOrNegative; // If the guard was conditional executed, it may never be reached @@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( // case. At the moment, we really only consider the second in our heuristic // here. TODO: evaluate cost model for spurious deopt // NOTE: As written, this also lets us hoist right over another guard which - // is essentially just another spelling for control flow. - if (isWideningCondProfitable(DominatedGuard->getArgOperand(0), - DominatingGuard->getArgOperand(0))) + // is essentially just another spelling for control flow. + if (isWideningCondProfitable(getGuardCondition(DominatedGuard), + getGuardCondition(DominatingGuard))) return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; if (HoistingOutOfLoop) @@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( auto MaybeHoistingOutOfIf = [&]() { auto *DominatingBlock = DominatingGuard->getParent(); auto *DominatedBlock = DominatedGuard->getParent(); - + // Same Block? if (DominatedBlock == DominatingBlock) return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index e2f29705f2dd..c5ed6d5c1b87 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start, assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); - + const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) : @@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); - APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : + APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : APInt::getMaxValue(BitWidth); const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); @@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start, static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L, ScalarEvolution &SE, bool Signed) { unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); - APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : + APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : APInt::getMinValue(BitWidth); auto Predicate = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; return SE.isAvailableAtLoopEntry(BoundSCEV, L) && diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index ff66632f0391..c4ea43a43249 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Keep track of whether the prefix of instructions visited so far are such // that the next instruction visited is guaranteed to execute if the loop - // is entered. + // is entered. bool IsMustExecute = CurLoop->getHeader() == BB; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { @@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { if (isa<AllocaInst>(Object)) // Since the alloca goes out of scope, we know the caller can't retain a // reference to it and be well defined. Thus, we don't need to check for - // capture. + // capture. return true; - + // For all other objects we need to know that the caller can't possibly // have gotten a reference to the object. There are two components of // that: @@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars( // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object - // after return and thus can't possibly load from the object. + // after return and thus can't possibly load from the object. Value *Object = GetUnderlyingObject(SomePtr, MDL); if (!isKnownNonEscaping(Object, TLI)) return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d8692198f7a3..653948717fb9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable( InitXNext = Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); else - llvm_unreachable("Unexpected opcode!"); + llvm_unreachable("Unexpected opcode!"); } else InitXNext = InitX; CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 561ceea1d880..cbb6594cf8f4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -74,7 +74,7 @@ // } // // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) -// +// // Informal proof that the transformation above is correct: // // By the definition of guards we can rewrite the guard condition to: @@ -83,7 +83,7 @@ // Let's prove that for each iteration of the loop: // G(0) && M => G(I) // And the condition above can be simplified to G(Start) && M. -// +// // Induction base. 
// G(0) && M => G(0) // @@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Instruction *InsertAt) { // TODO: we can check isLoopEntryGuardedByCond before emitting the check - + Type *Ty = LHS->getType(); assert(Ty == RHS->getType() && "expandCheck operands have different types?"); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 634215c9770f..e955821effa0 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -888,7 +888,7 @@ bool llvm::computeUnrollCount( UP.Count = 0; return false; } - + // Check if the runtime trip count is too small when profile is available. if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { @@ -897,7 +897,7 @@ bool llvm::computeUnrollCount( else UP.AllowExpensiveTripCount = true; } - } + } // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index b12586758925..6aad077ff19e 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() { // Unswitch only those branches that are reachable. if (isUnreachableDueToPreviousUnswitching(*I)) continue; - + // If this isn't branching on an invariant condition, we can't unswitch // it. if (BI->isConditional()) { @@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() { // We are unswitching ~0 out. UnswitchVal = AllOne; } else { - assert(OpChain == OC_OpChainNone && + assert(OpChain == OC_OpChainNone && "Expect to unswitch on trivial chain"); // Do not process same value again and again. // At this point we have some cases already unswitched and @@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // This in-loop instruction has been simplified w.r.t. its context, // i.e. LIC != Val, make sure we propagate its replacement value to // all its users. - // + // // We can not yet delete UI, the LIC user, yet, because that would invalidate // the LIC->users() iterator !. However, we can make this instruction // dead by replacing all its users and push it onto the worklist so that - // it can be properly deleted and its operands simplified. + // it can be properly deleted and its operands simplified. 
UI->replaceAllUsesWith(Replacement); } } @@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst, LLVMContext &Ctx = Inst->getContext(); if (CI->getPredicate() == CmpInst::ICMP_EQ) return ConstantInt::getFalse(Ctx); - else + else return ConstantInt::getTrue(Ctx); } } diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index 2eb887c986be..3e47e9441d15 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V, case Instruction::Load: E = performSymbolicLoadEvaluation(I); break; - case Instruction::BitCast: + case Instruction::BitCast: E = createExpression(I); break; case Instruction::ICmp: diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index c81ac70d99e6..1df0a9c49fb1 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, // and both "Res" and "ConstOpnd" remain unchanged. bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, Value *&Res) { - // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 + // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 // = ((x | c1) ^ c1) ^ (c1 ^ c2) // = (x & ~c1) ^ (c1 ^ c2) // It is useful only when c1 == c2. @@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, RedoInsts.insert(T); return true; } - + // Helper function of OptimizeXor(). It tries to simplify // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a -// symbolic value. -// -// If it was successful, true is returned, and the "R" and "C" is returned +// symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned // via "Res" and "ConstOpnd", respectively (If the entire expression is // evaluated to a constant, the Res is set to NULL); otherwise, false is // returned, and both "Res" and "ConstOpnd" remain unchanged. @@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, const APInt &C1 = Opnd1->getConstPart(); const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; - + // Do not increase code size if (!C3.isNullValue() && !C3.isAllOnesValue()) { int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; @@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops) { if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) return V; - + if (Ops.size() == 1) return nullptr; @@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, } // step 3.2: When previous and current operands share the same symbolic - // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" + // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { // Remove previous operand PrevOpnd->Invalidate(); diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 391e43f79121..0de2bc72b522 100644 --- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -401,7 +401,7 @@ namespace { /// defining value. 
The 'base defining value' for 'Def' is the transitive /// closure of this relation stopping at the first instruction which has no /// immediate base defining value. The b.d.v. might itself be a base pointer, -/// but it can also be an arbitrary derived pointer. +/// but it can also be an arbitrary derived pointer. struct BaseDefiningValueResult { /// Contains the value which is the base defining value. Value * const BDV; @@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// Return a base defining value for the 'Index' element of the given vector /// instruction 'I'. If Index is null, returns a BDV for the entire vector -/// 'I'. As an optimization, this method will try to determine when the +/// 'I'. As an optimization, this method will try to determine when the /// element is known to already be a base pointer. If this can be established, /// the second value in the returned pair will be true. Note that either a /// vector or a pointer typed value can be returned. For the former, the /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. /// If the later, the return pointer is a BDV (or possibly a base) for the -/// particular element in 'I'. +/// particular element in 'I'. static BaseDefiningValueResult findBaseDefiningValueOfVector(Value *I) { // Each case parallels findBaseDefiningValue below, see that code for @@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) { return BaseDefiningValueResult(I, true); if (isa<Constant>(I)) - // Base of constant vector consists only of constant null pointers. + // Base of constant vector consists only of constant null pointers. // For reasoning see similar case inside 'findBaseDefiningValue' function. return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()), true); @@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { if (isa<Constant>(I)) { // We assume that objects with a constant base (e.g. a global) can't move // and don't need to be reported to the collector because they are always - // live. Besides global references, all kinds of constants (e.g. undef, + // live. Besides global references, all kinds of constants (e.g. undef, // constant expressions, null pointers) can be introduced by the inliner or // the optimizer, especially on dynamically dead paths. // Here we treat all of them as having single null base. By doing this we - // trying to avoid problems reporting various conflicts in a form of + // trying to avoid problems reporting various conflicts in a form of // "phi (const1, const2)" or "phi (const, regular gc ptr)". // See constant.ll file for relevant test cases. @@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, return Index; }; Module *M = StatepointToken->getModule(); - + // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose // element type is i8 addrspace(1)*). We originally generated unique // declarations for each pointer type, but this proved problematic because // the intrinsic mangling code is incomplete and fragile. Since we're moving // towards a single unified pointer type anyways, we can just cast everything // to an i8* of the right address space. A bitcast is added later to convert - // gc_relocate to the actual value's type. + // gc_relocate to the actual value's type. 
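Annotation: the comment above explains why every gc.relocate is emitted with the unified i8 addrspace(1)* type and only bitcast back to the real pointer type at its uses. A minimal sketch of that pattern follows; StatepointToken, BaseIdx, DerivedIdx, OrigTy and Builder are illustrative names assumed to be in scope, not identifiers from this patch.

    // Declare gc.relocate once, for the single unified pointer type.
    Module *M = StatepointToken->getModule();
    Type *I8PtrGC = Builder.getInt8PtrTy(/*AddrSpace=*/1); // i8 addrspace(1)*
    Function *RelocDecl = Intrinsic::getDeclaration(
        M, Intrinsic::experimental_gc_relocate, {I8PtrGC});
    // Relocate through the unified type...
    CallInst *Reloc = Builder.CreateCall(
        RelocDecl,
        {StatepointToken, Builder.getInt32(BaseIdx), Builder.getInt32(DerivedIdx)},
        "relocated");
    // ...and bitcast back to the value's actual type afterwards.
    Value *Typed = Builder.CreateBitCast(Reloc, OrigTy);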
auto getGCRelocateDecl = [&] (Type *Ty) { assert(isHandledGCPointerType(Ty)); auto AS = Ty->getScalarType()->getPointerAddressSpace(); @@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) { } return "live-through"; } - + static void makeStatepointExplicitImpl(const CallSite CS, /* to replace */ const SmallVectorImpl<Value *> &BasePtrs, @@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, } // Before we start introducing relocations, we want to tweak the IR a bit to - // avoid unfortunate code generation effects. The main example is that we + // avoid unfortunate code generation effects. The main example is that we // want to try to make sure the comparison feeding a branch is after any // safepoints. Otherwise, we end up with a comparison of pre-relocation // values feeding a branch after relocation. This is semantically correct, @@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, TerminatorInst *TI = BB.getTerminator(); if (auto *Cond = getConditionInst(TI)) // TODO: Handle more than just ICmps here. We should be able to move - // most instructions without side effects or memory access. + // most instructions without side effects or memory access. if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) { MadeChange = true; Cond->moveBefore(TI); diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp index 6c3f012c6280..de16b608f752 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); - PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. diff --git a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 34510cb40732..5834b619046b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, *ParentBB, *OldPH, FullUnswitch); // Now we need to update the dominator tree. - DT.insertEdge(OldPH, UnswitchedBB); + SmallVector<DominatorTree::UpdateType, 2> DTUpdates; + DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB}); if (FullUnswitch) - DT.deleteEdge(ParentBB, UnswitchedBB); + DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB}); + DT.applyUpdates(DTUpdates); // The constant we can replace all of our invariants with inside the loop // body. 
If any of the invariants have a value other than this the loop won't diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 5f5c4150d3bb..d0396e6ce47d 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -911,7 +911,7 @@ static void appendTypeSuffix(Value *Op, StringRef &Name, NameBuffer += 'l'; Name = NameBuffer; - } + } } Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, diff --git a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 4d9c22e57a68..6d18d0614611 100644 --- a/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -392,7 +392,7 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee, auto CalleeType = Callee->getFunctionType(); auto CalleeParamNum = CalleeType->getNumParams(); for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { - auto *Arg = CS.getArgument(ArgNo); + auto *Arg = CS.getArgument(ArgNo); Type *FormalTy = CalleeType->getParamType(ArgNo); Type *ActualTy = Arg->getType(); if (FormalTy != ActualTy) { diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 61448e9acb57..807360340055 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -290,7 +290,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Have we already cloned this block? if (BBEntry) return; - + // Nope, clone it now. BasicBlock *NewBB; BBEntry = NewBB = BasicBlock::Create(BB->getContext()); @@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, hasDynamicAllocas = true; } } - + // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; @@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, TerminatorDone = true; } } - + if (!TerminatorDone) { Instruction *NewInst = OldTI->clone(); if (OldTI->hasName()) @@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, for (const BasicBlock *Succ : TI->successors()) ToClone.push_back(Succ); } - + if (CodeInfo) { CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; - CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } } @@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, CloneWorklist.pop_back(); PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } - + // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. @@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } - + // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. 
for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { @@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, Value *V = VMap.lookup(PN->getIncomingBlock(pred)); if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) { Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap, + VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); @@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, --pred; // Revisit the next entry. --e; } - } + } } - + // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our @@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; - + // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; - + // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. @@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } } } - + // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. @@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } - + BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor()) { ++I; continue; @@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); - + // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); - + // Remove the dest block. Dest->eraseFromParent(); - + // Do not increment I, iteratively merge all things this block branches to. 
} @@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, - const char *NameSuffix, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, @@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl<BasicBlock *> &Blocks) { - assert(OrigLoop->getSubLoops().empty() && + assert(OrigLoop->getSubLoops().empty() && "Loop to be cloned cannot have inner loop"); Function *F = OrigLoop->getHeader()->getParent(); Loop *ParentLoop = OrigLoop->getParentLoop(); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 35c7511a24b9..c7d68bab8170 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule( // for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - GlobalVariable *GV = new GlobalVariable(*New, + GlobalVariable *GV = new GlobalVariable(*New, I->getValueType(), I->isConstant(), I->getLinkage(), (Constant*) nullptr, I->getName(), @@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule( GA->copyAttributesFrom(&*I); VMap[&*I] = GA; } - + // Now that all of the things that global variable initializer can refer to // have been created, loop through and copy the global variable referrers // over... We also set the attributes on the global now. diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f31dab9f96af..cb349e34606c 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. - ReturnInst::Create(Context, + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), TheSwitch); } @@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() { splitReturnBlocks(); // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), + BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, header); // The new function needs a root node because other nodes can branch to the // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), + BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); auto *BranchI = BranchInst::Create(header); // If the original function has debug info, we have to add a debug location diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 0315aac1cf84..ddc6e07e2f59 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // Only copy the edge if the call was inlined! 
if (VMI == VMap.end() || VMI->second == nullptr) continue; - + // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); @@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallSite CS = CallSite(NewCall); if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) continue; - + // Remember that this call site got inlined for the client of // InlineFunction. IFI.InlinedCalls.push_back(NewCall); @@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->addCalledFunction(CallSite(NewCall), I->second); } - + // Update the call graph by deleting the edge from Callee to Caller. We must // do this after the loop above in case Caller and Callee are the same. CallerNode->removeCallEdgeFor(CS); @@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, if (CalleeHasDebugInfo) continue; - + // If the inlined instruction has no line number, make it look as if it // originates from the call location. This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (!AI) continue; - + // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { @@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (!allocaWouldBeStaticInEntry(AI)) continue; - + // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); - + // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 3fbb3487884b..4a359b99bebd 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) { return true; } -/// Generate code to compute the remainder of two integers of bitwidth up to +/// Generate code to compute the remainder of two integers of bitwidth up to /// 32 bits. Uses the above routines and extends the inputs/truncates the /// outputs to operate in 32 bits; that is, these routines are good for targets -/// that have no or very little suppport for smaller than 32 bit integer +/// that have no or very little suppport for smaller than 32 bit integer /// arithmetic. /// /// Replace Rem with emulation code. @@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { return expandRemainder(cast<BinaryOperator>(ExtRem)); } -/// Generate code to compute the remainder of two integers of bitwidth up to +/// Generate code to compute the remainder of two integers of bitwidth up to /// 64 bits. Uses the above routines and extends the inputs/truncates the /// outputs to operate in 64 bits. 
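Annotation: all of the expandRemainderUpTo32Bits/UpTo64Bits helpers documented here share one shape: widen the operands, perform the operation at the wider width, then truncate the result. A sketch of that shape for an unsigned 16-bit remainder, assuming Rem is the urem BinaryOperator being expanded:

    IRBuilder<> Builder(Rem);
    Type *Int32Ty = Builder.getInt32Ty();
    // Widen both operands to 32 bits (unsigned, so zero-extend).
    Value *ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
    Value *ExtDivisor  = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
    // Compute at 32 bits, then truncate back to the original type.
    Value *ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
    Value *Trunc  = Builder.CreateTrunc(ExtRem, Rem->getType());
    Rem->replaceAllUsesWith(Trunc);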
/// @@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { } else { ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); } Trunc = Builder.CreateTrunc(ExtDiv, DivTy); @@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { } else { ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); - ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); } Trunc = Builder.CreateTrunc(ExtDiv, DivTy); diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 956d0387c7a8..a1f8e7484bcf 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -10,7 +10,7 @@ // This pass transforms loops by placing phi nodes at the end of the loops for // all values that are live across the loop boundary. For example, it turns // the left into the right code: -// +// // for (...) for (...) // if (c) if (c) // X1 = ... X1 = ... @@ -21,8 +21,8 @@ // ... = X4 + 4 // // This is still valid LLVM; the extra phi nodes are purely redundant, and will -// be trivially eliminated by InstCombine. The major benefit of this -// transformation is that it makes many other loop optimizations, such as +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as // LoopUnswitching, simpler. // //===----------------------------------------------------------------------===// @@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), I->getName() + ".lcssa", &ExitBB->front()); - + // Get the debug location from the original instruction. + PN->setDebugLoc(I->getDebugLoc()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { PN->addIncoming(I, Pred); diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 13794c53f24b..78afe748e596 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, /// Update the branch weights of the latch of a peeled-off loop /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly. +/// iteration correctly. /// Our goal is to make sure that: /// a) The total weight of all the copies of the loop body is preserved. /// b) The total weight of the loop exit is preserved. @@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // // Each following iteration will split the current bottom anchor in two, // and put the new copy of the loop body between these two blocks. 
That is, - // after peeling another iteration from the example above, we'll split + // after peeling another iteration from the example above, we'll split // InsertBot, and get: // // InsertTop: diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 323f2552ca80..88d595ee02ab 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -68,7 +68,7 @@ namespace { PRNG prng; }; - + struct MetaRenamer : public ModulePass { // Pass identification, replacement for typeid static char ID; diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index ca184ed7c4e3..4a1fd8d571aa 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) { void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - + Value *V; if (PHINode *UserPN = dyn_cast<PHINode>(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); else V = GetValueAtEndOfBlock(User->getParent()); - + U.set(V); } @@ -235,7 +235,7 @@ public: PHI_iterator(PHINode *P, bool) // end iterator : PHI(P), idx(PHI->getNumIncomingValues()) {} - PHI_iterator &operator++() { ++idx; return *this; } + PHI_iterator &operator++() { ++idx; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } @@ -333,7 +333,7 @@ LoadAndStorePromoter:: LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; - + const Value *SomeVal; if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) SomeVal = LI; @@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); - + // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. @@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; - + // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; - + // Okay, this is the first use in the block. If this block just has a // single user in it, we can rewrite it trivially. if (BlockUses.size() == 1) { @@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); - } else + } else // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast<LoadInst>(User)); BlockUses.clear(); continue; } - + // Otherwise, check to see if this block is all loads. bool HasStore = false; for (Instruction *I : BlockUses) { @@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { break; } } - + // If so, we can queue them all as live in loads. We don't have an // efficient way to tell which on is first in the block and don't want to // scan large blocks, so just add all loads as live ins. 
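Annotation: the run() loop above records one available value per block and queues loads of live-in values; the cross-block rewriting is then done by the SSAUpdater interface these hunks touch. A usage sketch (ValTy, BB1, BB2, StoredValue1, StoredValue2, LoadBB and LiveInLoad are illustrative names, not code from this patch):

    SSAUpdater SSA;
    SSA.Initialize(ValTy, "promoted");        // type and base name for new PHIs
    SSA.AddAvailableValue(BB1, StoredValue1); // last value stored in BB1
    SSA.AddAvailableValue(BB2, StoredValue2); // last value stored in BB2
    // For a load that is live-in to its block, ask for the value at the top
    // of that block; PHI nodes are created on demand.
    Value *V = SSA.GetValueInMiddleOfBlock(LoadBB);
    LiveInLoad->replaceAllUsesWith(V);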
@@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { BlockUses.clear(); continue; } - + // Otherwise, we have mixed loads and stores (or just a bunch of stores). // Since SSAUpdater is purely for cross-block values, we need to determine // the order of these instructions in the block. If the first use in the @@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (LoadInst *L = dyn_cast<LoadInst>(&I)) { // If this is a load from an unrelated pointer, ignore it. if (!isInstInList(L, Insts)) continue; - + // If we haven't seen a store yet, this is a live in use, otherwise // use the stored value. if (StoredValue) { @@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { StoredValue = SI->getOperand(0); } } - + // The last stored value that happened is the live-out for the block. assert(StoredValue && "Already checked that there is a store in block"); SSA.AddAvailableValue(BB, StoredValue); BlockUses.clear(); } - + // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (LoadInst *ALoad : LiveInLoads) { @@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { ALoad->replaceAllUsesWith(NewVal); ReplacedLoads[ALoad] = NewVal; } - + // Allow the client to do stuff before we start nuking things. doExtraRewritesBeforeFinalDeletion(); - + // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { @@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { if (!User->use_empty()) { Value *NewVal = ReplacedLoads[User]; assert(NewVal && "not a replaced load?"); - + // Propagate down to the ultimate replacee. The intermediately loads // could theoretically already have been deleted, so we don't want to // dereference the Value*'s. @@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { NewVal = RLI->second; RLI = ReplacedLoads.find(NewVal); } - + replaceLoadWithValue(cast<LoadInst>(User), NewVal); User->replaceAllUsesWith(NewVal); } - + instructionDeleted(User); User->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index e381fbc34ab4..65b23f4d94a1 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, SmallDenseMap<const SCEV*, Value*> CheapExpansions; CheapExpansions[S] = ICmp->getOperand(IVOperIdx); CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); - + // TODO: Support multiple entry loops? (We currently bail out of these in // the IndVarSimplify pass) if (auto *BB = L->getLoopPredecessor()) { diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8c48597fc2e4..15e035874002 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, return nullptr; // Replace the malloc with a calloc. We need the data layout to know what the - // actual size of a 'size_t' parameter is. + // actual size of a 'size_t' parameter is. 
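Annotation: for context, foldMallocMemset() performs the rewrite the comment below describes; at the source level the transformation is simply (sketch, with n an assumed size expression):

    // before: allocate, then clear
    void *p = malloc(n);
    memset(p, 0, n);
    // after: one zero-initializing allocation
    void *p2 = calloc(1, n);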
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); @@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, Value *V = valueHasFloatPrecision(CI->getArgOperand(0)); if (V == nullptr) return nullptr; - + // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call. // @@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { if (!Pow->isFast()) return nullptr; - const APFloat *Arg1C; - if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) - return nullptr; - if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) + Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Type *Ty = Pow->getType(); + + const APFloat *ExpoF; + if (!match(Expo, m_APFloat(ExpoF)) || + (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) return nullptr; - // Fast-math flags from the pow() are propagated to all replacement ops. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(Pow->getFastMathFlags()); - Type *Ty = Pow->getType(); - Value *Sqrt; + // If errno is never set, then use the intrinsic for sqrt(). if (Pow->hasFnAttr(Attribute::ReadNone)) { - // We know that errno is never set, so replace with an intrinsic: - // pow(x, 0.5) --> llvm.sqrt(x) - // llvm.pow(x, 0.5) --> llvm.sqrt(x) - auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); - Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); - } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // Errno could be set, so we must use a sqrt libcall. - // TODO: We also should check that the target can in fact lower the sqrt - // libcall. We currently have no way to ask this question, so we ask - // whether the target has a sqrt libcall which is not exactly the same. - Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), - TLI->getName(LibFunc_sqrt), B, + Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), + Intrinsic::sqrt, Ty); + Sqrt = B.CreateCall(SqrtFn, Base); + } + // Otherwise, use the libcall for sqrt(). + else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) + // TODO: We also should check that the target can in fact lower the sqrt() + // libcall. We currently have no way to ask this question, so we ask if + // the target has a sqrt() libcall, which is not exactly the same. + Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B, Pow->getCalledFunction()->getAttributes()); - } else { - // We can't replace with an intrinsic or a libcall. + else return nullptr; - } - // If this is pow(x, -0.5), get the reciprocal. - if (Arg1C->isExactlyValue(-0.5)) - Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); + // If the exponent is negative, then get the reciprocal. 
+ if (ExpoF->isNegative()) + Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal"); return Sqrt; } -Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; +Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { + Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); + Function *Callee = Pow->getCalledFunction(); + AttributeList Attrs = Callee->getAttributes(); StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); + Module *Module = Pow->getModule(); + Type *Ty = Pow->getType(); + Value *Shrunk = nullptr; + bool Ignored; + + if (UnsafeFPShrink && + Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) + Shrunk = optimizeUnaryDoubleFP(Pow, B, true); + + // Propagate the math semantics from the call to any created instructions. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Pow->getFastMathFlags()); - Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + // Evaluate special cases related to the base. // pow(1.0, x) -> 1.0 - if (match(Op1, m_SpecificFP(1.0))) - return Op1; - // pow(2.0, x) -> llvm.exp2(x) - if (match(Op1, m_SpecificFP(2.0))) { - Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2, - CI->getType()); - return B.CreateCall(Exp2, Op2, "exp2"); - } - - // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will - // be one. - if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - // pow(10.0, x) -> exp10(x) - if (Op1C->isExactlyValue(10.0) && - hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f, - LibFunc_exp10l)) - return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B, - Callee->getAttributes()); + if (match(Base, m_SpecificFP(1.0))) + return Base; + + // pow(2.0, x) -> exp2(x) + if (match(Base, m_SpecificFP(2.0))) { + Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty); + return B.CreateCall(Exp2, Expo, "exp2"); } + // pow(10.0, x) -> exp10(x) + if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) + // There's no exp10 intrinsic yet, but, maybe, some day there shall be one. + if (BaseC->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs); + // pow(exp(x), y) -> exp(x * y) // pow(exp2(x), y) -> exp2(x * y) // We enable these only with fast-math. Besides rounding differences, the // transformation changes overflow and underflow behavior quite dramatically. // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). 
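Annotation: to make the comment's numbers concrete, a double overflows exp() a little above x = 709, so exp(1000) is +inf and pow(+inf, 0.001) stays +inf, while the rewritten form computes exp(1000 * 0.001) = exp(1). A standalone check:

    #include <cmath>
    #include <cstdio>
    int main() {
      double x = 1000.0, y = 0.001;
      std::printf("%f\n", std::pow(std::exp(x), y)); // inf
      std::printf("%f\n", std::exp(x * y));          // 2.718282
    }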
- auto *OpC = dyn_cast<CallInst>(Op1); - if (OpC && OpC->isFast() && CI->isFast()) { - LibFunc Func; - Function *OpCCallee = OpC->getCalledFunction(); - if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) { + auto *BaseFn = dyn_cast<CallInst>(Base); + if (BaseFn && BaseFn->isFast() && Pow->isFast()) { + LibFunc LibFn; + Function *CalleeFn = BaseFn->getCalledFunction(); + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && + (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) { IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); - return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, - OpCCallee->getAttributes()); + B.setFastMathFlags(Pow->getFastMathFlags()); + + Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); + return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, + CalleeFn->getAttributes()); } } - if (Value *Sqrt = replacePowWithSqrt(CI, B)) + // Evaluate special cases related to the exponent. + + if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; - ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (!Op2C) - return Ret; + ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); + if (!ExpoC) + return Shrunk; - if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 - return ConstantFP::get(CI->getType(), 1.0); + // pow(x, -1.0) -> 1.0 / x + if (ExpoC->isExactlyValue(-1.0)) + return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); - // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). - if (Op2C->isExactlyValue(0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). - // This is faster than calling pow, and still handles negative zero - // and negative infinity correctly. - // TODO: In finite-only mode, this could be just fabs(sqrt(x)). - Value *Inf = ConstantFP::getInfinity(CI->getType()); - Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + // pow(x, 0.0) -> 1.0 + if (ExpoC->getValueAPF().isZero()) + return ConstantFP::get(Ty, 1.0); - // TODO: As above, we should lower to the sqrt intrinsic if the pow is an - // intrinsic, to match errno semantics. - Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); + // pow(x, 1.0) -> x + if (ExpoC->isExactlyValue(1.0)) + return Base; - Module *M = Callee->getParent(); - Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs, - CI->getType()); - Value *FAbs = B.CreateCall(FabsF, Sqrt); + // pow(x, 2.0) -> x * x + if (ExpoC->isExactlyValue(2.0)) + return B.CreateFMul(Base, Base, "square"); - Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); - Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); - return Sel; + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). + if (ExpoC->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow(), and still handles -0.0 and + // negative infinity correctly. + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *PosInf = ConstantFP::getInfinity(Ty); + Value *NegInf = ConstantFP::getInfinity(Ty, true); + + // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is + // an intrinsic, to match errno semantics. 
+ Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), + B, Attrs); + Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); + Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); + Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); + Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); + return Sqrt; } - // Propagate fast-math-flags from the call to any created instructions. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - // pow(x, 1.0) --> x - if (Op2C->isExactlyValue(1.0)) - return Op1; - // pow(x, 2.0) --> x * x - if (Op2C->isExactlyValue(2.0)) - return B.CreateFMul(Op1, Op1, "pow2"); - // pow(x, -1.0) --> 1.0 / x - if (Op2C->isExactlyValue(-1.0)) - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - - // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (CI->isFast()) { - APFloat V = abs(Op2C->getValueAPF()); - // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // pow(x, n) -> x * x * x * .... + if (Pow->isFast()) { + APFloat ExpoA = abs(ExpoC->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32. // This transformation applies to integer exponents only. - if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || - !V.isInteger()) + if (!ExpoA.isInteger() || + ExpoA.compare + (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan) return nullptr; // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; - InnerChain[1] = Op1; - InnerChain[2] = B.CreateFMul(Op1, Op1); + InnerChain[1] = Base; + InnerChain[2] = B.CreateFMul(Base, Base, "square"); // We cannot readily convert a non-double type (like float) to a double. - // So we first convert V to something which could be converted to double. - bool Ignored; - V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); - - Value *FMul = getPow(InnerChain, V.convertToDouble(), B); - // For negative exponents simply compute the reciprocal. - if (Op2C->isNegative()) - FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + // So we first convert it to something which could be converted to double. + ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); + Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + + // If the exponent is negative, then get the reciprocal. 
+ if (ExpoC->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal"); return FMul; } diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index 3640541e63cc..fd0da79487f1 100644 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -536,7 +536,7 @@ private: char RewriteSymbolsLegacyPass::ID = 0; RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { - initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); + initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); } RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index e633ac0c874d..d49b26472548 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), + UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected: /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; - /// If there is a cast involved in the induction variable \p ID, which should - /// be ignored in the vectorized loop body, this function records the - /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the - /// cast. We had already proved that the casted Phi is equal to the uncasted - /// Phi in the vectorized loop (under a runtime guard), and therefore - /// there is no need to vectorize the cast - the same value can be used in the - /// vector loop for both the Phi and the cast. + /// If there is a cast involved in the induction variable \p ID, which should + /// be ignored in the vectorized loop body, this function records the + /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the + /// cast. We had already proved that the casted Phi is equal to the uncasted + /// Phi in the vectorized loop (under a runtime guard), and therefore + /// there is no need to vectorize the cast - the same value can be used in the + /// vector loop for both the Phi and the cast. /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, /// Otherwise, \p VectorLoopValue is a widened/vectorized value. /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){ // high enough value to practically disable vectorization with such // operations, except where previously deployed legality hack allowed // using very low cost values. This is to avoid regressions coming simply - // from moving "masked load/store" check from legality to cost model. + // from moving "masked load/store" check from legality to cost model. // Masked Load/Gather emulation was previously never allowed. 
// Limited number of Masked Store/Scatter emulation was allowed. assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( })) DeadInstructions.insert(IndUpdate); - // We record as "Dead" also the type-casting instructions we had identified + // We record as "Dead" also the type-casting instructions we had identified // during induction analysis. We don't need any handling for them in the - // vectorized loop because we have proven that, under a proper runtime - // test guarding the vectorized loop, the value of the phi, and the casted + // vectorized loop because we have proven that, under a proper runtime + // test guarding the vectorized loop, the value of the phi, and the casted // value of the phi, are the same. The last instruction in this casting chain - // will get its scalar/vector/widened def from the scalar/vector/widened def + // will get its scalar/vector/widened def from the scalar/vector/widened def // of the respective phi node. Any other casts in the induction def-use chain // have no other uses outside the phi update chain, and will be ignored. InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto Plan = llvm::make_unique<VPlan>(); // Build hierarchical CFG - VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); - HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); + HCFGBuilder.buildHierarchicalCFG(); return Plan; } diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) { } /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list +/// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. static InstructionsState getSameOpcode(ArrayRef<Value *> VL, unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // TODO: Merge this shuffle with the ReorderShuffleMask. 
if (!E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); + else if (auto *I = dyn_cast<Instruction>(V)) + Builder.SetInsertPoint(I->getParent(), + std::next(I->getIterator())); + else + Builder.SetInsertPoint(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, } O << "\\l\""; } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h" #include "VPlanValue.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock; class DominatorTree; class InnerLoopVectorizer; class InterleaveGroup; -class LoopInfo; class raw_ostream; class Value; class VPBasicBlock; @@ -516,6 +517,23 @@ public: /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); + + void printAsOperand(raw_ostream &OS, bool PrintType) const { + OS << getName(); + } + + void print(raw_ostream &OS) const { + // TODO: Only printing VPBB name for now since we only have dot printing + // support for VPInstructions/Recipes. + printAsOperand(OS, false); + } + + /// Return true if it is legal to hoist instructions into this block. + bool isLegalToHoistInto() { + // There are currently no constraints that prevent an instruction to be + // hoisted into a VPBlockBase. + return true; + } }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public: EntryBlock->setParent(this); } + // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a + // specific interface of llvm::Function, instead of using + // GraphTraints::getEntryNode. We should add a new template parameter to + // DominatorTreeBase representing the Graph type. 
+ VPBlockBase &front() const { return *Entry; } + const VPBlockBase *getExit() const { return Exit; } VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private: /// VPlan. Value2VPValueTy Value2VPValue; + /// Holds the VPLoopInfo analysis for this VPlan. + VPLoopInfo VPLInfo; + public: VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public: return Value2VPValue[V]; } + /// Return the VPLoopInfo analysis for this VPlan. + VPLoopInfo &getVPLoopInfo() { return VPLInfo; } + const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { return OS; } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegionBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region. template <> struct GraphTraits<VPBlockBase *> { using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> { } }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBlockBases. Predecessors are used instead +// of successors for the inverse traversal. template <> struct GraphTraits<Inverse<VPBlockBase *>> { using NodeRef = VPBlockBase *; using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; - static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { - return B; - } + static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; } static inline ChildIteratorType child_begin(NodeRef N) { return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> { } }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn out to be +// VPRegionBlocks.
+ +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> + : public GraphTraits<const VPBlockBase *> { + using GraphRef = const VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getEntry()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> + : public GraphTraits<Inverse<VPBlockBase *>> { + using GraphRef = VPRegionBlock *; + using nodes_iterator = df_iterator<NodeRef>; + + static NodeRef getEntryNode(Inverse<GraphRef> N) { + return N.Graph->getExit(); + } + + static nodes_iterator nodes_begin(GraphRef N) { + return nodes_iterator::begin(N->getExit()); + } + + static nodes_iterator nodes_end(GraphRef N) { + // df_iterator::end() returns an empty iterator so the node used doesn't + // matter. + return nodes_iterator::end(N); + } +}; + //===----------------------------------------------------------------------===// // VPlan Utilities //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/contrib/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. 
+template <> +struct GraphTraits<VPDomTreeNode *> + : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> + : public DomTreeGraphTraitsBase<const VPDomTreeNode, + VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { return TopRegion; } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { + PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); + return PCFGBuilder.buildPlainCFG(); +} + // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() { // Build Top Region enclosing the plain CFG and set it as VPlan entry. - PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); - VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); + VPRegionBlock *TopRegion = buildPlainCFG(); Plan.setEntry(TopRegion); LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); Verifier.verifyHierarchicalCFG(TopRegion); + + // Compute plain CFG dom tree for VPLInfo. + VPDomTree.recalculate(*TopRegion); + LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; + VPDomTree.print(dbgs())); + + // Compute VPLInfo and keep it in Plan. + VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); + VPLInfo.analyze(VPDomTree); + LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; + VPLInfo.print(dbgs())); } diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@ #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H #include "VPlan.h" +#include "VPlanDominatorTree.h" #include "VPlanVerifier.h" namespace llvm { class Loop; +class VPlanTestBase; /// Main class to build the VPlan H-CFG for an incoming IR. class VPlanHCFGBuilder { + friend VPlanTestBase; + private: // The outermost loop of the input loop nest considered for vectorization. Loop *TheLoop; @@ -41,14 +45,27 @@ private: // Loop Info analysis. LoopInfo *LI; + // The VPlan that will contain the H-CFG we are building. + VPlan &Plan; + // VPlan verifier utility. VPlanVerifier Verifier; + // Dominator analysis for VPlan plain CFG to be used in the + // construction of the H-CFG. This analysis is no longer valid once regions + // are introduced. + VPDominatorTree VPDomTree; + + /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) + /// enclosing the plain CFG. + VPRegionBlock *buildPlainCFG(); + public: - VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) + : TheLoop(Lp), LI(LI), Plan(P) {} - /// Build H-CFG for TheLoop and update \p Plan accordingly. - void buildHierarchicalCFG(VPlan &Plan); + /// Build H-CFG for TheLoop and update Plan accordingly. 
+ void buildHierarchicalCFG(); }; } // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h b/contrib/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h new file mode 100644 index 000000000000..5c2485fc2145 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h @@ -0,0 +1,45 @@ +//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a +/// specialization of LoopInfoBase for VPBlockBase. VPLoop is a specialization +/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further +/// information can be found in VectorizationPlanner.rst. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H + +#include "llvm/Analysis/LoopInfoImpl.h" + +namespace llvm { +class VPBlockBase; + +/// Hold analysis information for every loop detected by VPLoopInfo. It is an +/// instantiation of LoopBase. +class VPLoop : public LoopBase<VPBlockBase, VPLoop> { +private: + friend class LoopInfoBase<VPBlockBase, VPLoop>; + explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {} +}; + +/// VPLoopInfo provides analysis of natural loops for VPBlockBase-based +/// Hierarchical CFG. It is a specialization of LoopInfoBase class. +// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which +// is the same as the incoming IR CFG. If it's more efficient than running the +// whole loop detection algorithm, we may want to create a mechanism to +// translate LoopInfo into VPLoopInfo. However, that would require significant +// changes in LoopInfoBase class. +typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp b/contrib/llvm/tools/llvm-mca/DispatchStage.cpp index be6f1f89be5c..1f508886c298 100644 --- a/contrib/llvm/tools/llvm-mca/DispatchStage.cpp +++ b/contrib/llvm/tools/llvm-mca/DispatchStage.cpp @@ -107,17 +107,21 @@ void DispatchStage::dispatch(InstRef IR) { // instruction. A dependency-breaking instruction is a zero-latency // instruction that doesn't consume hardware resources. // An example of a dependency-breaking instruction on X86 is a zero-idiom XOR. - if (!Desc.isZeroLatency()) - for (std::unique_ptr<ReadState> &RS : IS.getUses()) + bool IsDependencyBreaking = IS.isDependencyBreaking(); + for (std::unique_ptr<ReadState> &RS : IS.getUses()) + if (RS->isImplicitRead() || !IsDependencyBreaking) updateRAWDependencies(*RS, STI); // By default, a dependency-breaking zero-latency instruction is expected to // be optimized at register renaming stage. That means, no physical register // is allocated to the instruction.
+ bool ShouldAllocateRegisters = + !(Desc.isZeroLatency() && IsDependencyBreaking); SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles()); - for (std::unique_ptr<WriteState> &WS : IS.getDefs()) + for (std::unique_ptr<WriteState> &WS : IS.getDefs()) { PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles, - !Desc.isZeroLatency()); + ShouldAllocateRegisters); + } // Reserve slots in the RCU, and notify the instruction that it has been // dispatched to the schedulers for execution. diff --git a/contrib/llvm/tools/llvm-mca/DispatchStage.h b/contrib/llvm/tools/llvm-mca/DispatchStage.h index f21789a29c50..4262a241c08c 100644 --- a/contrib/llvm/tools/llvm-mca/DispatchStage.h +++ b/contrib/llvm/tools/llvm-mca/DispatchStage.h @@ -38,7 +38,7 @@ class Scheduler; // the following conditions are met: // 1) There are enough entries in the reorder buffer (see class // RetireControlUnit) to write the opcodes associated with the instruction. -// 2) There are enough temporaries to rename output register operands. +// 2) There are enough physical registers to rename output register operands. // 3) There are enough entries available in the used buffered resource(s). // // The number of micro opcodes that can be dispatched in one cycle is limited by diff --git a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp b/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp index dbd457196f9d..053b7b4e8175 100644 --- a/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp +++ b/contrib/llvm/tools/llvm-mca/InstrBuilder.cpp @@ -443,6 +443,10 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // register writes implicitly clear the upper portion of a super-register. MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + // Check if this is a dependency breaking instruction. + if (MCIA.isDependencyBreaking(STI, MCI)) + NewIS->setDependencyBreaking(); + // Initialize writes. unsigned WriteIndex = 0; for (const WriteDescriptor &WD : D.Writes) { diff --git a/contrib/llvm/tools/llvm-mca/Instruction.h b/contrib/llvm/tools/llvm-mca/Instruction.h index ddf5c3a5e33f..3b2f90528f2e 100644 --- a/contrib/llvm/tools/llvm-mca/Instruction.h +++ b/contrib/llvm/tools/llvm-mca/Instruction.h @@ -170,8 +170,6 @@ class ReadState { bool IsReady; public: - bool isReady() const { return IsReady; } - ReadState(const ReadDescriptor &Desc, unsigned RegID) : RD(Desc), RegisterID(RegID), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {} @@ -182,6 +180,9 @@ public: unsigned getSchedClass() const { return RD.SchedClassID; } unsigned getRegisterID() const { return RegisterID; } + bool isReady() const { return IsReady; } + bool isImplicitRead() const { return RD.isImplicitRead(); } + void cycleEvent(); void writeStartEvent(unsigned Cycles); void setDependentWrites(unsigned Writes) { @@ -299,6 +300,8 @@ class Instruction { // Retire Unit token ID for this instruction. 
unsigned RCUTokenID; + bool IsDepBreaking; + using UniqueDef = std::unique_ptr<WriteState>; using UniqueUse = std::unique_ptr<ReadState>; using VecDefs = std::vector<UniqueDef>; @@ -314,7 +317,8 @@ class Instruction { public: Instruction(const InstrDesc &D) - : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES) {} + : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0), + IsDepBreaking(false) {} Instruction(const Instruction &Other) = delete; Instruction &operator=(const Instruction &Other) = delete; @@ -326,6 +330,9 @@ public: unsigned getRCUTokenID() const { return RCUTokenID; } int getCyclesLeft() const { return CyclesLeft; } + bool isDependencyBreaking() const { return IsDepBreaking; } + void setDependencyBreaking() { IsDepBreaking = true; } + unsigned getNumUsers() const { unsigned NumUsers = 0; for (const UniqueDef &Def : Defs) diff --git a/contrib/llvm/tools/llvm-mca/README.txt b/contrib/llvm/tools/llvm-mca/README.txt deleted file mode 100644 index 8b1670db0fca..000000000000 --- a/contrib/llvm/tools/llvm-mca/README.txt +++ /dev/null @@ -1,865 +0,0 @@ -llvm-mca - LLVM Machine Code Analyzer ------------------------------------- - -llvm-mca is a performance analysis tool that uses information which is already -available in LLVM (e.g., scheduling models) to statically measure the -performance of machine code on a specific CPU. - -Performance is measured in terms of throughput as well as processor resource -consumption. The tool currently works for processors with an out-of-order -backend, for which there is a scheduling model available in LLVM. - -The main goal of this tool is not just to predict the performance of the code -when run on the target, but also to help with diagnosing potential performance -issues. - -Given an assembly code sequence, llvm-mca estimates the IPC (Instructions Per -Cycle), as well as hardware resource pressure. The analysis and reporting style -were inspired by the IACA tool from Intel. - -The presence of long data dependency chains, as well as poor usage of hardware -resources may lead to bottlenecks in the backend. The tool is able to generate -a detailed report which should help with identifying and analyzing sources of -bottlenecks. - -Scheduling models are mostly used to compute instruction latencies, to obtain -read-advance information, and understand how processor resources are used by -instructions. By design, the quality of the performance analysis conducted by -the tool is inevitably affected by the quality of the target scheduling models. -However, scheduling models intentionally do not describe all processor details, -since the goal is just to enable the scheduling of machine instructions during -compilation. That means, there are processor details which are not important for -the purpose of scheduling instructions (and therefore not described by the -scheduling model), but are very important for this tool. - -A few examples of details that are missing in scheduling models are: - - Actual dispatch width (it often differs from the issue width). - - Number of read/write ports in the register file(s). - - Length of the load/store queue in the LSUnit. - -It is also very difficult to find a "good" abstract model to describe the -behavior of out-of-order processors. So, we have to keep in mind that all of -these aspects are going to affect the quality of the static analysis performed -by the tool. - -An extensive list of known limitations is reported in one of the last sections -of this document.
There is also a section related to design problems which must -be addressed (hopefully with the help of the community). At the moment, the -tool has been mostly tested for x86 targets, but there are still several -limitations, some of which could be overcome by integrating extra information -into the scheduling models. - -How the tool works ------------------- - -The tool takes assembly code as input. Assembly code is parsed into a sequence -of MCInst with the help of the existing LLVM target assembly parsers. The parsed -sequence of MCInst is then analyzed by a 'Pipeline' module to generate a -performance report. - -The Pipeline module internally emulates the execution of the machine code -sequence in a loop of iterations (which by default is 100). At the end of this -process, the pipeline collects a number of statistics which are then printed out -in the form of a report. - -Here is an example of a performance report generated by the tool for a dot-product -of two packed float vectors of four elements. The analysis is conducted for -target x86, CPU btver2: - -/////////////////// - -Iterations: 300 -Instructions: 900 -Total Cycles: 610 -Dispatch Width: 2 -IPC: 1.48 - - -Resources: -[0] - JALU0 -[1] - JALU1 -[2] - JDiv -[3] - JFPM -[4] - JFPU0 -[5] - JFPU1 -[6] - JLAGU -[7] - JSAGU -[8] - JSTC -[9] - JVIMUL - - -Resource pressure per iteration: -[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] - - - - - 2.00 1.00 - - - - - -Resource pressure by instruction: -[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: - - - - - - 1.00 - - - - vmulps %xmm0, %xmm1, %xmm2 - - - - - 1.00 - - - - - vhaddps %xmm2, %xmm2, %xmm3 - - - - - 1.00 - - - - - vhaddps %xmm3, %xmm3, %xmm4 - - -Instruction Info: -[1]: #uOps -[2]: Latency -[3]: RThroughput -[4]: MayLoad -[5]: MayStore -[6]: HasSideEffects - -[1] [2] [3] [4] [5] [6] Instructions: - 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 - 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 - 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 - -/////////////////// - -According to this report, the dot-product kernel has been executed 300 times, -for a total of 900 instructions dynamically executed. - -The report is structured in three main sections. The first section collects a few -performance numbers; the goal of this section is to give a very quick overview -of the performance throughput. In this example, the two important performance -indicators are a) the predicted total number of cycles, and b) the IPC. -IPC is probably the most important throughput indicator. A big delta between the -Dispatch Width and the computed IPC is an indicator of potential performance -issues. - -The second section is the so-called "resource pressure view". This view reports -the average number of resource cycles consumed every iteration by instructions -for every processor resource unit available on the target. Information is -structured in two tables. The first table reports the number of resource cycles -spent on average every iteration. The second table correlates the resource -cycles to the machine instruction in the sequence. For example, every iteration -of the dot-product, instruction 'vmulps' always executes on resource unit [5] -(JFPU1 - floating point pipeline #1), consuming an average of 1 resource cycle -per iteration. Note that on Jaguar, vector FP multiply can only be issued to -pipeline JFPU1, while horizontal FP adds can only be issued to pipeline JFPU0. - -The third (and last) section of the report shows the latency and reciprocal -throughput of every instruction in the sequence.
That section also reports extra -information related to the number of micro opcodes, and opcode properties (i.e., -'MayLoad', 'MayStore', and 'UnmodeledSideEffects'). - -The resource pressure view helps with identifying bottlenecks caused by high -usage of specific hardware resources. Situations with resource pressure mainly -concentrated on a few resources should, in general, be avoided. Ideally, -pressure should be uniformly distributed between multiple resources. - -Timeline View -------------- - -A detailed report of each instruction's state transitions over time can be -enabled using the command line flag '-timeline'. This prints an extra section -in the report which contains the so-called "timeline view". Below is the -timeline view for the dot-product example from the previous section. - -/////////////// -Timeline view: - 012345 -Index 0123456789 - -[0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 -[0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 -[0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 - -[1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 -[1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3 -[1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 - -[2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 -[2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 -[2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4 - - -Average Wait times (based on the timeline view): -[0]: Executions -[1]: Average time spent waiting in a scheduler's queue -[2]: Average time spent waiting in a scheduler's queue while ready -[3]: Average time elapsed from WB until retire stage - - [0] [1] [2] [3] -0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 -1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 -2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 -/////////////// - -The timeline view is very interesting because it shows how instructions changed -in state during execution. It also gives an idea of how the tool "sees" -instructions executed on the target. - -The timeline view is structured in two tables. The first table shows how -instructions change in state over time (measured in cycles); the second table -(named "Average Wait times") reports useful timing statistics which should help -diagnose performance bottlenecks caused by long data dependencies and -sub-optimal usage of hardware resources. - -An instruction in the timeline view is identified by a pair of indices, where -the 'first' index identifies an iteration, and the 'second' index is the actual -instruction index (i.e., where it appears in the code sequence). - -Excluding the first and last column, the remaining columns are in cycles. -Cycles are numbered sequentially starting from 0. The following characters are -used to describe the state of an instruction: - - D : Instruction dispatched. - e : Instruction executing. - E : Instruction executed. - R : Instruction retired. - = : Instruction already dispatched, waiting to be executed. - - : Instruction executed, waiting to be retired. - -Based on the timeline view from the example, we know that: - - Instruction [1, 0] was dispatched at cycle 1. - - Instruction [1, 0] started executing at cycle 2. - - Instruction [1, 0] reached the write back stage at cycle 4. - - Instruction [1, 0] was retired at cycle 10. - -Instruction [1, 0] (i.e., the vmulps from iteration #1) doesn't have to wait in -the scheduler's queue for the operands to become available. By the time the -vmulps is dispatched, operands are already available, and pipeline JFPU1 is -ready to serve another instruction. 
So the instruction can be immediately -issued on the JFPU1 pipeline. That is demonstrated by the fact that the -instruction only spent 1cy in the scheduler's queue. - -There is a gap of 5 cycles between the write-back stage and the retire event. -That is because instructions must retire in program order, so [1,0] has to wait -for [0, 2] to be retired first (i.e., it has to wait until cycle 10). - -In the dot-product example, all instructions are in a RAW (Read After Write) -dependency chain. Register %xmm2 written by the vmulps is immediately used by -the first vhaddps, and register %xmm3 written by the first vhaddps is used by -the second vhaddps. Long data dependencies negatively affect the ILP -(Instruction Level Parallelism). - -In the dot-product example, there are anti-dependencies introduced by -instructions from different iterations. However, those dependencies can be -removed at register renaming stage (at the cost of allocating register aliases, -and therefore consuming temporary registers). - -Table "Average Wait times" helps diagnose performance issues that are caused by -the presence of long latency instructions and potentially long data dependencies -which may limit the ILP. Note that the tool by default assumes at least 1cy -between the dispatch event and the issue event. - -When the performance is limited by data dependencies and/or long latency -instructions, the number of cycles spent while in the "ready" state is expected -to be very small when compared with the total number of cycles spent in the -scheduler's queue. So the difference between the two counters is a good -indicator of how big of an impact data dependencies had on the execution of -instructions. When performance is mostly limited by the lack of hardware -resources, the delta between the two counters is small. However, the number of -cycles spent in the queue tends to be bigger (i.e., more than 1-3cy) especially -when compared with other low latency instructions. - -Extra statistics to further diagnose performance issues. --------------------------------------------------------- - -Flag '-verbose' enables extra statistics and performance counters for the -dispatch logic, the reorder buffer, the retire control unit and the register -file. - -Below is an example of verbose output generated by the tool for the dot-product -example discussed in the previous sections. - -/////////////////// -Iterations: 300 -Instructions: 900 -Total Cycles: 610 -Dispatch Width: 2 -IPC: 1.48 - - -Dynamic Dispatch Stall Cycles: -RAT - Register unavailable: 0 -RCU - Retire tokens unavailable: 0 -SCHEDQ - Scheduler full: 272 -LQ - Load queue full: 0 -SQ - Store queue full: 0 -GROUP - Static restrictions on the dispatch group: 0 - - -Register Alias Table: -Total number of mappings created: 900 -Max number of mappings used: 35 - - -Dispatch Logic - number of cycles where we saw N instructions dispatched: -[# dispatched], [# cycles] - 0, 24 (3.9%) - 1, 272 (44.6%) - 2, 314 (51.5%) - - -Schedulers - number of cycles where we saw N instructions issued: -[# issued], [# cycles] - 0, 7 (1.1%) - 1, 306 (50.2%) - 2, 297 (48.7%) - - -Retire Control Unit - number of cycles where we saw N instructions retired: -[# retired], [# cycles] - 0, 109 (17.9%) - 1, 102 (16.7%) - 2, 399 (65.4%) - - -Scheduler's queue usage: -JALU01, 0/20 -JFPU01, 18/18 -JLSAGU, 0/12 -/////////////////// - -Based on the verbose report, the pipeline was only able to dispatch two -instructions 51.5% of the time. 
The dispatch group was limited to one -instruction 44.6% of the cycles, which corresponds to 272 cycles. - -If we look at section "Dynamic Dispatch Stall Cycles", we can see how counter -SCHEDQ reports 272 cycles. Counter SCHEDQ is incremented every time the -dispatch logic is unable to dispatch a full group of two instructions because -the scheduler's queue is full. - -Section "Scheduler's queue usage" shows that the number of buffer entries -(i.e., scheduler's queue entries) used at runtime for resource JFPU01 reached -its maximum. Note that AMD Jaguar implements three schedulers: - * JALU01 - A scheduler for ALU instructions - * JLSAGU - A scheduler for address generation - * JFPU01 - A scheduler for floating point operations. - -The dot-product is a kernel of three floating point instructions (a vector -multiply followed by two horizontal adds). That explains why only the floating -point scheduler appears to be used according to section "Scheduler's queue -usage". - -A full scheduler's queue is either caused by data dependency chains, or by a -sub-optimal usage of hardware resources. Sometimes, resource pressure can be -mitigated by rewriting the kernel using different instructions that consume -different scheduler resources. Schedulers with a small queue are less resilient -to bottlenecks caused by the presence of long data dependencies. - -In this example, we can conclude that the IPC is mostly limited by data -dependencies, and not by resource pressure. - -LLVM-MCA instruction flow -------------------------- - -This section describes the instruction flow through the out-of-order backend, -as well as the functional units involved in the process. - -An instruction goes through a default sequence of stages: - - Dispatch (Instruction is dispatched to the schedulers). - - Issue (Instruction is issued to the processor pipelines). - - Write Back (Instruction is executed, and results are written back). - - Retire (Instruction is retired; writes are architecturally committed). - -The tool only models the out-of-order portion of a processor. Therefore, the -instruction fetch and decode stages are not modeled. Performance bottlenecks in -the frontend are not diagnosed by this tool. The tool assumes that instructions -have all been decoded and placed in a queue. Also, the tool doesn't know -anything about branch prediction. - -The long term plan is to make the process customizable, so that processors can -define their own. This is future work. - -Instruction Dispatch --------------------- - -During the Dispatch stage, instructions are picked in program order from a queue -of already decoded instructions, and dispatched in groups to the hardware -schedulers. The dispatch logic is implemented by class DispatchStage in file -DispatchStage.h. - -The size of a dispatch group depends on the availability of hardware resources, -and it cannot exceed the value of field 'DispatchWidth' in class DispatchStage. -Note that field DispatchWidth defaults to the value of field 'IssueWidth' from -the scheduling model. - -Users can override the DispatchWidth value with flag "-dispatch=<N>" (where 'N' -is an unsigned quantity). - -An instruction can be dispatched if: - - The size of the dispatch group is smaller than DispatchWidth - - There are enough entries in the reorder buffer - - There are enough temporary registers to do register renaming - - Schedulers are not full.
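Putting those four conditions together, the per-cycle dispatch check can be sketched as follows. This is only an illustration of the rules listed above, not the actual DispatchStage interface; the helper names (hasROBSlots, hasFreeTemporaries, schedulersFull) are hypothetical:

  // Sketch of the dispatch-eligibility rules listed above. Every helper
  // below is a hypothetical stand-in for a check performed by DispatchStage.
  bool canDispatch(const InstRef &IR, unsigned GroupSize) const {
    return GroupSize < DispatchWidth   // dispatch group still has room
        && hasROBSlots(IR)             // enough reorder buffer entries
        && hasFreeTemporaries(IR)      // registers available for renaming
        && !schedulersFull(IR);        // buffered resources not exhausted
  }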
-Since r329067, scheduling models can now optionally specify which register -files are available on the processor. Class DispatchStage (see DispatchStage.h) -would use that information to initialize register file descriptors. - -By default, if the model doesn't describe register files, the tool -(optimistically) assumes a single register file with an unbounded number of -temporary registers. Users can limit the number of temporary registers that -are globally available for register renaming using flag -`-register-file-size=<N>`, where N is the number of temporaries. A value of -zero for N means 'unbounded'. Knowing how many temporaries are available for -register renaming, the tool can predict dispatch stalls caused by the lack of -temporaries. - -The number of reorder buffer entries consumed by an instruction depends on the -number of micro-opcodes it specifies in the target scheduling model (see field -'NumMicroOpcodes' of TableGen class ProcWriteResources and its derived classes; -TargetSchedule.td). - -The reorder buffer is implemented by class RetireControlUnit (see -DispatchStage.h). Its goal is to track the progress of instructions that are -"in-flight", and retire instructions in program order. The number of entries -in the reorder buffer defaults to the value of field 'MicroOpBufferSize' from -the target scheduling model. - -Instructions that are dispatched to the schedulers consume scheduler buffer -entries. The tool queries the scheduling model to figure out the set of -buffered resources consumed by an instruction. Buffered resources are treated -like "scheduler" resources, and the field 'BufferSize' (from the processor -resource TableGen definition) defines the size of the scheduler's queue. - -Zero latency instructions (for example NOP instructions) don't consume scheduler -resources. However, those instructions still reserve a number of slots in the -reorder buffer. - -Instruction Issue ------------------ - -As mentioned in the previous section, each scheduler resource implements a queue -of instructions. An instruction has to wait in the scheduler's queue until -input register operands become available. Only at that point does the -instruction become eligible for execution and may be issued (potentially -out-of-order) to a pipeline for execution. - -Instruction latencies can be computed by the tool with the help of the -scheduling model; latency values are defined by the scheduling model through -ProcWriteResources objects. - -Class Scheduler (see file Scheduler.h) knows how to emulate multiple processor -schedulers. A Scheduler is responsible for tracking data dependencies, and -dynamically selecting which processor resources are consumed/used by instructions. - -Internally, the Scheduler class delegates the management of processor resource -units and resource groups to the ResourceManager class. ResourceManager is also -responsible for selecting resource units that are effectively consumed by -instructions. For example, if an instruction consumes 1cy of a resource group, -the ResourceManager object selects one of the available units from the group; by -default, it uses a round-robin selector to guarantee that resource usage is -uniformly distributed between all units of a group. - -Internally, class Scheduler implements three instruction queues: - - WaitQueue: a queue of instructions whose operands are not ready yet. - - ReadyQueue: a queue of instructions ready to execute. - - IssuedQueue: a queue of instructions executing. - -Depending on operand availability, instructions that are dispatched to the -Scheduler are either placed into the WaitQueue or into the ReadyQueue.
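As a rough illustration of that routing decision (the real Scheduler interface differs; isReady here is a hypothetical stand-in for the operand-readiness check):

  // Sketch: route a dispatched instruction based on operand availability.
  void dispatchToScheduler(InstRef IR) {
    if (isReady(IR))
      ReadyQueue.push_back(IR);  // operands available: eligible to issue
    else
      WaitQueue.push_back(IR);   // must wait for dependent writes
  }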
- -Every cycle, class Scheduler checks if instructions can be moved from the -WaitQueue to the ReadyQueue, and if instructions from the ReadyQueue can be -issued to the underlying pipelines. The algorithm prioritizes older -instructions over younger instructions. - -Objects of class ResourceState (see Scheduler.h) describe processor resources. -There is an instance of class ResourceState for each single processor resource -specified by the scheduling model. A ResourceState object for a processor -resource with multiple units dynamically tracks the availability of every single -unit. For example, the ResourceState of a resource group tracks the -availability of every resource in that group. Internally, ResourceState -implements a round-robin selector to dynamically pick the next unit to use from -the group. - -Write-Back and Retire Stage ---------------------------- - -Issued instructions are moved from the ReadyQueue to the IssuedQueue. There, -instructions wait until they reach the write-back stage. At that point, they -get removed from the queue and the retire control unit is notified. - -On the event of "instruction executed", the retire control unit flags the -instruction as "ready to retire". - -Instructions are retired in program order; an "instruction retired" event is sent -to the register file which frees the temporary registers allocated for the -instruction at register renaming stage. - -Load/Store Unit and Memory Consistency Model --------------------------------------------- - -The tool attempts to emulate out-of-order execution of memory operations. Class -LSUnit (see file LSUnit.h) emulates a load/store unit implementing queues for -speculative execution of loads and stores. - -Each load (or store) consumes an entry in the load (or store) queue. The number -of slots in the load/store queues is unknown by the tool, since there is no -mention of it in the scheduling model. In practice, users can specify flag -`-lqueue=N` (resp. `-squeue=N`) to limit the number of entries in the queue to be -equal to exactly N (an unsigned value). If N is zero, then the tool assumes an -unbounded queue (this is the default). - -LSUnit implements a relaxed consistency model for memory loads and stores. The -rules are: -1) A younger load is allowed to pass an older load only if there is no - intervening store in between the two loads. -2) A younger store is not allowed to pass an older store. -3) A younger store is not allowed to pass an older load. -4) A younger load is allowed to pass an older store provided that the load does - not alias with the store. - -By default, this class conservatively (i.e., pessimistically) assumes that loads -always may-alias store operations. Essentially, this LSUnit doesn't perform -any sort of alias analysis to rule out cases where loads and stores don't -overlap with each other. The downside of this approach, however, is that younger -loads are never allowed to pass older stores. To make it possible for a -younger load to pass an older store, users can use the command line flag --noalias. Under 'noalias', a younger load is always allowed to pass an older -store. - -Note that, in the case of write-combining memory, rule 2. could be relaxed a bit -to allow reordering of non-aliasing store operations. That being said, at the -moment, there is no way to further relax the memory model (flag -noalias is the -only option). Essentially, there is no option to specify a different memory -type (for example: write-back, write-combining, write-through; etc.) and -consequently to weaken or strengthen the memory model.
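The four ordering rules above can be condensed into a small predicate. The sketch below is illustrative only; 'NoAlias' mirrors the -noalias flag, and the real LSUnit logic additionally tracks intervening stores and barriers:

  // Sketch: may a younger memory operation pass an older one?
  bool mayPassOlderOp(bool YoungerIsLoad, bool OlderIsLoad, bool NoAlias) {
    if (!YoungerIsLoad)
      return false;    // rules 2 and 3: a younger store passes nothing
    if (OlderIsLoad)
      return true;     // rule 1 (assuming no intervening store)
    return NoAlias;    // rule 4: pass an older store only under -noalias
  }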
- -Other limitations are: - * LSUnit doesn't know when store-to-load forwarding may occur. - * LSUnit doesn't know anything about the cache hierarchy and memory types. - * LSUnit doesn't know how to identify serializing operations and memory fences. - -No assumption is made on the store buffer size. As mentioned before, LSUnit -conservatively assumes a may-alias relation between loads and stores, and it -doesn't attempt to identify cases where store-to-load forwarding would occur in -practice. - -LSUnit doesn't attempt to predict whether a load or store hits or misses the L1 -cache. It only knows if an instruction "MayLoad" and/or "MayStore". For loads, -the scheduling model provides an "optimistic" load-to-use latency (which usually -matches the load-to-use latency for when there is a hit in the L1D). - -Class MCInstrDesc in LLVM doesn't know about serializing operations, nor -memory-barrier-like instructions. LSUnit conservatively assumes that an -instruction which has both 'MayLoad' and 'UnmodeledSideEffects' behaves like a -"soft" load-barrier. That means, it serializes loads without forcing a flush of -the load queue. Similarly, instructions flagged with both 'MayStore' and -'UnmodeledSideEffects' are treated like store barriers. A full memory barrier -is a 'MayLoad' and 'MayStore' instruction with 'UnmodeledSideEffects'. This is -inaccurate, but it is the best that we can do at the moment with the current -information available in LLVM. - -A load/store barrier consumes one entry of the load/store queue. A load/store -barrier enforces ordering of loads/stores. A younger load cannot pass a load -barrier. Also, a younger store cannot pass a store barrier. A younger load has -to wait for the memory/load barrier to execute. A load/store barrier is -"executed" when it becomes the oldest entry in the load/store queue(s). That -also means, by construction, all the older loads/stores have been executed. - -In conclusion, the full set of rules is: - 1. A store may not pass a previous store. - 2. A load may not pass a previous store unless flag 'NoAlias' is set. - 3. A load may pass a previous load. - 4. A store may not pass a previous load (regardless of flag 'NoAlias'). - 5. A load has to wait until an older load barrier is fully executed. - 6. A store has to wait until an older store barrier is fully executed. - -Known limitations ------------------ -Previous sections described cases where the tool is missing information to give -an accurate report. For example, the first sections of this document explained -how the lack of knowledge about the processor negatively affects the performance -analysis. The lack of knowledge is often a consequence of how scheduling models -are defined; as mentioned before, scheduling models intentionally don't describe -processors in fine detail. That being said, the LLVM machine model can be -extended to expose more details, as long as they are opt-in for targets. - -The accuracy of the performance analysis is also affected by assumptions made by -the processor model used by the tool. - -Most recent Intel and AMD processors implement a dedicated LoopBuffer/OpCache in -the hardware frontend to speed up the throughput in the presence of tight loops. -The presence of these buffers complicates the decoding logic, and requires -knowledge of the branch predictor too. Class 'SchedMachineModel' in TableGen -provides a field named 'LoopMicroOpBufferSize' which is used to describe loop -buffers.
However, the purpose of that field is to enable loop unrolling of -tight loops; essentially, it affects the cost model used by pass loop-unroll. - -In its current state, the tool only describes the out-of-order portion of a -processor, and consequently doesn't try to predict the frontend throughput. That -being said, this tool could certainly be extended in the future to also account for -the hardware frontend when doing performance analysis. This would inevitably -require extra (extensive) processor knowledge related to all the available -decoding paths in the hardware frontend, as well as branch prediction. - -Currently, the tool assumes a zero-latency "perfect" fetch&decode -stage; the full sequence of decoded instructions is immediately visible to the -dispatch logic from the start. - -The tool doesn't know about simultaneous multithreading. According to the tool, -processor resources are not statically/dynamically partitioned. Processor -resources are fully available to the hardware thread executing the -microbenchmark. - -The execution model implemented by this tool assumes that instructions are -first dispatched in groups to hardware schedulers, and then issued to -pipelines for execution. The model assumes dynamic scheduling of instructions. -Instructions are placed in a queue and potentially executed out-of-order (based -on the operand availability). The dispatch stage is definitely distinct from the -issue stage. This will change in the future; as mentioned in the first section, the -end goal is to let processors customize the process. - -This model doesn't correctly describe processors where the dispatch/issue is a -single stage. This is what happens for example in VLIW processors, where -instructions are packaged and statically scheduled at compile time; it is up to -the compiler to predict the latency of instructions and package issue groups -accordingly. For such targets, there is no dynamic scheduling done by the -hardware. - -Existing classes (DispatchStage, Scheduler, etc.) could be extended/adapted to -support processors with a single dispatch/issue stage. The execution flow would -require some changes in the way existing components (i.e., DispatchStage, -Scheduler, etc.) interact. This can be a future development. - -The following sections describe other known limitations. The goal is not to -provide an extensive list of limitations; we want to report what we believe are -the most important limitations, and suggest possible methods to overcome them. - -Load/Store barrier instructions and serializing operations ----------------------------------------------------------- -Section "Load/Store Unit and Memory Consistency Model" already mentioned how -LLVM doesn't know about serializing operations and memory barriers. Most of it -boils down to the fact that class MCInstrDesc (intentionally) doesn't expose -those properties. Instead, both serializing operations and memory barriers -"have side-effects" according to MCInstrDesc. That is because, at least for -scheduling purposes, knowing that an instruction has unmodeled side effects is -often enough to treat the instruction like a compiler scheduling barrier. - -A performance analysis tool could use the extra knowledge on barriers and -serializing operations to generate a more accurate performance report. One way -to improve this is by reserving a couple of bits in field 'Flags' from class -MCInstrDesc: one bit for barrier operations, and another bit to mark -instructions as serializing operations.
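For example, the two extra bits could look like the following sketch. The enumerator names are made up for illustration; MCInstrDesc does not define them today:

  // Hypothetical extension of the MCInstrDesc 'Flags' bits (illustrative
  // names only; these flags do not exist in LLVM at the moment).
  enum HypotheticalMCIDFlags : uint64_t {
    IsMemoryBarrier = 1ULL << 62, // instruction orders loads/stores
    IsSerializing   = 1ULL << 63  // instruction serializes execution
  };

  bool isMemoryBarrier(const MCInstrDesc &Desc) {
    return Desc.getFlags() & IsMemoryBarrier;
  }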
- -Lack of support for instruction itineraries -------------------------------------------- -The current version of the tool doesn't know how to process instruction -itineraries. This is probably one of the most important limitations, since it -affects a few out-of-order processors in LLVM. - -As mentioned in section 'Instruction Issue', class Scheduler delegates to an -instance of class ResourceManager the handling of processor resources. -ResourceManager is where most of the scheduling logic is implemented. - -Adding support for instruction itineraries requires that we teach -ResourceManager how to handle functional units and instruction stages. This -development can be a future extension, and it would probably require a few -changes to the ResourceManager interface. - -Instructions that affect control flow are not correctly modeled ---------------------------------------------------------------- -Examples of instructions that affect the control flow are: return, indirect -branches, calls, etc. The tool doesn't try to predict/evaluate branch targets. -In particular, the tool doesn't model any sort of branch prediction, nor does it -attempt to track changes to the program counter. The tool always assumes that -the input assembly sequence is the body of a microbenchmark (a simple loop -executed for a number of iterations). The "next" instruction in sequence is -always the next instruction to dispatch. - -Call instructions default to an arbitrarily high latency of 100cy. A warning is -generated if the tool encounters a call instruction in the sequence. Return -instructions are not evaluated, and therefore control flow is not affected. -However, the tool still queries the processor scheduling model to obtain latency -information for instructions that affect the control flow. - -Known limitations on X86 processors ------------------------------------ - -1) Partial register updates versus full register updates. - -On x86-64, a 32-bit GPR write fully updates the super-register. Example: - add %edi, %eax ## eax += edi - -Here, register %eax aliases the lower half of 64-bit register %rax. On x86-64, -register %rax is fully updated by the 'add' (the upper half of %rax is zeroed). -Essentially, it "kills" any previous definition of (the upper half of) register -%rax. - -On the other hand, 8/16 bit register writes only perform a so-called "partial -register update". Example: - add %di, %ax ## ax += di - -Here, register %eax is only partially updated. To be more specific, the lower -half of %eax is set, and the upper half is left unchanged. There is also no -change in the upper 48 bits of register %rax. - -To get accurate performance analysis, the tool has to know which instructions -perform a partial register update, and which instructions fully update the -destination's super-register. - -One way to expose this information is (again) via TableGen. For example, we -could add a flag in the TableGen instruction class to tag instructions that -perform partial register updates. Something like this: 'bit -hasPartialRegisterUpdate = 1'. However, this would force a `let -hasPartialRegisterUpdate = 0` on several instruction definitions. - -Another approach is to have an MCSubtargetInfo hook similar to this: - virtual bool updatesSuperRegisters(unsigned short opcode) { return false; } - -Targets will be able to override this method if needed. Again, this is just an -idea. But the plan is to have this fixed as a future development.
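If such a hook existed, a target override might look like the sketch below. The opcode list is purely illustrative; in practice the set of opcodes would be generated from TableGen data:

  // Hypothetical X86 override of the updatesSuperRegisters() hook sketched
  // above. On x86-64, writes to a 32-bit GPR zero the upper 32 bits.
  bool X86Subtarget::updatesSuperRegisters(unsigned short Opcode) {
    switch (Opcode) {
    case X86::ADD32rr:
    case X86::MOV32rr:
      return true;  // 32-bit GPR write: super-register fully updated
    default:
      return false; // conservative default
    }
  }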
- -2) Macro Op fusion. - -The tool doesn't know about macro-op fusion. On modern x86 processors, a -'cmp/test' followed by a 'jmp' is fused into a single macro operation. The -advantage is that the fused pair only consumes a single slot in the dispatch -group. - -As a future development, the tool should be extended to address macro-fusion. -Ideally, we could have LLVM generate a table enumerating all the opcode pairs -that can be fused together. That table could be exposed to the tool via the -MCSubtargetInfo interface. This is just an idea; there may be better ways to -implement this. - -3) Intel processors: mixing legacy SSE with AVX instructions. - -On modern Intel processors with AVX, mixing legacy SSE code with AVX code -negatively impacts the performance. The tool is not aware of this issue, and -the performance penalty is not accounted for when doing the analysis. This is -something that we would like to improve in the future. - -4) Zero-latency register moves and Zero-idioms. - -Most modern AMD/Intel processors know how to optimize out register-register -moves and zero idioms at register renaming stage. The tool doesn't know -about these patterns, and this may negatively impact the performance analysis. - -Known design problems ---------------------- -This section describes two design issues that are currently affecting the tool. -The long term plan is to "fix" these issues. -Both limitations would be easily fixed if we taught the tool how to directly -manipulate MachineInstr objects (instead of MCInst objects). - -1) Variant instructions not correctly modeled. - -The tool doesn't know how to analyze instructions with a "variant" scheduling -class descriptor. A variant scheduling class needs to be resolved dynamically. -The "actual" scheduling class often depends on the subtarget, as well as -properties of the specific MachineInstr object. - -Unfortunately, the tool manipulates MCInst, and it doesn't know anything about -MachineInstr. As a consequence, the tool cannot use the existing machine -subtarget hooks that are normally used to resolve the variant scheduling class. -This is a major design issue which mostly affects ARM/AArch64 targets. It -mostly boils down to the fact that the existing scheduling framework was meant -to work for MachineInstr. - -When the tool encounters a "variant" instruction, it assumes a generic 1cy -latency. However, the tool would not be able to tell which processor resources -are effectively consumed by the variant instruction. - -2) MCInst and MCInstrDesc. - -Performance analysis tools require data dependency information to correctly -predict the runtime performance of the code. This tool must always be able to -obtain the set of implicit/explicit register defs/uses for every instruction of -the input assembly sequence. - -In the first section of this document, it was mentioned how the tool takes as -input an assembly sequence. That sequence is parsed into an MCInst sequence with -the help of assembly parsers available from the targets. - -An MCInst is a very low-level instruction representation. The tool can inspect -the MCOperand sequence of an MCInst to identify register operands. However, -there is no way to distinguish register operands that are definitions from register -operands that are uses. - -In LLVM, class MCInstrDesc is used to fully describe target instructions and -their operands. The opcode of a machine instruction (a MachineInstr object) can -be used to query the instruction set through method `MCInstrInfo::get' to obtain -the associated MCInstrDesc object.
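Concretely, the def/use discovery that this tool relies on reduces to something like the sketch below, which leans on the MCInstrDesc assumptions discussed next (the function is a simplified illustration, not code from the tool):

  // Sketch: collect explicit register definitions of an MCInst, assuming
  // that the first getNumDefs() operands are definitions (see the caveats
  // discussed below).
  void findRegisterDefs(const MCInst &MCI, const MCInstrInfo &MCII,
                        SmallVectorImpl<unsigned> &Defs) {
    const MCInstrDesc &Desc = MCII.get(MCI.getOpcode());
    for (unsigned I = 0, E = Desc.getNumDefs(); I < E; ++I) {
      const MCOperand &Op = MCI.getOperand(I);
      if (Op.isReg())
        Defs.push_back(Op.getReg());
    }
  }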
- -However, class MCInstrDesc describes properties and operands of MachineInstr -objects. Essentially, MCInstrDesc is not meant to be used to describe MCInst -objects. To be more specific, MCInstrDesc objects are automatically generated -via TableGen from the instruction set description in the target .td files. For -example, field `MCInstrDesc::NumDefs' is always equal to the cardinality of the -`(outs)` set from the TableGen instruction definition. - -By construction, register definitions always appear at the beginning of the -MachineOperands list in MachineInstr. Basically, the (outs) are the first -operands of a MachineInstr, and the (ins) will come after in the machine operand -list. Knowing the number of register definitions is enough to identify -all the register operands that are definitions. - -In a normal compilation process, MCInst objects are generated from MachineInstr -objects through a lowering step. By default, the lowering logic simply iterates -over the machine operands of a MachineInstr, and converts/expands them into -equivalent MCOperand objects. - -The default lowering strategy has the advantage of preserving all of the above -mentioned assumptions on the machine operand sequence. That means, register -definitions would still be at the beginning of the MCOperand sequence, and -register uses would come after. - -Targets may still define custom lowering routines for specific opcodes. Some of -these routines may lower operands in a way that potentially breaks (some of) the -assumptions on the machine operand sequence which were valid for MachineInstr. -Luckily, this is not the most common form of lowering done by the targets, and -the vast majority of MachineInstrs are lowered based on the default strategy -which preserves the original machine operand sequence. This is especially true -for x86, where the custom lowering logic always preserves the original (i.e., -from the MachineInstr) operand sequence. - -This tool currently works under the strong (and potentially incorrect) -assumption that register defs/uses in an MCInst can always be identified by -querying the machine instruction descriptor for the opcode. This assumption made -it possible to develop this tool and get good numbers at least for the -processors available in the x86 backend. - -That being said, the analysis is still potentially incorrect for other targets. -So we plan (with the help of the community) to find a proper mechanism to map, -when possible, MCOperand indices back to MachineOperand indices of the equivalent -MachineInstr. This would be equivalent to describing changes made by the -lowering step which affected the operand sequence. For example, we could have an -index for every register MCOperand (or -1, if the operand didn't exist in the -original MachineInstr). The mapping could look like this <0,1,3,2>. Here, -MCOperand #2 was obtained from the lowering of MachineOperand #3, etc. - -This information could be automatically generated via TableGen for all the -instructions whose custom lowering step breaks assumptions made by the tool on -the register operand sequence (in general, these instructions should be the -minority of a target's instruction set). Unfortunately, we don't have that -information now. As a consequence, we assume that the number of explicit -register definitions is the same number specified in MCInstrDesc. We also -assume that register definitions always come first in the operand sequence.
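If that mapping were available, it could be as simple as a per-opcode table along these lines (entirely hypothetical; no such table is generated today):

  // Hypothetical TableGen-generated record: for each register MCOperand,
  // the index of the MachineOperand it was lowered from, or -1 if it did
  // not exist in the original MachineInstr. For the <0,1,3,2> example
  // above, MCOperand #2 was obtained from MachineOperand #3.
  struct LoweredOperandMapping {
    unsigned Opcode;
    SmallVector<int, 4> MCToMachineIndex;
  };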
-
-In conclusion, these are the strong assumptions currently made by the tool:
- * The number of explicit and implicit register definitions in an MCInst
-   matches the number of explicit and implicit definitions specified by the
-   MCInstrDesc object.
- * Register uses always come after register definitions.
- * If an opcode specifies an optional definition, then the optional
-   definition is always the last register operand in the sequence.
-
-Note that some of the information accessible from the MCInstrDesc is always
-valid for MCInst. For example: implicit register defs, implicit register
-uses, and the 'MayLoad/MayStore/HasUnmodeledSideEffects' opcode properties
-still apply to MCInst. The tool knows this, and uses that information during
-its analysis.
-
-Future work
------------
- * Address the limitations described in section "Known limitations".
- * Let processors specify the selection strategy for processor resource
-   groups and resources with multiple units. The tool currently uses a
-   round-robin selector to pick the next resource to use.
- * Address the limitations specifically described in section "Known
-   limitations on X86 processors".
- * Address the design issues identified in section "Known design problems".
- * Define a standard interface for "Views". This would let users customize
-   the performance report generated by the tool.
-
-When the interfaces are mature/stable:
- * Move the logic into a library. This will enable a number of other
-   interesting use cases.
-
-Work is currently tracked on https://bugs.llvm.org. llvm-mca bugs are tagged
-with the prefix [llvm-mca]. You can easily find the full list of open bugs by
-searching for that tag.
diff --git a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h b/contrib/llvm/tools/llvm-mca/RetireControlUnit.h
index 3530ff21ba0d..8acc8bcc98fe 100644
--- a/contrib/llvm/tools/llvm-mca/RetireControlUnit.h
+++ b/contrib/llvm/tools/llvm-mca/RetireControlUnit.h
@@ -31,7 +31,7 @@ namespace mca {
 /// this RetireControlUnit (RCU) gets notified.
 ///
 /// On instruction retired, register updates are all architecturally
-/// committed, and any temporary registers originally allocated for the
+/// committed, and any physical registers previously allocated for the
 /// retired instruction are freed.
struct RetireControlUnit : public HardwareUnit { // A RUToken is created by the RCU for every instruction dispatched to the diff --git a/contrib/llvm/tools/llvm-mca/RetireStage.cpp b/contrib/llvm/tools/llvm-mca/RetireStage.cpp index 386ec54d7ba3..55c3b887e478 100644 --- a/contrib/llvm/tools/llvm-mca/RetireStage.cpp +++ b/contrib/llvm/tools/llvm-mca/RetireStage.cpp @@ -45,10 +45,12 @@ void RetireStage::cycleStart() { void RetireStage::notifyInstructionRetired(const InstRef &IR) { LLVM_DEBUG(dbgs() << "[E] Instruction Retired: #" << IR << '\n'); SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); - const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const Instruction &Inst = *IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); - for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs()) - PRF.removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency()); + bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking()); + for (const std::unique_ptr<WriteState> &WS : Inst.getDefs()) + PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs); notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); } diff --git a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp index 2d292f375e6e..897ff232a36d 100644 --- a/contrib/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/contrib/llvm/tools/llvm-mca/llvm-mca.cpp @@ -96,7 +96,7 @@ static cl::opt<unsigned> static cl::opt<unsigned> RegisterFileSize("register-file-size", - cl::desc("Maximum number of temporary registers which can " + cl::desc("Maximum number of physical registers which can " "be used for register mappings"), cl::cat(ToolOptions), cl::init(0)); diff --git a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp index 4ccc67cc75db..21a1622db765 100644 --- a/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/contrib/llvm/tools/llvm-objcopy/llvm-objcopy.cpp @@ -185,6 +185,11 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) { } // end namespace objcopy } // end namespace llvm +static bool IsDebugSection(const SectionBase &Sec) { + return Sec.Name.startswith(".debug") || Sec.Name.startswith(".zdebug") || + Sec.Name == ".gdb_index"; +} + static bool IsDWOSection(const SectionBase &Sec) { return Sec.Name.endswith(".dwo"); } @@ -316,8 +321,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj, // Removes: if (!Config.ToRemove.empty()) { RemovePred = [&Config](const SectionBase &Sec) { - return std::find(std::begin(Config.ToRemove), std::end(Config.ToRemove), - Sec.Name) != std::end(Config.ToRemove); + return find(Config.ToRemove, Sec.Name) != Config.ToRemove.end(); }; } @@ -346,7 +350,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj, case SHT_STRTAB: return true; } - return Sec.Name.startswith(".debug"); + return IsDebugSection(Sec); }; if (Config.StripSections) { @@ -357,7 +361,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj, if (Config.StripDebug) { RemovePred = [RemovePred](const SectionBase &Sec) { - return RemovePred(Sec) || Sec.Name.startswith(".debug"); + return RemovePred(Sec) || IsDebugSection(Sec); }; } @@ -385,8 +389,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj, if (!Config.OnlyKeep.empty()) { RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. 
- if (std::find(std::begin(Config.OnlyKeep), std::end(Config.OnlyKeep), - Sec.Name) != std::end(Config.OnlyKeep)) + if (find(Config.OnlyKeep, Sec.Name) != Config.OnlyKeep.end()) return false; // Allow all implicit removes. @@ -408,8 +411,7 @@ static void HandleArgs(const CopyConfig &Config, Object &Obj, if (!Config.Keep.empty()) { RemovePred = [Config, RemovePred](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. - if (std::find(std::begin(Config.Keep), std::end(Config.Keep), Sec.Name) != - std::end(Config.Keep)) + if (find(Config.Keep, Sec.Name) != Config.Keep.end()) return false; // Otherwise defer to RemovePred. return RemovePred(Sec); diff --git a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp index b454ab345456..f4e38a32a511 100644 --- a/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp +++ b/contrib/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp @@ -760,3 +760,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { P.formatLine("original type = {0}", UDT.Type); return Error::success(); } + +Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, + UsingNamespaceSym &UN) { + P.format(" `{0}`", UN.Name); + return Error::success(); +} diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 1abe3a88bfbf..cc2b9d788980 100644 --- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -3946,6 +3946,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) { return false; } +// Promote xform function to be an explicit node wherever set. +static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) { + if (Record *Xform = N->getTransformFn()) { + N->setTransformFn(nullptr); + std::vector<TreePatternNodePtr> Children; + Children.push_back(PromoteXForms(N)); + return std::make_shared<TreePatternNode>(Xform, std::move(Children), + N->getNumTypes()); + } + + if (!N->isLeaf()) + for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) { + TreePatternNodePtr Child = N->getChildShared(i); + N->setChild(i, PromoteXForms(Child)); + } + return N; +} + void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef, TreePattern &Pattern, TreePattern &Result, const std::vector<Record *> &InstImpResults) { @@ -4011,30 +4029,8 @@ void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef, Result.error("Could not infer all types in pattern result!"); } - // Promote the xform function to be an explicit node if set. - const TreePatternNodePtr &DstPattern = Result.getOnlyTree(); - std::vector<TreePatternNodePtr> ResultNodeOperands; - for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) { - TreePatternNodePtr OpNode = DstPattern->getChildShared(ii); - if (Record *Xform = OpNode->getTransformFn()) { - OpNode->setTransformFn(nullptr); - std::vector<TreePatternNodePtr> Children; - Children.push_back(OpNode); - OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children), - OpNode->getNumTypes()); - } - ResultNodeOperands.push_back(OpNode); - } - - TreePatternNodePtr DstShared = - DstPattern->isLeaf() - ? 
DstPattern - : std::make_shared<TreePatternNode>(DstPattern->getOperator(), - std::move(ResultNodeOperands), - DstPattern->getNumTypes()); - - for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i) - DstShared->setType(i, Result.getOnlyTree()->getExtType(i)); + // Promote xform function to be an explicit node wherever set. + TreePatternNodePtr DstShared = PromoteXForms(Result.getOnlyTree()); TreePattern Temp(Result.getRecord(), DstShared, false, *this); Temp.InferAllTypes();