diff options
95 files changed, 1382 insertions, 406 deletions
diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cc index f12e8206abe6..feb7bad6f347 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cc @@ -159,7 +159,6 @@ typedef struct user_fpregs elf_fpregset_t; # include <sys/procfs.h> #endif #include <sys/user.h> -#include <sys/ustat.h> #include <linux/cyclades.h> #include <linux/if_eql.h> #include <linux/if_plip.h> @@ -253,7 +252,19 @@ namespace __sanitizer { #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_LINUX && !SANITIZER_ANDROID - unsigned struct_ustat_sz = sizeof(struct ustat); + // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which + // has been removed from glibc 2.28. +#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ + || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ + || defined(__x86_64__) +#define SIZEOF_STRUCT_USTAT 32 +#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ + || defined(__powerpc__) || defined(__s390__) +#define SIZEOF_STRUCT_USTAT 20 +#else +#error Unknown size of struct ustat +#endif + unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT; unsigned struct_rlimit64_sz = sizeof(struct rlimit64); unsigned struct_statvfs64_sz = sizeof(struct statvfs64); #endif // SANITIZER_LINUX && !SANITIZER_ANDROID diff --git a/contrib/libc++/include/list b/contrib/libc++/include/list index 32e9a27bd2a4..f884b168126f 100644 --- a/contrib/libc++/include/list +++ b/contrib/libc++/include/list @@ -2058,15 +2058,15 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, con #endif if (__f != __l) { + __link_pointer __first = __f.__ptr_; + --__l; + __link_pointer __last = __l.__ptr_; if (this != &__c) { - size_type __s = _VSTD::distance(__f, __l); + size_type __s = _VSTD::distance(__f, __l) + 1; __c.__sz() -= __s; base::__sz() += __s; } - __link_pointer __first = __f.__ptr_; - --__l; - __link_pointer __last = __l.__ptr_; base::__unlink_nodes(__first, __last); __link_nodes(__p.__ptr_, __first, __last); #if _LIBCPP_DEBUG_LEVEL >= 2 diff --git a/contrib/libc++/src/support/runtime/exception_libcxxabi.ipp b/contrib/libc++/src/support/runtime/exception_libcxxabi.ipp index c3dcf1ec591a..feefc5152891 100644 --- a/contrib/libc++/src/support/runtime/exception_libcxxabi.ipp +++ b/contrib/libc++/src/support/runtime/exception_libcxxabi.ipp @@ -18,7 +18,7 @@ bool uncaught_exception() _NOEXCEPT { return uncaught_exceptions() > 0; } int uncaught_exceptions() _NOEXCEPT { -# if _LIBCPPABI_VERSION > 1101 +# if _LIBCPPABI_VERSION > 1001 return __cxa_uncaught_exceptions(); # else return __cxa_uncaught_exception() ? 1 : 0; diff --git a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 6b5404be35d3..57dee3bb44b3 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -421,7 +421,8 @@ public: /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of - /// the list is modeled as <Reg:SubReg, SubIdx>. + /// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef + /// flag are not added to this list. /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: /// - %1:sub1, sub0 @@ -446,7 +447,8 @@ public: /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). /// @@ -465,7 +467,8 @@ public: /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). /// diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 6321bb81b8cb..a302d5726aa3 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -36,8 +36,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // Intrinsics used to generate ctr-based loops. These should only be // generated by the PowerPC backend! + // The branch intrinsic is marked as NoDuplicate because loop rotation will + // attempt to duplicate it forming loops where a block reachable from one + // instance of it can contain another. def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>; - def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>; + def int_ppc_is_decremented_ctr_nonzero : + Intrinsic<[llvm_i1_ty], [], [IntrNoDuplicate]>; // Intrinsics for [double]word extended forms of divide instructions def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp index daee93267f56..94306d0f54ad 100644 --- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -502,6 +502,8 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { } FunctionInfo &FI = FunctionInfos[F]; + Handles.emplace_front(*this, F); + Handles.front().I = Handles.begin(); bool KnowNothing = false; // Collect the mod/ref properties due to called functions. We only compute diff --git a/contrib/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm/lib/Analysis/MemorySSA.cpp index 6e9368c49d65..09605f61fa93 100644 --- a/contrib/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm/lib/Analysis/MemorySSA.cpp @@ -153,9 +153,14 @@ public: if (IsCall != Other.IsCall) return false; - if (IsCall) - return CS.getCalledValue() == Other.CS.getCalledValue(); - return Loc == Other.Loc; + if (!IsCall) + return Loc == Other.Loc; + + if (CS.getCalledValue() != Other.CS.getCalledValue()) + return false; + + return CS.arg_size() == Other.CS.arg_size() && + std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin()); } private: @@ -179,12 +184,18 @@ template <> struct DenseMapInfo<MemoryLocOrCall> { } static unsigned getHashValue(const MemoryLocOrCall &MLOC) { - if (MLOC.IsCall) - return hash_combine(MLOC.IsCall, - DenseMapInfo<const Value *>::getHashValue( - MLOC.getCS().getCalledValue())); - return hash_combine( - MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + if (!MLOC.IsCall) + return hash_combine( + MLOC.IsCall, + DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + + hash_code hash = + hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( + MLOC.getCS().getCalledValue())); + + for (const Value *Arg : MLOC.getCS().args()) + hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); + return hash; } static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index a22ce0dab9c2..d8ce90e63a9d 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -1714,20 +1714,25 @@ bool IfConverter::IfConvertDiamondCommon( } // Remove the duplicated instructions at the beginnings of both paths. - // Skip dbg_value instructions + // Skip dbg_value instructions. MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; // Skip past the dups on each side separately since there may be - // differing dbg_value entries. + // differing dbg_value entries. NumDups1 can include a "return" + // instruction, if it's not marked as "branch". for (unsigned i = 0; i < NumDups1; ++DI1) { + if (DI1 == MBB1.end()) + break; if (!DI1->isDebugValue()) ++i; } while (NumDups1 != 0) { ++DI2; + if (DI2 == MBB2.end()) + break; if (!DI2->isDebugValue()) --NumDups1; } @@ -1738,11 +1743,16 @@ bool IfConverter::IfConvertDiamondCommon( Redefs.stepForward(MI, Dummy); } } + BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); MBB2.erase(MBB2.begin(), DI2); - // The branches have been checked to match, so it is safe to remove the branch - // in BB1 and rely on the copy in BB2 + // The branches have been checked to match, so it is safe to remove the + // branch in BB1 and rely on the copy in BB2. The complication is that + // the blocks may end with a return instruction, which may or may not + // be marked as "branch". If it's not, then it could be included in + // "dups1", leaving the blocks potentially empty after moving the common + // duplicates. #ifndef NDEBUG // Unanalyzable branches must match exactly. Check that now. if (!BBI1->IsBrAnalyzable) @@ -1768,11 +1778,14 @@ bool IfConverter::IfConvertDiamondCommon( if (RemoveBranch) BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB); else { - do { - assert(DI2 != MBB2.begin()); - DI2--; - } while (DI2->isBranch() || DI2->isDebugValue()); - DI2++; + // Make DI2 point to the end of the range where the common "tail" + // instructions could be found. + while (DI2 != MBB2.begin()) { + MachineBasicBlock::iterator Prev = std::prev(DI2); + if (!Prev->isBranch() && !Prev->isDebugValue()) + break; + DI2 = Prev; + } } while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1833,11 +1846,15 @@ bool IfConverter::IfConvertDiamondCommon( // a non-predicated in BBI2, then we don't want to predicate the one from // BBI2. The reason is that if we merged these blocks, we would end up with // two predicated terminators in the same block. + // Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't + // predicate them either. They were checked to be identical, and so the + // same branch would happen regardless of which path was taken. if (!MBB2.empty() && (DI2 == MBB2.end())) { MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator(); MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator(); - if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) && - BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T)) + bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T); + bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T); + if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable)) --DI2; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 75e3d35169cf..4ffcffcea693 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -514,6 +514,39 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return false; } + // Detect invalid DBG_VALUE instructions, with a debug-use of a virtual + // register that hasn't been defined yet. If we do not remove those here, then + // the re-insertion of the DBG_VALUE instruction after register allocation + // will be incorrect. + // TODO: If earlier passes are corrected to generate sane debug information + // (and if the machine verifier is improved to catch this), then these checks + // could be removed or replaced by asserts. + bool Discard = false; + if (MI.getOperand(0).isReg() && + TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { + const unsigned Reg = MI.getOperand(0).getReg(); + if (!LIS->hasInterval(Reg)) { + // The DBG_VALUE is described by a virtual register that does not have a + // live interval. Discard the DBG_VALUE. + Discard = true; + DEBUG(dbgs() << "Discarding debug info (no LIS interval): " + << Idx << " " << MI); + } else { + // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg + // is defined dead at Idx (where Idx is the slot index for the instruction + // preceeding the DBG_VALUE). + const LiveInterval &LI = LIS->getInterval(Reg); + LiveQueryResult LRQ = LI.Query(Idx); + if (!LRQ.valueOutOrDead()) { + // We have found a DBG_VALUE with the value in a virtual register that + // is not live. Discard the DBG_VALUE. + Discard = true; + DEBUG(dbgs() << "Discarding debug info (reg not live): " + << Idx << " " << MI); + } + } + } + // Get or create the UserValue for (variable,offset) here. bool IsIndirect = MI.getOperand(1).isImm(); if (IsIndirect) @@ -522,7 +555,13 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc()); - UV->addDef(Idx, MI.getOperand(0), IsIndirect); + if (!Discard) + UV->addDef(Idx, MI.getOperand(0), IsIndirect); + else { + MachineOperand MO = MachineOperand::CreateReg(0U, false); + MO.setIsDebug(); + UV->addDef(Idx, MO, false); + } return true; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 84c808ee7938..167135b56ec0 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -513,6 +513,11 @@ public: bool runOnMachineFunction(MachineFunction &F) override; + bool allowTailDupPlacement() const { + assert(F); + return TailDupPlacement && !F->getTarget().requiresStructuredCFG(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); @@ -1018,7 +1023,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( MachineBasicBlock *Succ1 = BestA.Dest; MachineBasicBlock *Succ2 = BestB.Dest; // Check to see if tail-duplication would be profitable. - if (TailDupPlacement && shouldTailDuplicate(Succ2) && + if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) && canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) && isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1), Chain, BlockFilter)) { @@ -1044,7 +1049,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( return Result; } -/// When the option TailDupPlacement is on, this method checks if the +/// When the option allowTailDupPlacement() is on, this method checks if the /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated /// into all of its unplaced, unfiltered predecessors, that are not BB. bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( @@ -1493,7 +1498,7 @@ MachineBlockPlacement::selectBestSuccessor( if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb, Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. - if (TailDupPlacement && shouldTailDuplicate(Succ)) + if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); continue; } @@ -1702,7 +1707,7 @@ void MachineBlockPlacement::buildChain( auto Result = selectBestSuccessor(BB, Chain, BlockFilter); MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; - if (TailDupPlacement) + if (allowTailDupPlacement()) ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); // If an immediate successor isn't available, look for the best viable @@ -1724,7 +1729,7 @@ void MachineBlockPlacement::buildChain( // Placement may have changed tail duplication opportunities. // Check for that now. - if (TailDupPlacement && BestSucc && ShouldTailDup) { + if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { // If the chosen successor was duplicated into all its predecessors, // don't bother laying it out, just go round the loop again with BB as // the chain end. @@ -2758,7 +2763,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TailDupSize = TailDupPlacementAggressiveThreshold; } - if (TailDupPlacement) { + if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); if (MF.getFunction().optForSize()) TailDupSize = 1; diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 7d3fc57072b5..1320f9985553 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -1812,6 +1812,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1855,6 +1857,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { } const MachineOperand &Src = Def->getOperand(SrcIdx); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -2023,6 +2027,10 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() { for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { const MachineOperand &MO = Def->getOperand(i); assert(MO.isReg() && "Invalid PHI instruction"); + // We have no code to deal with undef operands. They shouldn't happen in + // normal programs anyway. + if (MO.isUndef()) + return ValueTrackerResult(); Res.addSource(MO.getReg(), MO.getSubReg()); } @@ -2079,9 +2087,14 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { - Def = MRI.getVRegDef(Reg); - DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = Res.getSrcSubReg(0); + MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); + if (DI != MRI.def_end()) { + Def = DI->getParent(); + DefIdx = DI.getOperandNo(); + DefSubReg = Res.getSrcSubReg(0); + } else { + Def = nullptr; + } return Res; } } diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index db925f803db6..bd90ed5b55b8 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1151,6 +1151,8 @@ bool TargetInstrInfo::getRegSequenceInputs( for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; OpIdx += 2) { const MachineOperand &MOReg = MI.getOperand(OpIdx); + if (MOReg.isUndef()) + continue; const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); @@ -1174,6 +1176,8 @@ bool TargetInstrInfo::getExtractSubregInputs( // Def = EXTRACT_SUBREG v0.sub1, sub0. assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); const MachineOperand &MOReg = MI.getOperand(1); + if (MOReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(2); assert(MOSubIdx.isImm() && "The subindex of the extract_subreg is not an immediate"); @@ -1198,6 +1202,8 @@ bool TargetInstrInfo::getInsertSubregInputs( assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(3); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index c0047d0cde6a..2c57eee191db 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1422,7 +1422,8 @@ RuntimeDyldELF::processRelocationRef( SectionEntry &Section = Sections[SectionID]; uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; - if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) { + bool IsExtern = Value.SymbolName || SymType == SymbolRef::ST_Unknown; + if (!IsExtern) { if (AbiVariant != 2) { // In the ELFv1 ABI, a function call may point to the .opd entry, // so the final symbol value is calculated based on the relocation @@ -1432,21 +1433,24 @@ RuntimeDyldELF::processRelocationRef( } else { // In the ELFv2 ABI, a function symbol may provide a local entry // point, which must be used for direct calls. - uint8_t SymOther = Symbol->getOther(); - Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); + if (Value.SectionID == SectionID){ + uint8_t SymOther = Symbol->getOther(); + Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); + } } uint8_t *RelocTarget = Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int64_t delta = static_cast<int64_t>(Target - RelocTarget); // If it is within 26-bits branch range, just set the branch target - if (SignExtend64<26>(delta) == delta) { + if (SignExtend64<26>(delta) != delta) { + RangeOverflow = true; + } else if ((AbiVariant != 2) || + (AbiVariant == 2 && Value.SectionID == SectionID)) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); addRelocationForSection(RE, Value.SectionID); - } else { - RangeOverflow = true; } } - if (Value.SymbolName || SymType == SymbolRef::ST_Unknown || + if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID) || RangeOverflow) { // It is an external symbol (either Value.SymbolName is set, or // SymType is SymbolRef::ST_Unknown) or out of range. @@ -1503,10 +1507,10 @@ RuntimeDyldELF::processRelocationRef( RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } - if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) { + if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) { // Restore the TOC for external calls if (AbiVariant == 2) - writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1) + writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1) else writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1) } diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index d3c33edec186..743e3710fd68 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -359,11 +359,9 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) { return wrap(&unwrap(Ty)->getContext()); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LLVMDumpType(LLVMTypeRef Ty) { - return unwrap(Ty)->dump(); +void LLVMDumpType(LLVMTypeRef Ty) { + return unwrap(Ty)->print(errs(), /*IsForDebug=*/true); } -#endif char *LLVMPrintTypeToString(LLVMTypeRef Ty) { std::string buf; @@ -658,7 +656,7 @@ void LLVMSetValueName(LLVMValueRef Val, const char *Name) { unwrap(Val)->setName(Name); } -LLVM_DUMP_METHOD void LLVMDumpValue(LLVMValueRef Val) { +void LLVMDumpValue(LLVMValueRef Val) { unwrap(Val)->print(errs(), /*IsForDebug=*/true); } diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index a6b5c43f1d2a..328f000f37c9 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -289,6 +289,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { case Triple::mips64el: FDECFIEncoding = dwarf::DW_EH_PE_sdata8; break; + case Triple::ppc64: + case Triple::ppc64le: case Triple::x86_64: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 3dc67ad782af..6e65b5e6c807 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -1009,7 +1009,7 @@ StringRef sys::getHostCPUName() { #include "llvm/Support/X86TargetParser.def" // Now check types. -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ +#define X86_CPU_TYPE(ARCHNAME, ENUM) \ if (Type == X86::ENUM) \ return ARCHNAME; #include "llvm/Support/X86TargetParser.def" diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 2ff2ee347f56..6704fa27c86e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -299,6 +299,11 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, printOffset(MO.getOffset(), O); break; } + case MachineOperand::MO_BlockAddress: { + MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress()); + Sym->print(O, MAI); + break; + } } } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index d1ddb2e3ef70..0d00dab598d5 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -46,6 +46,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <iterator> @@ -60,6 +61,8 @@ STATISTIC(NumCollisionsAvoided, "Number of HW prefetch tag collisions avoided"); STATISTIC(NumCollisionsNotAvoided, "Number of HW prefetch tag collisions not avoided due to lack of regsiters"); +DEBUG_COUNTER(FixCounter, "falkor-hwpf", + "Controls which tag collisions are avoided"); namespace { @@ -729,6 +732,21 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { bool Fixed = false; DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); + if (!DebugCounter::shouldExecute(FixCounter)) { + DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI); + continue; + } + + // Add the non-base registers of MI as live so we don't use them as + // scratch registers. + for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) { + if (OpI == static_cast<unsigned>(LdI.BaseRegIdx)) + continue; + MachineOperand &MO = MI.getOperand(OpI); + if (MO.isReg() && MO.readsReg()) + LR.addReg(MO.getReg()); + } + for (unsigned ScratchReg : AArch64::GPR64RegClass) { if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) continue; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d66f7b59a4b5..789200b28445 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -917,6 +917,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); bool isFixed = MFI.isFixedObjectIndex(FI); + bool isCSR = !isFixed && MFI.getObjectOffset(FI) >= + -((int)AFI->getCalleeSavedStackSize()); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -930,6 +932,12 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); + } else if (isCSR && RegInfo->needsStackRealignment(MF)) { + // References to the CSR area must use FP if we're re-aligning the stack + // since the dynamically-sized alignment padding is between the SP/BP and + // the CSR area. + assert(hasFP(MF) && "Re-aligned stack must have frame pointer"); + UseFP = true; } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset @@ -947,9 +955,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } - assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) && "In the presence of dynamic stack pointer realignment, " - "non-argument objects cannot be accessed through the frame pointer"); + "non-argument/CSR objects cannot be accessed through the frame pointer"); if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 41ed24c329ef..233d6be247c2 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal( bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) { + if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 || + (VT == MVT::f16 && Subtarget->hasFullFP16()))) { DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n"); return true; } @@ -5066,7 +5067,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, // Table of Constraints // TODO: This is the current set of constraints supported by ARM for the -// compiler, not all of them may make sense, e.g. S may be difficult to support. +// compiler, not all of them may make sense. // // r - A general register // w - An FP/SIMD register of some size in the range v0-v31 @@ -5126,6 +5127,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; + case 'S': // A symbolic address + return C_Other; } } return TargetLowering::getConstraintType(Constraint); @@ -5250,6 +5253,23 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint( Result = DAG.getRegister(AArch64::WZR, MVT::i32); break; } + case 'S': { + // An absolute symbolic address or label reference. + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(Op)) { + Result = + DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Op)) { + Result = + DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0)); + } else + return; + break; + } case 'I': case 'J': @@ -9637,6 +9657,15 @@ static SDValue performPostLD1Combine(SDNode *N, if (LD->getOpcode() != ISD::LOAD) return SDValue(); + // The vector lane must be a constant in the LD1LANE opcode. + SDValue Lane; + if (IsLaneOp) { + Lane = N->getOperand(2); + auto *LaneC = dyn_cast<ConstantSDNode>(Lane); + if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements()) + return SDValue(); + } + LoadSDNode *LoadSDN = cast<LoadSDNode>(LD); EVT MemVT = LoadSDN->getMemoryVT(); // Check if memory operand is the same type as the vector element. @@ -9693,7 +9722,7 @@ static SDValue performPostLD1Combine(SDNode *N, Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { Ops.push_back(Vector); // The vector to be inserted - Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector + Ops.push_back(Lane); // The lane to be inserted in the vector } Ops.push_back(Addr); Ops.push_back(Inc); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 79826ca2ed8d..040011d858e7 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">; // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, - Sched<[WriteF]>; + Sched<[WriteF]>, Requires<[HasFullFP16]>; def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2042dbf6d5e2..e09263b6fac9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -147,6 +147,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeR600PacketizerPass(*PR); initializeR600ExpandSpecialInstrsPassPass(*PR); initializeR600VectorRegMergerPass(*PR); + initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelPass(*PR); initializeSILowerI1CopiesPass(*PR); initializeSIFixSGPRCopiesPass(*PR); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6d89aa6968e9..41ca7fe8bfaa 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -358,6 +358,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote); setOperationAction(ISD::CTLZ, MVT::i16, Promote); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote); + setOperationAction(ISD::CTPOP, MVT::i16, Promote); setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index 9740a18b7248..8c02e8da8d79 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -726,6 +726,10 @@ def : GCNPat < (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; +def : GCNPat < + (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)), + (V_BCNT_U32_B32_e64 $popcnt, $val) +>; /********** ============================================ **********/ /********** Extraction, Insertion, Building and Casting **********/ diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8c1727724a9e..cff24a10bb5f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4864,12 +4864,14 @@ bool ARMBaseInstrInfo::getRegSequenceLikeInputs( // Populate the InputRegs accordingly. // rY const MachineOperand *MOReg = &MI.getOperand(1); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_0)); // rZ MOReg = &MI.getOperand(2); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_1)); return true; } llvm_unreachable("Target dependent opcode missing"); @@ -4888,6 +4890,8 @@ bool ARMBaseInstrInfo::getExtractSubregLikeInputs( // rX = EXTRACT_SUBREG dZ, ssub_0 // rY = EXTRACT_SUBREG dZ, ssub_1 const MachineOperand &MOReg = MI.getOperand(2); + if (MOReg.isUndef()) + return false; InputReg.Reg = MOReg.getReg(); InputReg.SubReg = MOReg.getSubReg(); InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; @@ -4907,6 +4911,8 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs( // dX = VSETLNi32 dY, rZ, imm const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOIndex = MI.getOperand(3); BaseReg.Reg = MOBaseReg.getReg(); BaseReg.SubReg = MOBaseReg.getSubReg(); diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp index 2e97b99b05a7..b263e9d86c42 100644 --- a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp @@ -35,6 +35,7 @@ mayOptimizeThumb2Instruction(const MachineInstr *MI) { case ARM::tBcc: // optimizeThumb2JumpTables. case ARM::t2BR_JT: + case ARM::tBR_JTr: return true; } return false; diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 345b081500a4..f36a4317b1b9 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -5136,6 +5136,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction // and registers Rd and Base for microMIPS lwp instruction case Mips::JALR_HB: + case Mips::JALR_HB64: case Mips::JALRC_HB_MMR6: case Mips::JALRC_MMR6: if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 6d2f098a6b32..3c67743947cb 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -225,6 +225,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, switch (Kind) { case Mips::fixup_Mips_NONE: return ELF::R_MIPS_NONE; + case FK_Data_1: + report_fatal_error("MIPS does not support one byte relocations"); case Mips::fixup_Mips_16: case FK_Data_2: return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16; diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td index 3ff3f07654d9..326897dc5c63 100644 --- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1886,6 +1886,12 @@ let AddedComplexity = 41 in { def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6; +def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6; + +def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase<JRC16_MMR6, + GPR32Opnd>, + ISA_MICROMIPS32R6; + def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6; diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td index 64fe55e9776b..1fef51fd69d0 100644 --- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -1003,6 +1003,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6; +def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>, + ISA_MICROMIPS32_NOT_MIPS32R6; + +def PseudoIndirectBranch_MM : PseudoIndirectBranchBase<JR_MM, GPR32Opnd>, + ISA_MICROMIPS32_NOT_MIPS32R6; + let DecoderNamespace = "MicroMips" in { def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6; diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td index 6ceb05577538..f8e739497f4c 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.td +++ b/contrib/llvm/lib/Target/Mips/Mips.td @@ -193,6 +193,10 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", "Disable use of the jal instruction">; +def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard", + "UseIndirectJumpsHazard", + "true", "Use indirect jump" + " guards to prevent certain speculation based attacks">; //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index 62f045e77fdb..9e9e074875d0 100644 --- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -1036,3 +1036,42 @@ def : MipsPat<(select i32:$cond, immz, i32:$f), (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; } + +// Pseudo instructions +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, + hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in { + class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, + PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>; +} + +class PseudoIndirectBranchBaseR6<Instruction JumpInst, Register RT, + RegisterOperand RO> : + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], + II_IndirectBranchPseudo>, + PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)> { + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let isBranch = 1; + let isIndirectBranch = 1; + bit isCTI = 1; +} + + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALLR6REG : TailCallRegR6<JALR, ZERO, GPR32Opnd>, ISA_MIPS32R6; + def PseudoIndirectBranchR6 : PseudoIndirectBranchBaseR6<JALR, ZERO, + GPR32Opnd>, + ISA_MIPS32R6; +} + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLHBR6REG : TailCallReg<JR_HB_R6, GPR32Opnd>, ISA_MIPS32R6; + def PseudoIndrectHazardBranchR6 : PseudoIndirectBranchBase<JR_HB_R6, + GPR32Opnd>, + ISA_MIPS32R6; +} + diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td index e008aeafaa2b..828dd4f54223 100644 --- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -240,13 +240,32 @@ let isCodeGenOnly = 1 in { def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; - def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; + let AdditionalPredicates = [NoIndirectJumpGuards] in + def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; } +let AdditionalPredicates = [NotInMicroMips], + DecoderNamespace = "Mips64" in { + def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; + def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2; +} +def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>; -def TAILCALLREG64 : TailCallReg<GPR64Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALLREG64 : TailCallReg<JR64, GPR64Opnd>, ISA_MIPS3_NOT_32R6_64R6, + PTR_64; + def PseudoIndirectBranch64 : PseudoIndirectBranchBase<JR64, GPR64Opnd>, + ISA_MIPS3_NOT_32R6_64R6; +} -def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>; -def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLREGHB64 : TailCallReg<JR_HB64, GPR64Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6, PTR_64; + def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64, + GPR64Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6; +} /// Multiply and Divide Instructions. let AdditionalPredicates = [NotInMicroMips] in { @@ -536,6 +555,10 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>, ISA_MIPS3; } + +let AdditionalPredicates = [UseIndirectJumpsHazard] in + def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -843,7 +866,8 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dext $rt, $rs, $pos, $size", (DEXTU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos, uimm5_plus1:$size), 0>, ISA_MIPS64R2; - + def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>, + ISA_MIPS64; // Two operand (implicit 0 selector) versions: def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>; diff --git a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td index 1cd43ee6f1c3..da743fbdee45 100644 --- a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td @@ -104,6 +104,16 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd, class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>; class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>; + +class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { + bit isBranch = 1; + bit isIndirectBranch = 1; + bit hasDelaySlot = 1; + bit isTerminator=1; + bit isBarrier=1; + bit isCTI = 1; + InstrItinClass Itinerary = II_JR_HB; +} //===----------------------------------------------------------------------===// // // Instruction Definitions @@ -136,6 +146,7 @@ def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6; let DecoderNamespace = "Mips32r6_64r6_GP64" in { def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64; def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64; + def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6; } let AdditionalPredicates = [NotInMicroMips], DecoderNamespace = "Mips32r6_64r6_PTR64" in { @@ -277,3 +288,22 @@ def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f), def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f), (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; + +// Pseudo instructions + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALL64R6REG : TailCallRegR6<JALR64, ZERO_64, GPR64Opnd>, ISA_MIPS64R6; + def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6<JALR64, ZERO_64, + GPR64Opnd>, + ISA_MIPS64R6; +} + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLHB64R6REG : TailCallReg<JR_HB64_R6, GPR64Opnd>, + ISA_MIPS64R6; + def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase<JR_HB64_R6, + GPR64Opnd>, + ISA_MIPS64R6; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td index 0ceb1858fb09..2dcefdc789a5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td @@ -53,7 +53,7 @@ class DSPInst<string opstr = ""> class PseudoDSP<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> - : MipsPseudo<outs, ins, pattern, itin>, PredicateControl { + : MipsPseudo<outs, ins, pattern, itin> { let InsnPredicates = [HasDSP]; } diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp index 8bbac3ed7cfb..d3048c7390e1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <array> #include <cassert> #include <cstdint> @@ -1306,13 +1307,13 @@ bool MipsFastISel::fastLowerArguments() { return false; } - const ArrayRef<MCPhysReg> GPR32ArgRegs = {Mips::A0, Mips::A1, Mips::A2, - Mips::A3}; - const ArrayRef<MCPhysReg> FGR32ArgRegs = {Mips::F12, Mips::F14}; - const ArrayRef<MCPhysReg> AFGR64ArgRegs = {Mips::D6, Mips::D7}; - ArrayRef<MCPhysReg>::iterator NextGPR32 = GPR32ArgRegs.begin(); - ArrayRef<MCPhysReg>::iterator NextFGR32 = FGR32ArgRegs.begin(); - ArrayRef<MCPhysReg>::iterator NextAFGR64 = AFGR64ArgRegs.begin(); + std::array<MCPhysReg, 4> GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2, + Mips::A3}}; + std::array<MCPhysReg, 2> FGR32ArgRegs = {{Mips::F12, Mips::F14}}; + std::array<MCPhysReg, 2> AFGR64ArgRegs = {{Mips::D6, Mips::D7}}; + auto NextGPR32 = GPR32ArgRegs.begin(); + auto NextFGR32 = FGR32ArgRegs.begin(); + auto NextAFGR64 = AFGR64ArgRegs.begin(); struct AllocatedReg { const TargetRegisterClass *RC; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index ba05b0f48df7..3d383b3dfe3e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3868,7 +3868,7 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, nullptr); case 'l': // use the `lo` register to store values // that are no bigger than a word - if (VT == MVT::i32) + if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass); return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass); case 'x': // use the concatenated `hi` and `lo` registers diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index 817d9b44b9c2..516edef0556c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -128,7 +128,7 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> : - MipsInst<outs, ins, "", pattern, itin, Pseudo> { + MipsInst<outs, ins, "", pattern, itin, Pseudo>, PredicateControl { let isCodeGenOnly = 1; let isPseudo = 1; } @@ -136,7 +136,7 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern, // Mips32/64 Pseudo Instruction Format class PseudoSE<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> : - MipsPseudo<outs, ins, pattern, itin>, PredicateControl { + MipsPseudo<outs, ins, pattern, itin> { let EncodingPredicates = [HasStdEnc]; } diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 51ddc0d44c00..2e30d271e130 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -298,7 +298,6 @@ unsigned MipsInstrInfo::getEquivalentCompactForm( case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: - case Mips::TAILCALLREG: canUseShortMicroMipsCTI = true; break; } @@ -377,18 +376,18 @@ unsigned MipsInstrInfo::getEquivalentCompactForm( // For MIPSR6, the instruction 'jic' can be used for these cases. Some // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'. case Mips::JR: + case Mips::PseudoIndirectBranchR6: case Mips::PseudoReturn: - case Mips::PseudoIndirectBranch: - case Mips::TAILCALLREG: + case Mips::TAILCALLR6REG: if (canUseShortMicroMipsCTI) return Mips::JRC16_MM; return Mips::JIC; case Mips::JALRPseudo: return Mips::JIALC; case Mips::JR64: + case Mips::PseudoIndirectBranch64R6: case Mips::PseudoReturn64: - case Mips::PseudoIndirectBranch64: - case Mips::TAILCALLREG64: + case Mips::TAILCALL64R6REG: return Mips::JIC64; case Mips::JALR64Pseudo: return Mips::JIALC64; @@ -617,6 +616,18 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI, return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 32, 64, 32, 64); case Mips::DEXTU: return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64); + case Mips::TAILCALLREG: + case Mips::PseudoIndirectBranch: + case Mips::JR: + case Mips::JR64: + case Mips::JALR: + case Mips::JALR64: + case Mips::JALRPseudo: + if (!Subtarget.useIndirectJumpsHazard()) + return true; + + ErrInfo = "invalid instruction when using jump guards!"; + return false; default: return true; } diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index e0d818b749df..33a061e12a3f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -244,7 +244,10 @@ def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, AssemblerPredicate<"!FeatureMadd4">; def HasMT : Predicate<"Subtarget->hasMT()">, AssemblerPredicate<"FeatureMT">; - +def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">, + AssemblerPredicate<"FeatureUseIndirectJumpsHazard">; +def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">, + AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. // They are mutually exclusive. @@ -1540,8 +1543,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, PseudoSE<(outs), (ins calltarget:$target), [], II_J>, PseudoInstExpansion<(JumpInst Opnd:$target)>; - class TailCallReg<RegisterOperand RO> : - PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>; + class TailCallReg<Instruction JumpInst, RegisterOperand RO> : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, + PseudoInstExpansion<(JumpInst RO:$rs)>; } class BAL_BR_Pseudo<Instruction RealInst> : @@ -2068,7 +2072,7 @@ def B : UncondBranch<BEQ, brtarget>, AdditionalRequires<[NotInMicroMips]>; def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>; -let AdditionalPredicates = [NotInMicroMips] in { +let AdditionalPredicates = [NotInMicroMips, NoIndirectJumpGuards] in { def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>; } @@ -2088,24 +2092,28 @@ def BAL_BR : BAL_BR_Pseudo<BGEZAL>; let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in { def TAILCALL : TailCall<J, jmptarget>; } - -def TAILCALLREG : TailCallReg<GPR32Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in + def TAILCALLREG : TailCallReg<JR, GPR32Opnd>, ISA_MIPS1_NOT_32R6_64R6; // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. -class PseudoIndirectBranchBase<RegisterOperand RO> : +class PseudoIndirectBranchBase<Instruction JumpInst, RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], - II_IndirectBranchPseudo> { + II_IndirectBranchPseudo>, + PseudoInstExpansion<(JumpInst RO:$rs)> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; let isBranch = 1; let isIndirectBranch = 1; bit isCTI = 1; - let Predicates = [NotInMips16Mode]; } -def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in + def PseudoIndirectBranch : PseudoIndirectBranchBase<JR, GPR32Opnd>, + ISA_MIPS1_NOT_32R6_64R6; // Return instructions are matched as a RetRA instruction, then are expanded // into PseudoReturn/PseudoReturn64 after register allocation. Finally, @@ -2278,8 +2286,8 @@ class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { list<dag> Pattern = []; } -class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, - JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> { +class JR_HB_DESC<RegisterOperand RO> : + InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, JR_HB_DESC_BASE<"jr.hb", RO> { let isBranch=1; let isIndirectBranch=1; let hasDelaySlot=1; @@ -2288,8 +2296,9 @@ class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, bit isCTI = 1; } -class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, - JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> { +class JALR_HB_DESC<RegisterOperand RO> : + InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, JALR_HB_DESC_BASE<"jalr.hb", + RO> { let isIndirectBranch=1; let hasDelaySlot=1; bit isCTI = 1; @@ -2298,8 +2307,19 @@ class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, class JR_HB_ENC : JR_HB_FM<8>; class JALR_HB_ENC : JALR_HB_FM<9>; -def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; -def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32; +def JR_HB : JR_HB_DESC<GPR32Opnd>, JR_HB_ENC, ISA_MIPS32R2_NOT_32R6_64R6; +def JALR_HB : JALR_HB_DESC<GPR32Opnd>, JALR_HB_ENC, ISA_MIPS32; + +let AdditionalPredicates = [NotInMicroMips, UseIndirectJumpsHazard] in + def JALRHBPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR_HB, RA>; + + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLREGHB : TailCallReg<JR_HB, GPR32Opnd>, ISA_MIPS32_NOT_32R6_64R6; + def PseudoIndirectHazardBranch : PseudoIndirectBranchBase<JR_HB, GPR32Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6; +} class TLB<string asmstr, InstrItinClass itin = NoItinerary> : InstSE<(outs), (ins), asmstr, [], itin, FrmOther, asmstr>; @@ -2433,7 +2453,8 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; } -def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32; +def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, + ISA_MIPS32; def : MipsInstAlias<"neg $rt, $rs", (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; def : MipsInstAlias<"neg $rt", diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp index bbf2050ce1eb..e6ecbe9b5f66 100644 --- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp @@ -371,11 +371,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // In NaCl, modifying the sp is not allowed in branch delay slot. // For MIPS32R6, we can skip using a delay slot branch. - if (Subtarget.isTargetNaCl() || Subtarget.hasMips32r6()) + if (Subtarget.isTargetNaCl() || + (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard())) BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) .addReg(Mips::SP).addImm(8); - if (Subtarget.hasMips32r6()) { + if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()) { const unsigned JICOp = Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC; BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp)) @@ -383,7 +384,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addImm(0); } else { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB) + : Mips::JR; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT); if (Subtarget.isTargetNaCl()) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); @@ -475,7 +480,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) .addReg(Mips::SP_64).addImm(0); - if (Subtarget.hasMips64r6()) { + if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard()) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) .addReg(Mips::SP_64) .addImm(16); @@ -483,7 +488,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::AT_64) .addImm(0); } else { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64) + : Mips::JR64; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) .addReg(Mips::SP_64) .addImm(16); diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index f7d7e2af85e4..eee5b23117f6 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, + SelectionDAG &DAG, + const MipsSubtarget &Subtarget) { + // Estimate the number of operations the below transform will turn a + // constant multiply into. The number is approximately how many powers + // of two summed together that the constant can be broken down into. + + SmallVector<APInt, 16> WorkStack(1, C); + unsigned Steps = 0; + unsigned BitWidth = C.getBitWidth(); + + while (!WorkStack.empty()) { + APInt Val = WorkStack.pop_back_val(); + + if (Val == 0 || Val == 1) + continue; + + if (Val.isPowerOf2()) { + ++Steps; + continue; + } + + APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); + APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) + : APInt(BitWidth, 1) << C.ceilLogBase2(); + + if ((Val - Floor).ule(Ceil - Val)) { + WorkStack.push_back(Floor); + WorkStack.push_back(Val - Floor); + ++Steps; + continue; + } + + WorkStack.push_back(Ceil); + WorkStack.push_back(Ceil - Val); + ++Steps; + + // If we have taken more than 12[1] / 8[2] steps to attempt the + // optimization for a native sized value, it is more than likely that this + // optimization will make things worse. + // + // [1] MIPS64 requires 6 instructions at most to materialize any constant, + // multiplication requires at least 4 cycles, but another cycle (or two) + // to retrieve the result from the HI/LO registers. + // + // [2] For MIPS32, more than 8 steps is expensive as the constant could be + // materialized in 2 instructions, multiplication requires at least 4 + // cycles, but another cycle (or two) to retrieve the result from the + // HI/LO registers. + + if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64())) + return false; + + if (Steps > 8 && Subtarget.isABI_O32()) + return false; + } + + // If the value being multiplied is not supported natively, we have to pay + // an additional legalization cost, conservatively assume an increase in the + // cost of 3 instructions per step. This values for this heuristic were + // determined experimentally. + unsigned RegisterSize = DAG.getTargetLoweringInfo() + .getRegisterType(*DAG.getContext(), VT) + .getSizeInBits(); + Steps *= (VT.getSizeInBits() != RegisterSize) * 3; + if (Steps > 27) + return false; + + return true; +} + static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Return 0. @@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, - const MipsSETargetLowering *TL) { + const MipsSETargetLowering *TL, + const MipsSubtarget &Subtarget) { EVT VT = N->getValueType(0); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) - if (!VT.isVector()) + if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( + C->getAPIntValue(), VT, DAG, Subtarget)) return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), DAG); @@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { Val = performORCombine(N, DAG, DCI, Subtarget); break; case ISD::MUL: - return performMULCombine(N, DAG, DCI, this); + return performMULCombine(N, DAG, DCI, this, Subtarget); case ISD::SHL: Val = performSHLCombine(N, DAG, DCI, Subtarget); break; diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index f6af7e22e351..ddaa07ea9bc1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -72,9 +72,10 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false), HasMT(false), - StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), - TSInfo(), InstrInfo(MipsInstrInfo::create( - initializeSubtargetDependencies(CPU, FS, TM))), + UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride), + TM(TM), TargetTriple(TT), TSInfo(), + InstrInfo( + MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { @@ -107,6 +108,15 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (hasMips64r6() && InMicroMipsMode) report_fatal_error("microMIPS64R6 is not supported", false); + + if (UseIndirectJumpsHazard) { + if (InMicroMipsMode) + report_fatal_error( + "cannot combine indirect jumps with hazard barriers and microMIPS"); + if (!hasMips32r2()) + report_fatal_error( + "indirect jumps with hazard barriers requires MIPS32R2 or later"); + } if (hasMips32r6()) { StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6"; diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 8b10b0596e0e..ad2905c51601 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -152,6 +152,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasMT -- support MT ASE. bool HasMT; + // Use hazard variants of the jump register instructions for indirect + // function calls and jump tables. + bool UseIndirectJumpsHazard; + // Disable use of the `jal` instruction. bool UseLongCalls = false; @@ -272,6 +276,9 @@ public: bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } bool hasMT() const { return HasMT; } + bool useIndirectJumpsHazard() const { + return UseIndirectJumpsHazard && hasMips32r2(); + } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index d31e1cb5047b..cb8cc7bb347a 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -44,6 +44,14 @@ static cl::opt<bool> cl::desc("Disable load/store vectorizer"), cl::init(false), cl::Hidden); +// TODO: Remove this flag when we are confident with no regressions. +static cl::opt<bool> DisableRequireStructuredCFG( + "disable-nvptx-require-structured-cfg", + cl::desc("Transitional flag to turn off NVPTX's requirement on preserving " + "structured CFG. The requirement should be disabled only when " + "unexpected regressions happen."), + cl::init(false), cl::Hidden); + namespace llvm { void initializeNVVMIntrRangePass(PassRegistry&); @@ -108,6 +116,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, drvInterface = NVPTX::NVCL; else drvInterface = NVPTX::CUDA; + if (!DisableRequireStructuredCFG) + setRequiresStructuredCFG(true); initAsmInfo(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f0e8b11a3d9c..26e9f13f9ff4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12264,6 +12264,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getValueType() == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getOperand(1).getValueType() == MVT::i64))) { + // STBRX can only handle simple types. + EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); + if (mVT.isExtended()) + break; + SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -12271,7 +12276,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // If the type of BSWAP operand is wider than stored memory width // it need to be shifted to the right side before STBRX. - EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); if (Op1VT.bitsGT(mVT)) { int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index fb16700a5e17..4ef71effd49b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2431,7 +2431,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // Use APInt's rotate function. int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); - APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true); + APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ? + 64 : 32, SExtImm, true); InVal = InVal.rotl(SH); uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; InVal &= Mask; @@ -2444,6 +2445,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, Is64BitLI = Opc != PPC::RLDICL_32; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLDICLo; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; break; } return false; @@ -2471,6 +2474,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; break; } return false; diff --git a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp index bc0f55f581ff..ffe176ad4770 100644 --- a/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/contrib/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Printable.h" #include <bitset> using namespace llvm; @@ -262,25 +263,6 @@ public: } }; -/// An Instruction Converter which completely deletes an instruction. -/// For example, IMPLICIT_DEF instructions can be deleted when converting from -/// GPR to mask. -class InstrDeleter : public InstrConverterBase { -public: - InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} - - bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, - MachineRegisterInfo *MRI) const override { - assert(isLegal(MI, TII) && "Cannot convert instruction"); - return true; - } - - double getExtraCost(const MachineInstr *MI, - MachineRegisterInfo *MRI) const override { - return 0; - } -}; - // Key type to be used by the Instruction Converters map. // A converter is identified by <destination domain, source opcode> typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; @@ -310,8 +292,12 @@ private: /// Domains which this closure can legally be reassigned to. std::bitset<NumDomains> LegalDstDomains; + /// An ID to uniquely identify this closure, even when it gets + /// moved around + unsigned ID; + public: - Closure(std::initializer_list<RegDomain> LegalDstDomainList) { + Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { for (RegDomain D : LegalDstDomainList) LegalDstDomains.set(D); } @@ -347,6 +333,27 @@ public: return Instrs; } + LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { + dbgs() << "Registers: "; + bool First = true; + for (unsigned Reg : Edges) { + if (!First) + dbgs() << ", "; + First = false; + dbgs() << printReg(Reg, MRI->getTargetRegisterInfo()); + } + dbgs() << "\n" << "Instructions:"; + for (MachineInstr *MI : Instrs) { + dbgs() << "\n "; + MI->print(dbgs()); + } + dbgs() << "\n"; + } + + unsigned getID() const { + return ID; + } + }; class X86DomainReassignment : public MachineFunctionPass { @@ -358,7 +365,7 @@ class X86DomainReassignment : public MachineFunctionPass { DenseSet<unsigned> EnclosedEdges; /// All instructions that are included in some closure. - DenseMap<MachineInstr *, Closure *> EnclosedInstrs; + DenseMap<MachineInstr *, unsigned> EnclosedInstrs; public: static char ID; @@ -435,14 +442,14 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg, void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { auto I = EnclosedInstrs.find(MI); if (I != EnclosedInstrs.end()) { - if (I->second != &C) + if (I->second != C.getID()) // Instruction already belongs to another closure, avoid conflicts between // closure and mark this closure as illegal. C.setAllIllegal(); return; } - EnclosedInstrs[MI] = &C; + EnclosedInstrs[MI] = C.getID(); C.addInstruction(MI); // Mark closure as illegal for reassignment to domains, if there is no @@ -587,7 +594,7 @@ void X86DomainReassignment::initConverters() { new InstrIgnore(TargetOpcode::PHI); Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = - new InstrDeleter(TargetOpcode::IMPLICIT_DEF); + new InstrIgnore(TargetOpcode::IMPLICIT_DEF); Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2); @@ -723,6 +730,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { std::vector<Closure> Closures; // Go over all virtual registers and calculate a closure. + unsigned ClosureID = 0; for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx); @@ -735,7 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Calculate closure starting with Reg. - Closure C({MaskDomain}); + Closure C(ClosureID++, {MaskDomain}); buildClosure(C, Reg); // Collect all closures that can potentially be converted. @@ -743,15 +751,16 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { Closures.push_back(std::move(C)); } - for (Closure &C : Closures) + for (Closure &C : Closures) { + DEBUG(C.dump(MRI)); if (isReassignmentProfitable(C, MaskDomain)) { reassign(C, MaskDomain); ++NumClosuresConverted; Changed = true; } + } - for (auto I : Converters) - delete I.second; + DeleteContainerSeconds(Converters); DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n"); DEBUG(MF.print(dbgs())); diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 80ce3c579fe0..dca6c592614c 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -1789,9 +1789,16 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0; const TargetRegisterClass *RC = nullptr; - assert(!I->getType()->isIntegerTy(8) && - "i8 shifts should be handled by autogenerated table"); - if (I->getType()->isIntegerTy(16)) { + if (I->getType()->isIntegerTy(8)) { + CReg = X86::CL; + RC = &X86::GR8RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR8rCL; break; + case Instruction::AShr: OpReg = X86::SAR8rCL; break; + case Instruction::Shl: OpReg = X86::SHL8rCL; break; + default: return false; + } + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -1836,10 +1843,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. - assert(CReg != X86::CL && "CReg should be a super register of CL"); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::KILL), X86::CL) - .addReg(CReg, RegState::Kill); + if (CReg != X86::CL) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::KILL), X86::CL) + .addReg(CReg, RegState::Kill); unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg) diff --git a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index 1fd1c704d79a..a6fccd134740 100644 --- a/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -127,6 +127,10 @@ private: MachineInstr &JmpI, CondRegArray &CondRegs); void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, MachineInstr &CopyDefI); + void rewriteSetCarryExtended(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, + MachineOperand &FlagUse, CondRegArray &CondRegs); void rewriteSetCC(MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, MachineInstr &SetCCI, MachineOperand &FlagUse, @@ -511,8 +515,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { } else if (MI.getOpcode() == TargetOpcode::COPY) { rewriteCopy(MI, *FlagUse, CopyDefI); } else { - // We assume that arithmetic instructions that use flags also def - // them. + // We assume all other instructions that use flags also def them. assert(MI.findRegisterDefOperand(X86::EFLAGS) && "Expected a def of EFLAGS for this instruction!"); @@ -524,7 +527,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { // logic. FlagsKilled = true; - rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); + switch (MI.getOpcode()) { + case X86::SETB_C8r: + case X86::SETB_C16r: + case X86::SETB_C32r: + case X86::SETB_C64r: + // Use custom lowering for arithmetic that is merely extending the + // carry flag. We model this as the SETB_C* pseudo instructions. + rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + + default: + // Generically handle remaining uses as arithmetic instructions. + rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + } break; } @@ -636,7 +655,7 @@ void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, // also allow us to select a shorter encoding of `testb %reg, %reg` when that // would be equivalent. auto TestI = - BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8ri)).addReg(Reg).addImm(-1); + BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg); (void)TestI; DEBUG(dbgs() << " test cond: "; TestI->dump()); ++NumTestsInserted; @@ -756,6 +775,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, MI.eraseFromParent(); } +void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse, + CondRegArray &CondRegs) { + // This routine is only used to handle pseudos for setting a register to zero + // or all ones based on CF. This is essentially the sign extended from 1-bit + // form of SETB and modeled with the SETB_C* pseudos. They require special + // handling as they aren't normal SETcc instructions and are lowered to an + // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that + // they are only provided in reg-defining forms. A complicating factor is that + // they can define many different register widths. + assert(SetBI.getOperand(0).isReg() && + "Cannot have a non-register defined operand to this variant of SETB!"); + + // Little helper to do the common final step of replacing the register def'ed + // by this SETB instruction with a new register and removing the SETB + // instruction. + auto RewriteToReg = [&](unsigned Reg) { + MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg); + SetBI.eraseFromParent(); + }; + + // Grab the register class used for this particular instruction. + auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg()); + + MachineBasicBlock &MBB = *SetBI.getParent(); + auto SetPos = SetBI.getIterator(); + auto SetLoc = SetBI.getDebugLoc(); + + auto AdjustReg = [&](unsigned Reg) { + auto &OrigRC = *MRI->getRegClass(Reg); + if (&OrigRC == &SetBRC) + return Reg; + + unsigned NewReg; + + int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8; + int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8; + assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!"); + assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!"); + int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit, + X86::NoSubRegister, X86::sub_32bit}; + + // If the original size is smaller than the target *and* is smaller than 4 + // bytes, we need to explicitly zero extend it. We always extend to 4-bytes + // to maximize the chance of being able to CSE that operation and to avoid + // partial dependency stalls extending to 2-bytes. + if (OrigRegSize < TargetRegSize && OrigRegSize < 4) { + NewReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg) + .addReg(Reg); + if (&SetBRC == &X86::GR32RegClass) + return NewReg; + Reg = NewReg; + OrigRegSize = 4; + } + + NewReg = MRI->createVirtualRegister(&SetBRC); + if (OrigRegSize < TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG), + NewReg) + .addImm(0) + .addReg(Reg) + .addImm(SubRegIdx[OrigRegSize]); + } else if (OrigRegSize > TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG), + NewReg) + .addReg(Reg) + .addImm(SubRegIdx[TargetRegSize]); + } else { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + } + return NewReg; + }; + + unsigned &CondReg = CondRegs[X86::COND_B]; + if (!CondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B); + + // Adjust the condition to have the desired register width by zero-extending + // as needed. + // FIXME: We should use a better API to avoid the local reference and using a + // different variable here. + unsigned ExtCondReg = AdjustReg(CondReg); + + // Now we need to turn this into a bitmask. We do this by subtracting it from + // zero. + unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); + ZeroReg = AdjustReg(ZeroReg); + + unsigned Sub; + switch (SetBI.getOpcode()) { + case X86::SETB_C8r: + Sub = X86::SUB8rr; + break; + + case X86::SETB_C16r: + Sub = X86::SUB16rr; + break; + + case X86::SETB_C32r: + Sub = X86::SUB32rr; + break; + + case X86::SETB_C64r: + Sub = X86::SUB64rr; + break; + + default: + llvm_unreachable("Invalid SETB_C* opcode!"); + } + unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); + BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) + .addReg(ZeroReg) + .addReg(ExtCondReg); + return RewriteToReg(ResultReg); +} + void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td index d09deb5b7584..98cc8fb7439e 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1334,7 +1334,7 @@ let Predicates = [HasBMI2] in { } //===----------------------------------------------------------------------===// -// ADCX Instruction +// ADCX and ADOX Instructions // let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], Constraints = "$src0 = $dst", AddedComplexity = 10 in { @@ -1349,6 +1349,15 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], [(set GR64:$dst, EFLAGS, (X86adc_flag GR64:$src0, GR64:$src, EFLAGS))], IIC_BIN_CARRY_NONMEM>, T8PD; + + // We don't have patterns for ADOX yet. + let hasSideEffects = 0 in { + def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src0, GR32:$src), + "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; + + def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src0, GR64:$src), + "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; + } // hasSideEffects = 0 } // SchedRW let mayLoad = 1, SchedRW = [WriteALULd] in { @@ -1363,27 +1372,14 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], [(set GR64:$dst, EFLAGS, (X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))], IIC_BIN_CARRY_MEM>, T8PD; - } -} -//===----------------------------------------------------------------------===// -// ADOX Instruction -// -let Predicates = [HasADX], hasSideEffects = 0, Defs = [EFLAGS], - Uses = [EFLAGS] in { - let SchedRW = [WriteALU] in { - def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; - - def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; - } // SchedRW - - let mayLoad = 1, SchedRW = [WriteALULd] in { - def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + // We don't have patterns for ADOX yet. + let hasSideEffects = 0 in { + def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src0, i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; - def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src0, i64mem:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; } + } // hasSideEffects = 0 } diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index b25cbcad3b9d..76c4a8fbc16e 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -847,10 +847,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; + // Can't change signature of musttail callee + if (CS.isMustTailCall()) + return nullptr; + if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; } + // Can't change signature of musttail caller + // FIXME: Support promoting whole chain of musttail functions + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); AAResults &AAR = AARGetter(*F); diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 5446541550e5..b2afa6f2c9cd 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -507,14 +507,28 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // MaybeLive. Initialized to a list of RetCount empty lists. RetUses MaybeLiveRetUses(RetCount); - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + bool HasMustTailCalls = false; + + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() != F.getFunctionType()->getReturnType()) { // We don't support old style multiple return values. MarkLive(F); return; } + } + + // If we have any returns of `musttail` results - the signature can't + // change + if (BB->getTerminatingMustTailCall() != nullptr) + HasMustTailCalls = true; + } + + if (HasMustTailCalls) { + DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName() + << " has musttail calls\n"); + } if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) { MarkLive(F); @@ -526,6 +540,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; + + bool HasMustTailCallers = false; + // Loop all uses of the function. for (const Use &U : F.uses()) { // If the function is PASSED IN as an argument, its address has been @@ -536,6 +553,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { return; } + // The number of arguments for `musttail` call must match the number of + // arguments of the caller + if (CS.isMustTailCall()) + HasMustTailCallers = true; + // If this use is anything other than a call site, the function is alive. const Instruction *TheCall = CS.getInstruction(); if (!TheCall) { // Not a direct call site? @@ -580,6 +602,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } } + if (HasMustTailCallers) { + DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName() + << " has musttail callers\n"); + } + // Now we've inspected all callers, record the liveness of our return values. for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); @@ -593,12 +620,19 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { for (Function::const_arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; ++AI, ++i) { Liveness Result; - if (F.getFunctionType()->isVarArg()) { + if (F.getFunctionType()->isVarArg() || HasMustTailCallers || + HasMustTailCalls) { // Variadic functions will already have a va_arg function expanded inside // them, making them potentially very sensitive to ABI changes resulting // from removing arguments entirely, so don't. For example AArch64 handles // register and stack HFAs very differently, and this is reflected in the // IR which has already been generated. + // + // `musttail` calls to this function restrict argument removal attempts. + // The signature of the caller must match the signature of the function. + // + // `musttail` calls in this function prevents us from changing its + // signature Result = Live; } else { // See what the effect of this use is (recording any uses that cause diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 4bb2984e3b47..e0bbf45d316a 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2099,8 +2099,31 @@ static void RemoveNestAttribute(Function *F) { /// GHC, or anyregcc. static bool isProfitableToMakeFastCC(Function *F) { CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? - return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; + if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) + return false; + + // FIXME: Change CC for the whole chain of musttail calls when possible. + // + // Can't change CC of the function that either has musttail calls, or is a + // musttail callee itself + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallInst* CI = dyn_cast<CallInst>(U); + if (!CI) + continue; + + if (CI->isMustTailCall()) + return false; + } + + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return false; + + return true; } static bool diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 76b90391fbb1..8886af90ba65 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -638,6 +638,19 @@ void MergeFunctions::filterInstsUnrelatedToPDI( DEBUG(dbgs() << " }\n"); } +// Don't merge tiny functions using a thunk, since it can just end up +// making the function larger. +static bool isThunkProfitable(Function * F) { + if (F->size() == 1) { + if (F->front().size() <= 2) { + DEBUG(dbgs() << "isThunkProfitable: " << F->getName() + << " is too small to bother creating a thunk for\n"); + return false; + } + } + return true; +} + // Replace G with a simple tail call to bitcast(F). Also (unless // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), // delete G. Under MergeFunctionsPDI, we use G itself for creating @@ -647,39 +660,6 @@ void MergeFunctions::filterInstsUnrelatedToPDI( // For better debugability, under MergeFunctionsPDI, we do not modify G's // call sites to point to F even when within the same translation unit. void MergeFunctions::writeThunk(Function *F, Function *G) { - if (!G->isInterposable() && !MergeFunctionsPDI) { - if (G->hasGlobalUnnamedAddr()) { - // G might have been a key in our GlobalNumberState, and it's illegal - // to replace a key in ValueMap<GlobalValue *> with a non-global. - GlobalNumbers.erase(G); - // If G's address is not significant, replace it entirely. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - G->replaceAllUsesWith(BitcastF); - } else { - // Redirect direct callers of G to F. (See note on MergeFunctionsPDI - // above). - replaceDirectCallers(G, F); - } - } - - // If G was internal then we may have replaced all uses of G with F. If so, - // stop here and delete G. There's no need for a thunk. (See note on - // MergeFunctionsPDI above). - if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) { - G->eraseFromParent(); - return; - } - - // Don't merge tiny functions using a thunk, since it can just end up - // making the function larger. - if (F->size() == 1) { - if (F->front().size() <= 2) { - DEBUG(dbgs() << "writeThunk: " << F->getName() - << " is too small to bother creating a thunk for\n"); - return; - } - } - BasicBlock *GEntryBlock = nullptr; std::vector<Instruction *> PDIUnrelatedWL; BasicBlock *BB = nullptr; @@ -754,6 +734,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { if (F->isInterposable()) { assert(G->isInterposable()); + if (!isThunkProfitable(F)) { + return; + } + // Make them both thunks to the same internal function. Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", F->getParent()); @@ -770,11 +754,41 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { F->setAlignment(MaxAlignment); F->setLinkage(GlobalValue::PrivateLinkage); ++NumDoubleWeak; + ++NumFunctionsMerged; } else { + // For better debugability, under MergeFunctionsPDI, we do not modify G's + // call sites to point to F even when within the same translation unit. + if (!G->isInterposable() && !MergeFunctionsPDI) { + if (G->hasGlobalUnnamedAddr()) { + // G might have been a key in our GlobalNumberState, and it's illegal + // to replace a key in ValueMap<GlobalValue *> with a non-global. + GlobalNumbers.erase(G); + // If G's address is not significant, replace it entirely. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + G->replaceAllUsesWith(BitcastF); + } else { + // Redirect direct callers of G to F. (See note on MergeFunctionsPDI + // above). + replaceDirectCallers(G, F); + } + } + + // If G was internal then we may have replaced all uses of G with F. If so, + // stop here and delete G. There's no need for a thunk. (See note on + // MergeFunctionsPDI above). + if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) { + G->eraseFromParent(); + ++NumFunctionsMerged; + return; + } + + if (!isThunkProfitable(F)) { + return; + } + writeThunk(F, G); + ++NumFunctionsMerged; } - - ++NumFunctionsMerged; } /// Replace function F by function G. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index b332e75c7feb..8fa7d0684b94 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -34,6 +34,7 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm-c/Initialization.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -1946,13 +1947,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. + Value *ASCStrippedPtrOp = PtrOp; if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) { // X = bitcast A addrspace(1)* to B addrspace(1)* // Y = addrspacecast A addrspace(1)* to B addrspace(2)* // Z = gep Y, <...constant indices...> // Into an addrspacecasted GEP of the struct. if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0))) - PtrOp = BC; + ASCStrippedPtrOp = BC; } /// See if we can simplify: @@ -1960,7 +1962,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); diff --git a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp index 4edea7cc3c82..7488cd5af8be 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -201,6 +201,46 @@ static bool canSplitCallSite(CallSite CS) { return CallSiteBB->canSplitPredecessors(); } +static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before, + Value *V) { + Instruction *Copy = I->clone(); + Copy->setName(I->getName()); + Copy->insertBefore(Before); + if (V) + Copy->setOperand(0, V); + return Copy; +} + +/// Copy mandatory `musttail` return sequence that follows original `CI`, and +/// link it up to `NewCI` value instead: +/// +/// * (optional) `bitcast NewCI to ...` +/// * `ret bitcast or NewCI` +/// +/// Insert this sequence right before `SplitBB`'s terminator, which will be +/// cleaned up later in `splitCallSite` below. +static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI, + Instruction *NewCI) { + bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy(); + auto II = std::next(CI->getIterator()); + + BitCastInst *BCI = dyn_cast<BitCastInst>(&*II); + if (BCI) + ++II; + + ReturnInst *RI = dyn_cast<ReturnInst>(&*II); + assert(RI && "`musttail` call must be followed by `ret` instruction"); + + TerminatorInst *TI = SplitBB->getTerminator(); + Value *V = NewCI; + if (BCI) + V = cloneInstForMustTail(BCI, TI, V); + cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V); + + // FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug + // that prevents doing this now. +} + /// Return true if the CS is split into its new predecessors which are directly /// hooked to each of its original predecessors pointed by PredBB1 and PredBB2. /// CallInst1 and CallInst2 will be the new call-sites placed in the new @@ -245,6 +285,7 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, Instruction *CallInst1, Instruction *CallInst2) { Instruction *Instr = CS.getInstruction(); BasicBlock *TailBB = Instr->getParent(); + bool IsMustTailCall = CS.isMustTailCall(); assert(Instr == (TailBB->getFirstNonPHIOrDbg()) && "Unexpected call-site"); BasicBlock *SplitBlock1 = @@ -276,9 +317,14 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, ++ArgNo; } } + // Clone and place bitcast and return instructions before `TI` + if (IsMustTailCall) { + copyMustTailReturn(SplitBlock1, CS.getInstruction(), CallInst1); + copyMustTailReturn(SplitBlock2, CS.getInstruction(), CallInst2); + } // Replace users of the original call with a PHI mering call-sites split. - if (Instr->getNumUses()) { + if (!IsMustTailCall && Instr->getNumUses()) { PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", TailBB->getFirstNonPHI()); PN->addIncoming(CallInst1, SplitBlock1); @@ -290,8 +336,25 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, << "\n"); DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName() << "\n"); - Instr->eraseFromParent(); + NumCallSiteSplit++; + + // FIXME: remove TI in `copyMustTailReturn` + if (IsMustTailCall) { + // Remove superfluous `br` terminators from the end of the Split blocks + // NOTE: Removing terminator removes the SplitBlock from the TailBB's + // predecessors. Therefore we must get complete list of Splits before + // attempting removal. + SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB))); + assert(Splits.size() == 2 && "Expected exactly 2 splits!"); + for (unsigned i = 0; i < Splits.size(); i++) + Splits[i]->getTerminator()->eraseFromParent(); + + // Erase the tail block once done with musttail patching + TailBB->eraseFromParent(); + return; + } + Instr->eraseFromParent(); } // Return true if the call-site has an argument which is a PHI with only @@ -369,7 +432,17 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { Function *Callee = CS.getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; + + // Successful musttail call-site splits result in erased CI and erased BB. + // Check if such path is possible before attempting the splitting. + bool IsMustTail = CS.isMustTailCall(); + Changed |= tryToSplitCallSite(CS); + + // There're no interesting instructions after this. The call site + // itself might have been erased on splitting. + if (IsMustTail) + break; } } return Changed; diff --git a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp index e383af89a384..e1bc590c5c9a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -48,7 +50,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Insert all divide and remainder instructions into maps keyed by their // operands and opcode (signed or unsigned). - DenseMap<DivRemMapKey, Instruction *> DivMap, RemMap; + DenseMap<DivRemMapKey, Instruction *> DivMap; + // Use a MapVector for RemMap so that instructions are moved/inserted in a + // deterministic order. + MapVector<DivRemMapKey, Instruction *> RemMap; for (auto &BB : F) { for (auto &I : BB) { if (I.getOpcode() == Instruction::SDiv) @@ -67,14 +72,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // rare than division. for (auto &RemPair : RemMap) { // Find the matching division instruction from the division map. - Instruction *DivInst = DivMap[RemPair.getFirst()]; + Instruction *DivInst = DivMap[RemPair.first]; if (!DivInst) continue; // We have a matching pair of div/rem instructions. If one dominates the // other, hoist and/or replace one. NumPairs++; - Instruction *RemInst = RemPair.getSecond(); + Instruction *RemInst = RemPair.second; bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 141c9938bf8b..2f1645433fb8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1454,6 +1454,9 @@ FindMostPopularDest(BasicBlock *BB, if (PredToDest.second) DestPopularity[PredToDest.second]++; + if (DestPopularity.empty()) + return nullptr; + // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); BasicBlock *MostPopularDest = DPI->first; @@ -1629,8 +1632,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // threadable destination (the common case) we can avoid this. BasicBlock *MostPopularDest = OnlyDest; - if (MostPopularDest == MultipleDestSentinel) + if (MostPopularDest == MultipleDestSentinel) { + // Remove any loop headers from the Dest list, ThreadEdge conservatively + // won't process them, but we might have other destination that are eligible + // and we still want to process. + erase_if(PredToDestList, + [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) { + return LoopHeaders.count(PredToDest.second) != 0; + }); + + if (PredToDestList.empty()) + return false; + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + } // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 9dc550ceaeca..3e12649ddedc 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -223,6 +223,10 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { /// represented here for efficient lookup. SmallPtrSet<Function *, 16> MRVFunctionsTracked; + /// MustTailFunctions - Each function here is a callee of non-removable + /// musttail call site. + SmallPtrSet<Function *, 16> MustTailCallees; + /// TrackingIncomingArguments - This is the set of functions for whose /// arguments we make optimistic assumptions about and try to prove as /// constants. @@ -289,6 +293,18 @@ public: TrackedRetVals.insert(std::make_pair(F, LatticeVal())); } + /// AddMustTailCallee - If the SCCP solver finds that this function is called + /// from non-removable musttail call site. + void AddMustTailCallee(Function *F) { + MustTailCallees.insert(F); + } + + /// Returns true if the given function is called from non-removable musttail + /// call site. + bool isMustTailCallee(Function *F) { + return MustTailCallees.count(F); + } + void AddArgumentTrackedFunction(Function *F) { TrackingIncomingArguments.insert(F); } @@ -358,6 +374,12 @@ public: return MRVFunctionsTracked; } + /// getMustTailCallees - Get the set of functions which are called + /// from non-removable musttail call sites. + const SmallPtrSet<Function *, 16> getMustTailCallees() { + return MustTailCallees; + } + /// markOverdefined - Mark the specified value overdefined. This /// works with both scalars and structs. void markOverdefined(Value *V) { @@ -1672,6 +1694,23 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { IV.isConstant() ? IV.getConstant() : UndefValue::get(V->getType()); } assert(Const && "Constant is nullptr here!"); + + // Replacing `musttail` instructions with constant breaks `musttail` invariant + // unless the call itself can be removed + CallInst *CI = dyn_cast<CallInst>(V); + if (CI && CI->isMustTailCall() && !isInstructionTriviallyDead(CI)) { + CallSite CS(CI); + Function *F = CS.getCalledFunction(); + + // Don't zap returns of the callee + if (F) + Solver.AddMustTailCallee(F); + + DEBUG(dbgs() << " Can\'t treat the result of musttail call : " << *CI + << " as a constant\n"); + return false; + } + DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n'); // Replaces all of the uses of a variable with uses of the constant. @@ -1802,10 +1841,26 @@ static void findReturnsToZap(Function &F, if (!Solver.isArgumentTrackedFunction(&F)) return; - for (BasicBlock &BB : F) + // There is a non-removable musttail call site of this function. Zapping + // returns is not allowed. + if (Solver.isMustTailCallee(&F)) { + DEBUG(dbgs() << "Can't zap returns of the function : " << F.getName() + << " due to present musttail call of it\n"); + return; + } + + for (BasicBlock &BB : F) { + if (CallInst *CI = BB.getTerminatingMustTailCall()) { + DEBUG(dbgs() << "Can't zap return of the block due to present " + << "musttail call : " << *CI << "\n"); + (void)CI; + return; + } + if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) ReturnsToZap.push_back(RI); + } } static bool runIPSCCP(Module &M, const DataLayout &DL, diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp index bddcbd86e914..75539428b688 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -710,7 +710,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L, return Res; if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) return Res; - llvm_unreachable("InlineAsm blocks were not uniqued."); + assert(L->getFunctionType() != R->getFunctionType()); return 0; } diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td index 28aead0cdb99..69c20ea7806c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -326,6 +326,10 @@ def warn_drv_unsupported_abicalls : Warning< "ignoring '-mabicalls' option as it cannot be used with " "non position-independent code and the N64 ABI">, InGroup<OptionIgnored>; +def err_drv_unsupported_indirect_jump_opt : Error< + "'-mindirect-jump=%0' is unsupported with the '%1' architecture">; +def err_drv_unknown_indirect_jump_opt : Error< + "unknown '-mindirect-jump=' option '%0'">; def warn_drv_unable_to_find_directory_expected : Warning< "unable to find %0 directory, expected to be in '%1'">, diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec624a0cc116..9d17bdaeb7cd 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -759,6 +759,10 @@ def warn_cxx_ms_struct : Warning<"ms_struct may not produce Microsoft-compatible layouts for classes " "with base classes or virtual functions">, DefaultError, InGroup<IncompatibleMSStruct>; +def warn_npot_ms_struct : + Warning<"ms_struct may not produce Microsoft-compatible layouts with fundamental " + "data types with sizes that aren't a power of two">, + DefaultError, InGroup<IncompatibleMSStruct>; def err_section_conflict : Error<"%0 causes a section type conflict with %1">; def err_no_base_classes : Error<"invalid use of '__super', %0 has no base classes">; def err_invalid_super_scope : Error<"invalid use of '__super', " diff --git a/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td b/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td index c1f0a89b5dc8..e417f6cbbec6 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/CLCompatOptions.td @@ -238,7 +238,7 @@ def _SLASH_Fo : CLCompileJoined<"Fo">, def _SLASH_GX : CLFlag<"GX">, HelpText<"Enable exception handling">; def _SLASH_GX_ : CLFlag<"GX-">, - HelpText<"Enable exception handling">; + HelpText<"Disable exception handling">; def _SLASH_imsvc : CLJoinedOrSeparate<"imsvc">, HelpText<"Add directory to system include search path, as if part of %INCLUDE%">, MetaVarName<"<dir>">; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index cab450042e69..7480f5163d3b 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -1100,7 +1100,8 @@ def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">, HelpText<"Perform ThinLTO importing using provided function summary index">; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group<f_Group>, Flags<[DriverOption, CoreOption]>; -def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>; +def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>, + Flags<[CC1Option]>, HelpText<"Allow merging of constants">; def fmessage_length_EQ : Joined<["-"], "fmessage-length=">, Group<f_Group>; def fms_extensions : Flag<["-"], "fms-extensions">, Group<f_Group>, Flags<[CC1Option, CoreOption]>, HelpText<"Accept some non-standard constructs supported by the Microsoft compiler">; @@ -1249,7 +1250,7 @@ def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>, def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>, HelpText<"Disallow implicit conversions between vectors with a different number of elements or different element types">, Flags<[CC1Option]>; def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Group>, - Flags<[CC1Option]>, HelpText<"Disallow merging of constants">; + HelpText<"Disallow merging of constants">; def fno_modules : Flag <["-"], "fno-modules">, Group<f_Group>, Flags<[DriverOption]>; def fno_implicit_module_maps : Flag <["-"], "fno-implicit-module-maps">, Group<f_Group>, @@ -1992,6 +1993,9 @@ def mbranch_likely : Flag<["-"], "mbranch-likely">, Group<m_Group>, IgnoredGCCCompat; def mno_branch_likely : Flag<["-"], "mno-branch-likely">, Group<m_Group>, IgnoredGCCCompat; +def mindirect_jump_EQ : Joined<["-"], "mindirect-jump=">, + Group<m_Group>, + HelpText<"Change indirect jump instructions to inhibit speculation">; def mdsp : Flag<["-"], "mdsp">, Group<m_Group>; def mno_dsp : Flag<["-"], "mno-dsp">, Group<m_Group>; def mdspr2 : Flag<["-"], "mdspr2">, Group<m_Group>; diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp index 8d9b3c3bebc0..57bf2fca4eb7 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp @@ -61,14 +61,22 @@ namespace { static QualType getType(APValue::LValueBase B) { if (!B) return QualType(); - if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) + if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) { // FIXME: It's unclear where we're supposed to take the type from, and - // this actually matters for arrays of unknown bound. Using the type of - // the most recent declaration isn't clearly correct in general. Eg: + // this actually matters for arrays of unknown bound. Eg: // // extern int arr[]; void f() { extern int arr[3]; }; // constexpr int *p = &arr[1]; // valid? - return cast<ValueDecl>(D->getMostRecentDecl())->getType(); + // + // For now, we take the array bound from the most recent declaration. + for (auto *Redecl = cast<ValueDecl>(D->getMostRecentDecl()); Redecl; + Redecl = cast_or_null<ValueDecl>(Redecl->getPreviousDecl())) { + QualType T = Redecl->getType(); + if (!T->isIncompleteArrayType()) + return T; + } + return D->getType(); + } const Expr *Base = B.get<const Expr*>(); @@ -8535,9 +8543,6 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { (LHSValue.Base && isZeroSized(RHSValue))) return Error(E); // Pointers with different bases cannot represent the same object. - // (Note that clang defaults to -fmerge-all-constants, which can - // lead to inconsistent results for comparisons involving the address - // of a constant; this generally doesn't matter in practice.) return Success(E->getOpcode() == BO_NE, E); } diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp index a9d43dfa80c5..de38bc9b64dd 100644 --- a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1751,7 +1751,34 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, QualType T = Context.getBaseElementType(D->getType()); if (const BuiltinType *BTy = T->getAs<BuiltinType>()) { CharUnits TypeSize = Context.getTypeSizeInChars(BTy); - if (TypeSize > FieldAlign) + + if (!llvm::isPowerOf2_64(TypeSize.getQuantity())) { + assert( + !Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment() && + "Non PowerOf2 size in MSVC mode"); + // Base types with sizes that aren't a power of two don't work + // with the layout rules for MS structs. This isn't an issue in + // MSVC itself since there are no such base data types there. + // On e.g. x86_32 mingw and linux, long double is 12 bytes though. + // Any structs involving that data type obviously can't be ABI + // compatible with MSVC regardless of how it is laid out. + + // Since ms_struct can be mass enabled (via a pragma or via the + // -mms-bitfields command line parameter), this can trigger for + // structs that don't actually need MSVC compatibility, so we + // need to be able to sidestep the ms_struct layout for these types. + + // Since the combination of -mms-bitfields together with structs + // like max_align_t (which contains a long double) for mingw is + // quite comon (and GCC handles it silently), just handle it + // silently there. For other targets that have ms_struct enabled + // (most probably via a pragma or attribute), trigger a diagnostic + // that defaults to an error. + if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) + Diag(D->getLocation(), diag::warn_npot_ms_struct); + } + if (TypeSize > FieldAlign && + llvm::isPowerOf2_64(TypeSize.getQuantity())) FieldAlign = TypeSize; } } diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp index 4d3cd121f705..8762ef691700 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp @@ -299,7 +299,40 @@ ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const { } const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = { - {{"w31"}, "wsp"}, {{"x29"}, "fp"}, {{"x30"}, "lr"}, {{"x31"}, "sp"}, + {{"w31"}, "wsp"}, + {{"x31"}, "sp"}, + // GCC rN registers are aliases of xN registers. + {{"r0"}, "x0"}, + {{"r1"}, "x1"}, + {{"r2"}, "x2"}, + {{"r3"}, "x3"}, + {{"r4"}, "x4"}, + {{"r5"}, "x5"}, + {{"r6"}, "x6"}, + {{"r7"}, "x7"}, + {{"r8"}, "x8"}, + {{"r9"}, "x9"}, + {{"r10"}, "x10"}, + {{"r11"}, "x11"}, + {{"r12"}, "x12"}, + {{"r13"}, "x13"}, + {{"r14"}, "x14"}, + {{"r15"}, "x15"}, + {{"r16"}, "x16"}, + {{"r17"}, "x17"}, + {{"r18"}, "x18"}, + {{"r19"}, "x19"}, + {{"r20"}, "x20"}, + {{"r21"}, "x21"}, + {{"r22"}, "x22"}, + {{"r23"}, "x23"}, + {{"r24"}, "x24"}, + {{"r25"}, "x25"}, + {{"r26"}, "x26"}, + {{"r27"}, "x27"}, + {{"r28"}, "x28"}, + {{"r29", "x29"}, "fp"}, + {{"r30", "x30"}, "lr"}, // The S/D/Q and W/X registers overlap, but aren't really aliases; we // don't want to substitute one of these for a different-sized one. }; diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/Mips.h b/contrib/llvm/tools/clang/lib/Basic/Targets/Mips.h index 28900f21f86b..4b61116a3256 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/Mips.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/Mips.h @@ -54,6 +54,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo { enum DspRevEnum { NoDSP, DSP1, DSP2 } DspRev; bool HasMSA; bool DisableMadd4; + bool UseIndirectJumpHazard; protected: bool HasFP64; @@ -64,7 +65,8 @@ public: : TargetInfo(Triple), IsMips16(false), IsMicromips(false), IsNan2008(false), IsAbs2008(false), IsSingleFloat(false), IsNoABICalls(false), CanUseBSDABICalls(false), FloatABI(HardFloat), - DspRev(NoDSP), HasMSA(false), DisableMadd4(false), HasFP64(false) { + DspRev(NoDSP), HasMSA(false), DisableMadd4(false), + UseIndirectJumpHazard(false), HasFP64(false) { TheCXXABI.set(TargetCXXABI::GenericMIPS); setABI((getTriple().getArch() == llvm::Triple::mips || @@ -338,6 +340,8 @@ public: IsAbs2008 = false; else if (Feature == "+noabicalls") IsNoABICalls = true; + else if (Feature == "+use-indirect-jump-hazard") + UseIndirectJumpHazard = true; } setDataLayout(); diff --git a/contrib/llvm/tools/clang/lib/Basic/Version.cpp b/contrib/llvm/tools/clang/lib/Basic/Version.cpp index 580bd94ccbef..ea3754e02af5 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp @@ -36,7 +36,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. - StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_600/final/lib/Basic/Version.cpp $"); + StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_601/final/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index abd633280b9d..f2352a373401 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -1931,13 +1931,8 @@ void X86_32TargetCodeGenInfo::setTargetAttributes( return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. llvm::Function *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2292,13 +2287,8 @@ public: return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. - auto *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); @@ -2429,13 +2419,8 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes( return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { - // Get the LLVM function. - auto *Fn = cast<llvm::Function>(GV); - - // Now add the 'alignstack' attribute with a value of 16. - llvm::AttrBuilder B; - B.addStackAlignmentAttr(16); - Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->addFnAttr("stackrealign"); } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index 325b233ac5ec..c890f7741503 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -858,11 +858,14 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { : std::move(*CLOptions)); if (HasConfigFile) for (auto *Opt : *CLOptions) { + if (Opt->getOption().matches(options::OPT_config)) + continue; + unsigned Index = Args.MakeIndex(Opt->getSpelling()); const Arg *BaseArg = &Opt->getBaseArg(); if (BaseArg == Opt) BaseArg = nullptr; Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Opt->getSpelling(), - Args.size(), BaseArg); + Index, BaseArg); Copy->getValues() = Opt->getValues(); if (Opt->isClaimed()) Copy->claim(); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp index 61481a92d0b7..e72754d5ad53 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.cpp @@ -343,6 +343,28 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, AddTargetFeature(Args, Features, options::OPT_mno_madd4, options::OPT_mmadd4, "nomadd4"); AddTargetFeature(Args, Features, options::OPT_mmt, options::OPT_mno_mt, "mt"); + + if (Arg *A = Args.getLastArg(options::OPT_mindirect_jump_EQ)) { + StringRef Val = StringRef(A->getValue()); + if (Val == "hazard") { + Arg *B = + Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips); + Arg *C = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16); + + if (B && B->getOption().matches(options::OPT_mmicromips)) + D.Diag(diag::err_drv_unsupported_indirect_jump_opt) + << "hazard" << "micromips"; + else if (C && C->getOption().matches(options::OPT_mips16)) + D.Diag(diag::err_drv_unsupported_indirect_jump_opt) + << "hazard" << "mips16"; + else if (mips::supportsIndirectJumpHazardBarrier(CPUName)) + Features.push_back("+use-indirect-jump-hazard"); + else + D.Diag(diag::err_drv_unsupported_indirect_jump_opt) + << "hazard" << CPUName; + } else + D.Diag(diag::err_drv_unknown_indirect_jump_opt) << Val; + } } mips::IEEE754Standard mips::getIEEE754Standard(StringRef &CPU) { @@ -447,3 +469,20 @@ bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple, return UseFPXX; } + +bool mips::supportsIndirectJumpHazardBarrier(StringRef &CPU) { + // Supporting the hazard barrier method of dealing with indirect + // jumps requires MIPSR2 support. + return llvm::StringSwitch<bool>(CPU) + .Case("mips32r2", true) + .Case("mips32r3", true) + .Case("mips32r5", true) + .Case("mips32r6", true) + .Case("mips64r2", true) + .Case("mips64r3", true) + .Case("mips64r5", true) + .Case("mips64r6", true) + .Case("octeon", true) + .Case("p5600", true) + .Default(false); +} diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.h index 89eea9a1514c..7e90488363a5 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.h +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Arch/Mips.h @@ -53,6 +53,7 @@ bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName, bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple, StringRef CPUName, StringRef ABIName, mips::FloatABI FloatABI); +bool supportsIndirectJumpHazardBarrier(StringRef &CPU); } // end namespace mips } // end namespace target diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp index 484df520012c..a2821b70d716 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -3288,9 +3288,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fveclib); - if (!Args.hasFlag(options::OPT_fmerge_all_constants, - options::OPT_fno_merge_all_constants)) - CmdArgs.push_back("-fno-merge-all-constants"); + if (Args.hasFlag(options::OPT_fmerge_all_constants, + options::OPT_fno_merge_all_constants, false)) + CmdArgs.push_back("-fmerge-all-constants"); // LLVM Code Generator Options. diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp index 5049033c4137..6ca04a8a3abb 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -127,7 +127,8 @@ void tools::CrossWindows::Linker::ConstructJob( } CmdArgs.push_back("-shared"); - CmdArgs.push_back("-Bdynamic"); + CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic" + : "-Bdynamic"); CmdArgs.push_back("--enable-auto-image-base"); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/MinGW.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/MinGW.cpp index 572ea803f2dc..db83ac6fa7a2 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/MinGW.cpp @@ -141,22 +141,21 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("console"); } + if (Args.hasArg(options::OPT_mdll)) + CmdArgs.push_back("--dll"); + else if (Args.hasArg(options::OPT_shared)) + CmdArgs.push_back("--shared"); if (Args.hasArg(options::OPT_static)) CmdArgs.push_back("-Bstatic"); - else { - if (Args.hasArg(options::OPT_mdll)) - CmdArgs.push_back("--dll"); - else if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("--shared"); + else CmdArgs.push_back("-Bdynamic"); - if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-e"); - if (TC.getArch() == llvm::Triple::x86) - CmdArgs.push_back("_DllMainCRTStartup@12"); - else - CmdArgs.push_back("DllMainCRTStartup"); - CmdArgs.push_back("--enable-auto-image-base"); - } + if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { + CmdArgs.push_back("-e"); + if (TC.getArch() == llvm::Triple::x86) + CmdArgs.push_back("_DllMainCRTStartup@12"); + else + CmdArgs.push_back("DllMainCRTStartup"); + CmdArgs.push_back("--enable-auto-image-base"); } CmdArgs.push_back("-o"); diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp index 1160df15a920..83731c0d39e2 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp @@ -1259,6 +1259,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( Preamble.reset(); PreambleDiagnostics.clear(); TopLevelDeclsInPreamble.clear(); + PreambleSrcLocCache.clear(); PreambleRebuildCounter = 1; } } diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 6ce719aac673..438a48083ef6 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -552,7 +552,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Args.hasFlag(OPT_ffine_grained_bitfield_accesses, OPT_fno_fine_grained_bitfield_accesses, false); Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); - Opts.MergeAllConstants = !Args.hasArg(OPT_fno_merge_all_constants); + Opts.MergeAllConstants = Args.hasArg(OPT_fmerge_all_constants); Opts.NoCommon = Args.hasArg(OPT_fno_common); Opts.NoImplicitFloat = Args.hasArg(OPT_no_implicit_float); Opts.OptimizeSize = getOptimizationLevelSize(Args); diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h index 76eb87721b8b..beb0a0e4779a 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h @@ -54,23 +54,23 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_popcnt_epi16(__m128i __A) +_mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, - (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) _mm_popcnt_epi16(__B), (__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { - return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), __U, __B); } @@ -98,29 +98,29 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_popcnt_epi8(__m128i __A) +_mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, - (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) _mm_popcnt_epi8(__B), (__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { - return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), __U, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, (__v32qi) __B, @@ -128,15 +128,15 @@ _mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) } static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { - return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, __A, __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, (__v16qi) __B, @@ -144,9 +144,9 @@ _mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) } static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { - return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, __A, __B); } diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h index d1ec4976f274..5b05376fc4b2 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -31,13 +31,8 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) -static __inline __m128i __DEFAULT_FN_ATTRS -_mm128_setzero_hi(void) { - return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; -} - static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) +_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, (__v8hi) __S, @@ -45,15 +40,15 @@ _mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) +_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, - (__v8hi) _mm128_setzero_hi(), + (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) +_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, (__v16qi) __S, @@ -61,29 +56,29 @@ _mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) +_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, - (__v16qi) _mm128_setzero_hi(), + (__v16qi) _mm_setzero_si128(), __U); } static __inline__ void __DEFAULT_FN_ATTRS -_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) +_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, __U); } static __inline__ void __DEFAULT_FN_ATTRS -_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) +_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) { __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) +_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, (__v8hi) __S, @@ -91,15 +86,15 @@ _mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) +_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, - (__v8hi) _mm128_setzero_hi(), + (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) +_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, (__v16qi) __S, @@ -107,15 +102,15 @@ _mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) +_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, - (__v16qi) _mm128_setzero_hi(), + (__v16qi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) +_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, (__v8hi) __S, @@ -123,15 +118,15 @@ _mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) +_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, - (__v8hi) _mm128_setzero_hi(), + (__v8hi) _mm_setzero_si128(), __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) +_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, (__v16qi) __S, @@ -139,19 +134,13 @@ _mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) +_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, - (__v16qi) _mm128_setzero_hi(), + (__v16qi) _mm_setzero_si128(), __U); } -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setzero_hi(void) { - return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { @@ -164,7 +153,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, - (__v16hi) _mm256_setzero_hi(), + (__v16hi) _mm256_setzero_si256(), __U); } @@ -180,7 +169,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, - (__v32qi) _mm256_setzero_hi(), + (__v32qi) _mm256_setzero_si256(), __U); } @@ -210,7 +199,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, - (__v16hi) _mm256_setzero_hi(), + (__v16hi) _mm256_setzero_si256(), __U); } @@ -226,7 +215,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, - (__v32qi) _mm256_setzero_hi(), + (__v32qi) _mm256_setzero_si256(), __U); } @@ -242,7 +231,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, - (__v16hi) _mm256_setzero_hi(), + (__v16hi) _mm256_setzero_si256(), __U); } @@ -258,7 +247,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, - (__v32qi) _mm256_setzero_hi(), + (__v32qi) _mm256_setzero_si256(), __U); } @@ -270,23 +259,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask8)(U)); }) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ - _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shldi_epi64(A, B, I) \ _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ (__v2di)(B), \ (int)(I), \ (__v2di)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shldi_epi64(U, A, B, I) \ - _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shldi_epi64(U, A, B, I) \ + _mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shldi_epi64(A, B, I) \ - _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shldi_epi64(A, B, I) \ + _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) #define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ @@ -296,23 +285,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask8)(U)); }) #define _mm256_maskz_shldi_epi32(U, A, B, I) \ - _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shldi_epi32(A, B, I) \ _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ (__v4si)(B), \ (int)(I), \ (__v4si)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shldi_epi32(U, A, B, I) \ - _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shldi_epi32(U, A, B, I) \ + _mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shldi_epi32(A, B, I) \ - _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shldi_epi32(A, B, I) \ + _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) #define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ @@ -322,23 +311,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask16)(U)); }) #define _mm256_maskz_shldi_epi16(U, A, B, I) \ - _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shldi_epi16(A, B, I) \ _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ (__v8hi)(B), \ (int)(I), \ (__v8hi)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shldi_epi16(U, A, B, I) \ - _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shldi_epi16(U, A, B, I) \ + _mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shldi_epi16(A, B, I) \ - _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shldi_epi16(A, B, I) \ + _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) #define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ @@ -348,23 +337,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask8)(U)); }) #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ - _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shrdi_epi64(A, B, I) \ _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ (__v2di)(B), \ (int)(I), \ (__v2di)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shrdi_epi64(U, A, B, I) \ - _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shrdi_epi64(U, A, B, I) \ + _mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shrdi_epi64(A, B, I) \ - _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shrdi_epi64(A, B, I) \ + _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) #define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ @@ -374,23 +363,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask8)(U)); }) #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ - _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shrdi_epi32(A, B, I) \ _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ (__v4si)(B), \ (int)(I), \ (__v4si)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shrdi_epi32(U, A, B, I) \ - _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shrdi_epi32(U, A, B, I) \ + _mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shrdi_epi32(A, B, I) \ - _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shrdi_epi32(A, B, I) \ + _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) #define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ @@ -400,23 +389,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) (__mmask16)(U)); }) #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ - _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + _mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) #define _mm256_shrdi_epi16(A, B, I) \ _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) -#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ +#define _mm_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ (__v8hi)(B), \ (int)(I), \ (__v8hi)(S), \ (__mmask8)(U)); }) -#define _mm128_maskz_shrdi_epi16(U, A, B, I) \ - _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) +#define _mm_maskz_shrdi_epi16(U, A, B, I) \ + _mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) -#define _mm128_shrdi_epi16(A, B, I) \ - _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) +#define _mm_shrdi_epi16(A, B, I) \ + _mm_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) @@ -446,7 +435,7 @@ _mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, (__v2di) __A, @@ -455,7 +444,7 @@ _mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, (__v2di) __A, @@ -464,7 +453,7 @@ _mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, (__v2di) __A, @@ -500,7 +489,7 @@ _mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, (__v4si) __A, @@ -509,7 +498,7 @@ _mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, (__v4si) __A, @@ -518,7 +507,7 @@ _mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, (__v4si) __A, @@ -554,7 +543,7 @@ _mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, (__v8hi) __A, @@ -563,7 +552,7 @@ _mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, (__v8hi) __A, @@ -572,7 +561,7 @@ _mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, (__v8hi) __A, @@ -608,7 +597,7 @@ _mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, (__v2di) __A, @@ -617,7 +606,7 @@ _mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, (__v2di) __A, @@ -626,7 +615,7 @@ _mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, (__v2di) __A, @@ -662,7 +651,7 @@ _mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, (__v4si) __A, @@ -671,7 +660,7 @@ _mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, (__v4si) __A, @@ -680,7 +669,7 @@ _mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, (__v4si) __A, @@ -716,7 +705,7 @@ _mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, (__v8hi) __A, @@ -725,7 +714,7 @@ _mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, (__v8hi) __A, @@ -734,7 +723,7 @@ _mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, (__v8hi) __A, diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h index 745ae8b7ad3d..40cbbf29964d 100644 --- a/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h @@ -141,7 +141,7 @@ _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, (__v4si) __A, @@ -150,7 +150,7 @@ _mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, (__v4si) __A, @@ -159,7 +159,7 @@ _mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, (__v4si) __A, @@ -168,7 +168,7 @@ _mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, (__v4si) __A, @@ -177,7 +177,7 @@ _mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, (__v4si) __A, @@ -186,7 +186,7 @@ _mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, (__v4si) __A, @@ -195,7 +195,7 @@ _mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, (__v4si) __A, @@ -204,7 +204,7 @@ _mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, (__v4si) __A, @@ -213,7 +213,7 @@ _mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, (__v4si) __A, @@ -222,7 +222,7 @@ _mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, (__v4si) __A, @@ -231,7 +231,7 @@ _mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, (__v4si) __A, @@ -240,7 +240,7 @@ _mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, (__v4si) __A, diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index 7ca48c34e516..0ceaf7c6d691 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -10947,6 +10947,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { if (var->isThisDeclarationADefinition() && var->getDeclContext()->getRedeclContext()->isFileContext() && var->isExternallyVisible() && var->hasLinkage() && + !var->isInline() && !var->getDescribedVarTemplate() && + !isTemplateInstantiation(var->getTemplateSpecializationKind()) && !getDiagnostics().isIgnored(diag::warn_missing_variable_declarations, var->getLocation())) { // Find a previous declaration that's not a definition. diff --git a/contrib/llvm/tools/clang/tools/driver/driver.cpp b/contrib/llvm/tools/clang/tools/driver/driver.cpp index fa757da9535c..1b614accb2ce 100644 --- a/contrib/llvm/tools/clang/tools/driver/driver.cpp +++ b/contrib/llvm/tools/clang/tools/driver/driver.cpp @@ -212,20 +212,21 @@ static void insertTargetAndModeArgs(const ParsedClangName &NameParts, // Put target and mode arguments at the start of argument list so that // arguments specified in command line could override them. Avoid putting // them at index 0, as an option like '-cc1' must remain the first. - auto InsertionPoint = ArgVector.begin(); - if (InsertionPoint != ArgVector.end()) + int InsertionPoint = 0; + if (ArgVector.size() > 0) ++InsertionPoint; if (NameParts.DriverMode) { // Add the mode flag to the arguments. - ArgVector.insert(InsertionPoint, + ArgVector.insert(ArgVector.begin() + InsertionPoint, GetStableCStr(SavedStrings, NameParts.DriverMode)); } if (NameParts.TargetIsValid) { const char *arr[] = {"-target", GetStableCStr(SavedStrings, NameParts.TargetPrefix)}; - ArgVector.insert(InsertionPoint, std::begin(arr), std::end(arr)); + ArgVector.insert(ArgVector.begin() + InsertionPoint, + std::begin(arr), std::end(arr)); } } diff --git a/contrib/llvm/tools/lld/COFF/Config.h b/contrib/llvm/tools/lld/COFF/Config.h index 93bef23a97f0..b01689930fac 100644 --- a/contrib/llvm/tools/lld/COFF/Config.h +++ b/contrib/llvm/tools/lld/COFF/Config.h @@ -175,6 +175,7 @@ struct Configuration { bool AppContainer = false; bool MinGW = false; bool WarnLocallyDefinedImported = true; + bool KillAt = false; }; extern Configuration *Config; diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp b/contrib/llvm/tools/lld/COFF/Driver.cpp index 0f3d8fb0b4ef..d4030588e211 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -970,6 +970,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { if (Args.hasArg(OPT_lldsavetemps)) Config->SaveTemps = true; + // Handle /kill-at + if (Args.hasArg(OPT_kill_at)) + Config->KillAt = true; + // Handle /lldltocache if (auto *Arg = Args.getLastArg(OPT_lldltocache)) Config->LTOCache = Arg->getValue(); diff --git a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp index e0641e04a017..4b3b6d5e09d6 100644 --- a/contrib/llvm/tools/lld/COFF/DriverUtils.cpp +++ b/contrib/llvm/tools/lld/COFF/DriverUtils.cpp @@ -561,6 +561,26 @@ static StringRef undecorate(StringRef Sym) { return Sym.startswith("_") ? Sym.substr(1) : Sym; } +// Convert stdcall/fastcall style symbols into unsuffixed symbols, +// with or without a leading underscore. (MinGW specific.) +static StringRef killAt(StringRef Sym, bool Prefix) { + if (Sym.empty()) + return Sym; + // Strip any trailing stdcall suffix + Sym = Sym.substr(0, Sym.find('@', 1)); + if (!Sym.startswith("@")) { + if (Prefix && !Sym.startswith("_")) + return Saver.save("_" + Sym); + return Sym; + } + // For fastcall, remove the leading @ and replace it with an + // underscore, if prefixes are used. + Sym = Sym.substr(1); + if (Prefix) + Sym = Saver.save("_" + Sym); + return Sym; +} + // Performs error checking on all /export arguments. // It also sets ordinals. void fixupExports() { @@ -593,6 +613,15 @@ void fixupExports() { } } + if (Config->KillAt && Config->Machine == I386) { + for (Export &E : Config->Exports) { + E.Name = killAt(E.Name, true); + E.ExportName = killAt(E.ExportName, false); + E.ExtName = killAt(E.ExtName, true); + E.SymbolName = killAt(E.SymbolName, true); + } + } + // Uniquefy by name. DenseMap<StringRef, Export *> Map(Config->Exports.size()); std::vector<Export> V; diff --git a/contrib/llvm/tools/lld/COFF/Options.td b/contrib/llvm/tools/lld/COFF/Options.td index 7d4cdba14f75..2a1de14657c1 100644 --- a/contrib/llvm/tools/lld/COFF/Options.td +++ b/contrib/llvm/tools/lld/COFF/Options.td @@ -121,6 +121,7 @@ def help_q : Flag<["/?", "-?"], "">, Alias<help>; def debug_ghash : F<"debug:ghash">; def debug_dwarf : F<"debug:dwarf">; def export_all_symbols : F<"export-all-symbols">; +def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; def msvclto : F<"msvclto">; def output_def : Joined<["/", "-"], "output-def:">; diff --git a/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp b/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp index 495e2567006f..e8af36e6d11e 100644 --- a/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp +++ b/contrib/llvm/tools/lld/ELF/Arch/Mips.cpp @@ -296,7 +296,8 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const { write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2 } - write32<E>(Buf + 24, 0x0320f809); // jalr $25 + uint32_t JalrInst = Config->ZHazardplt ? 0x0320fc09 : 0x0320f809; + write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25 write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2 uint64_t GotPlt = InX::GotPlt->getVA(); @@ -330,9 +331,12 @@ void MIPS<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, return; } + uint32_t JrInst = isMipsR6() ? (Config->ZHazardplt ? 0x03200409 : 0x03200009) + : (Config->ZHazardplt ? 0x03200408 : 0x03200008); + write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry) write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15) - write32<E>(Buf + 8, isMipsR6() ? 0x03200009 : 0x03200008); // jr $25 + write32<E>(Buf + 8, JrInst); // jr $25 / jr.hb $25 write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry) writeRelocation<E>(Buf, GotPltEntryAddr + 0x8000, 16, 16); writeRelocation<E>(Buf + 4, GotPltEntryAddr, 16, 0); diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h index ed425720965e..1f244df4d652 100644 --- a/contrib/llvm/tools/lld/ELF/Config.h +++ b/contrib/llvm/tools/lld/ELF/Config.h @@ -151,6 +151,7 @@ struct Configuration { bool WarnMissingEntry; bool ZCombreloc; bool ZExecstack; + bool ZHazardplt; bool ZNocopyreloc; bool ZNodelete; bool ZNodlopen; diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp index 714976ae9c4b..44cfa56c94ce 100644 --- a/contrib/llvm/tools/lld/ELF/Driver.cpp +++ b/contrib/llvm/tools/lld/ELF/Driver.cpp @@ -668,6 +668,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->WarnCommon = Args.hasArg(OPT_warn_common); Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); Config->ZExecstack = hasZOption(Args, "execstack"); + Config->ZHazardplt = hasZOption(Args, "hazardplt"); Config->ZNocopyreloc = hasZOption(Args, "nocopyreloc"); Config->ZNodelete = hasZOption(Args, "nodelete"); Config->ZNodlopen = hasZOption(Args, "nodlopen"); diff --git a/lib/clang/freebsd_cc_version.h b/lib/clang/freebsd_cc_version.h index 728ac2fd8897..cb89cc37f126 100644 --- a/lib/clang/freebsd_cc_version.h +++ b/lib/clang/freebsd_cc_version.h @@ -1,3 +1,3 @@ /* $FreeBSD$ */ -#define FREEBSD_CC_VERSION 1200014 +#define FREEBSD_CC_VERSION 1200015 diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc index 9b39f0e2441c..c59fab5444db 100644 --- a/lib/clang/include/clang/Basic/Version.inc +++ b/lib/clang/include/clang/Basic/Version.inc @@ -8,4 +8,4 @@ #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "326565" +#define SVN_REVISION "335540" diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc index e785138fd2f2..a14068958f31 100644 --- a/lib/clang/include/lld/Common/Version.inc +++ b/lib/clang/include/lld/Common/Version.inc @@ -7,4 +7,4 @@ #define LLD_REPOSITORY_STRING "FreeBSD" // <Upstream revision at import>-<Local identifier in __FreeBSD_version style> -#define LLD_REVISION_STRING "326565-1200002" +#define LLD_REVISION_STRING "335540-1200003" diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index be945251af58..d16d9c27dfb3 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,2 +1,2 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "svn-r326565" +#define LLVM_REVISION "svn-r335540" |