diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-02-21 13:51:43 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-02-21 13:51:43 +0000 |
commit | 3f4bde29a30d8c43db5cbe8f5541ebc5d1fdc6af (patch) | |
tree | 87140683fc3fee4e14b3c37e2aa7a4031d473f49 | |
parent | a322a4af1fe8b989fe5d1bbc15de8736a26c03ca (diff) |
Vendor import of llvm release_38 branch r261369:vendor/llvm/llvm-release_38-r261369
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=295846
svn path=/vendor/llvm/llvm-release_38-r261369/; revision=295847; tag=vendor/llvm/llvm-release_38-r261369
34 files changed, 651 insertions, 298 deletions
diff --git a/docs/CMake.rst b/docs/CMake.rst index 9ec6b0a2416e..4e5feae99931 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -197,12 +197,6 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``. **CMAKE_CXX_FLAGS**:STRING Extra flags to use when compiling C++ source files. -**BUILD_SHARED_LIBS**:BOOL - Flag indicating if shared libraries will be built. Its default value is - OFF. This option is only recommended for use by LLVM developers. - On Windows, shared libraries may be used when building with MinGW, including - mingw-w64, but not when building with the Microsoft toolchain. - .. _LLVM-specific variables: LLVM-specific variables @@ -445,6 +439,30 @@ LLVM-specific variables $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can be used to override the default system tools. +**LLVM_BUILD_LLVM_DYLIB**:BOOL + If enabled, the target for building the libLLVM shared library is added. + This library contains all of LLVM's components in a single shared library. + Defaults to OFF. This cannot be used in conjunction with BUILD_SHARED_LIBS. + Tools will only be linked to the libLLVM shared library if LLVM_LINK_LLVM_DYLIB + is also ON. + The components in the library can be customised by setting LLVM_DYLIB_COMPONENTS + to a list of the desired components. + +**LLVM_LINK_LLVM_DYLIB**:BOOL + If enabled, tools will be linked with the libLLVM shared library. Defaults + to OFF. Setting LLVM_LINK_LLVM_DYLIB to ON also sets LLVM_BUILD_LLVM_DYLIB + to ON. + +**BUILD_SHARED_LIBS**:BOOL + Flag indicating if each LLVM component (e.g. Support) is built as a shared + library (ON) or as a static library (OFF). Its default value is OFF. On + Windows, shared libraries may be used when building with MinGW, including + mingw-w64, but not when building with the Microsoft toolchain. + + .. note:: BUILD_SHARED_LIBS is only recommended for use by LLVM developers. + If you want to build LLVM as a shared library, you should use the + ``LLVM_BUILD_LLVM_DYLIB`` option. + Executing the test suite ======================== diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 7b284d59656b..73831f545390 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -89,6 +89,30 @@ Non-comprehensive list of changes in this release the node ``N`` is guaranteed not to be the last in the list, it is safe to call ``&*++N->getIterator()`` directly. +* The `Kaleidoscope tutorials <tutorial/index.html>`_ have been updated to use + the ORC JIT APIs. + +* ORC now has a basic set of C bindings. + +* Optional support for linking clang and the LLVM tools with a single libLLVM + shared library. To enable this, pass ``-DLLVM_LINK_LLVM_DYLIB=ON`` to CMake. + See `Building LLVM with CMake`_ for more details. + +* The optimization to move the prologue and epilogue of functions in colder + code path (shrink-wrapping) is now enabled by default. + +* A new target-independent gcc-compatible emulated Thread Local Storage mode + is added. When ``-femultated-tls`` flag is used, all accesses to TLS + variables are converted to calls to ``__emutls_get_address`` in the runtime + library. + +* MSVC compatible exception handling has been completely overhauled. New + instructions have been introduced to facilitate this: + `New exception handling instructions <ExceptionHandling.html#new-exception-handling-instructions>`_. + While we have done our best to test this feature thoroughly, it would + not be completely surprising if there were a few lingering issues that + early adopters might bump into. + .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of this list. If your description won't fit comfortably in one bullet @@ -115,7 +139,7 @@ Changes to the ARM Backends During this release the AArch64 target has: * Added support for more sanitizers (MSAN, TSAN) and made them compatible with - all VMA kernel configurations (kurrently tested on 39 and 42 bits). + all VMA kernel configurations (currently tested on 39 and 42 bits). * Gained initial LLD support in the new ELF back-end * Extended the Load/Store optimiser and cleaned up some of the bad decisions made earlier. @@ -218,9 +242,16 @@ Changes to the X86 Target * More efficient code for wide integer compares. (E.g. 64-bit compares on 32-bit targets.) -* Tail call support for ``thiscall``, ``stdcall`, ``vectorcall``, and +* Tail call support for ``thiscall``, ``stdcall``, ``vectorcall``, and ``fastcall`` functions. +Changes to the Hexagon Target +----------------------------- + +In addition to general code size and performance improvements, Hexagon target +now has basic support for Hexagon V60 architecture and Hexagon Vector +Extensions (HVX). + Changes to the AVR Target ------------------------- diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index edade3164a3c..f1ea2c03f13c 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -544,6 +544,11 @@ namespace llvm { return true; } + // Returns true if any segment in the live range contains any of the + // provided slot indexes. Slots which occur in holes between + // segments will not cause the function to return true. + bool isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const; + bool operator<(const LiveRange& other) const { const SlotIndex &thisIndex = beginIndex(); const SlotIndex &otherIndex = other.beginIndex(); diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 1b75c60631b0..2f8c3c499295 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1539,16 +1539,7 @@ public: } CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None, - ArrayRef<OperandBundleDef> OpBundles = None, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - CallInst *CI = CallInst::Create(Callee, Args, OpBundles); - if (isa<FPMathOperator>(CI)) - CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF)); - return Insert(CI, Name); - } - - CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args, - const Twine &Name, MDNode *FPMathTag = nullptr) { PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); return CreateCall(FTy, Callee, Args, Name, FPMathTag); @@ -1563,6 +1554,15 @@ public: return Insert(CI, Name); } + CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args, + ArrayRef<OperandBundleDef> OpBundles, + const Twine &Name = "", MDNode *FPMathTag = nullptr) { + CallInst *CI = CallInst::Create(Callee, Args, OpBundles); + if (isa<FPMathOperator>(CI)) + CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF)); + return Insert(CI, Name); + } + CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args, const Twine &Name = "", MDNode *FPMathTag = nullptr) { return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag); diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index aba48ca6fa9e..28e1fd90fdf6 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -2512,6 +2512,14 @@ public: return block_begin() + getNumOperands(); } + iterator_range<block_iterator> blocks() { + return make_range(block_begin(), block_end()); + } + + iterator_range<const_block_iterator> blocks() const { + return make_range(block_begin(), block_end()); + } + op_range incoming_values() { return operands(); } const_op_range incoming_values() const { return operands(); } diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index abc655ac34ca..26aa46fb6c2a 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -213,8 +213,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { if (!Spillable) return; - // Mark li as unspillable if all live ranges are tiny. - if (li.isZeroLength(LIS.getSlotIndexes())) { + // Mark li as unspillable if all live ranges are tiny and the interval + // is not live at any reg mask. If the interval is live at a reg mask + // spilling may be required. + if (li.isZeroLength(LIS.getSlotIndexes()) && + !li.isLiveAtIndexes(LIS.getRegMaskSlots())) { li.markNotSpillable(); return; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index bb3488348f24..50158006211d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -748,6 +748,40 @@ void LiveRange::flushSegmentSet() { verify(); } +bool LiveRange::isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const { + ArrayRef<SlotIndex>::iterator SlotI = Slots.begin(); + ArrayRef<SlotIndex>::iterator SlotE = Slots.end(); + + // If there are no regmask slots, we have nothing to search. + if (SlotI == SlotE) + return false; + + // Start our search at the first segment that ends after the first slot. + const_iterator SegmentI = find(*SlotI); + const_iterator SegmentE = end(); + + // If there are no segments that end after the first slot, we're done. + if (SegmentI == SegmentE) + return false; + + // Look for each slot in the live range. + for ( ; SlotI != SlotE; ++SlotI) { + // Go to the next segment that ends after the current slot. + // The slot may be within a hole in the range. + SegmentI = advanceTo(SegmentI, *SlotI); + if (SegmentI == SegmentE) + return false; + + // If this segment contains the slot, we're done. + if (SegmentI->contains(*SlotI)) + return true; + // Otherwise, look for the next slot. + } + + // We didn't find a segment containing any of the slots. + return false; +} + void LiveInterval::freeSubRange(SubRange *S) { S->~SubRange(); // Memory was allocated with BumpPtr allocator and is not freed here. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5d572c4c2b02..f7836345f720 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1637,6 +1637,7 @@ struct FloatSignAsInt { MachinePointerInfo FloatPointerInfo; SDValue IntValue; APInt SignMask; + uint8_t SignBit; }; } @@ -1653,6 +1654,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); State.SignMask = APInt::getSignBit(NumBits); + State.SignBit = NumBits - 1; return; } @@ -1689,6 +1691,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, IntPtr, State.IntPointerInfo, MVT::i8, false, false, false, 0); State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); + State.SignBit = 7; } /// Replace the integer value produced by getSignAsIntValue() with a new value @@ -1731,15 +1734,38 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); } - // Transform values to integer, copy the sign bit and transform back. + // Transform Mag value to integer, and clear the sign bit. FloatSignAsInt MagAsInt; getSignAsIntValue(MagAsInt, DL, Mag); - assert(SignAsInt.SignMask == MagAsInt.SignMask); - SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); - SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + EVT MagVT = MagAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue, ClearSignMask); - SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + // Get the signbit at the right position for MagAsInt. + int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit; + if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) { + if (ShiftAmount > 0) { + SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT); + SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst); + } else if (ShiftAmount < 0) { + SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT); + SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst); + } + SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit); + } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) { + SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit); + if (ShiftAmount > 0) { + SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT); + SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst); + } else if (ShiftAmount < 0) { + SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT); + SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst); + } + } + + // Store the part with the modified sign and convert back to float. + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit); return modifySignAsInt(MagAsInt, DL, CopiedSign); } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 886c5f6070c1..14ec91159809 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -257,10 +257,14 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); - if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) - if (getCleanupRetUnwindDest(InnerCleanupPad) == - CatchSwitch->getUnwindDest()) + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) { + BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad); + // If a nested cleanup pad reports a null unwind destination and the + // enclosing catch pad doesn't it must be post-dominated by an + // unreachable instruction. + if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest()) calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); + } } } int CatchHigh = FuncInfo.getLastStateNumber(); @@ -360,10 +364,14 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) calculateSEHStateNumbers(FuncInfo, UserI, ParentState); - if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) - if (getCleanupRetUnwindDest(InnerCleanupPad) == - CatchSwitch->getUnwindDest()) + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) { + BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad); + // If a nested cleanup pad reports a null unwind destination and the + // enclosing catch pad doesn't it must be post-dominated by an + // unreachable instruction. + if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest()) calculateSEHStateNumbers(FuncInfo, UserI, ParentState); + } } } else { auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt index 331d2141b0e2..65f2a2f51f9b 100644 --- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -1,8 +1,17 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) +if( HAVE_LIBDL ) + set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS}) +endif() +if( HAVE_LIBPTHREAD ) + set(LLVM_INTEL_JIT_LIBS pthread ${LLVM_INTEL_JIT_LIBS}) +endif() + add_llvm_library(LLVMIntelJITEvents IntelJITEventListener.cpp jitprofiling.c - LINK_LIBS pthread ${CMAKE_DL_LIBS} + LINK_LIBS ${LLVM_INTEL_JIT_LIBS} ) + +add_dependencies(LLVMIntelJITEvents LLVMCodeGen) diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt index afea3ecccda4..8584500d64ad 100644 --- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -21,4 +21,4 @@ type = OptionalLibrary name = IntelJITEvents parent = ExecutionEngine -required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine +required_libraries = CodeGen Core DebugInfoDWARF Support Object ExecutionEngine diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 0e5d3ac50379..11afcf7f0adc 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -338,9 +338,9 @@ static Triple::ArchType parseArch(StringRef ArchName) { // FIXME: Do we need to support these? .Cases("i786", "i886", "i986", Triple::x86) .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) - .Case("powerpc", Triple::ppc) - .Cases("powerpc64", "ppu", Triple::ppc64) - .Case("powerpc64le", Triple::ppc64le) + .Cases("powerpc", "ppc32", Triple::ppc) + .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64) + .Cases("powerpc64le", "ppc64le", Triple::ppc64le) .Case("xscale", Triple::arm) .Case("xscaleeb", Triple::armeb) .Case("aarch64", Triple::aarch64) @@ -359,7 +359,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) .Case("hexagon", Triple::hexagon) - .Case("s390x", Triple::systemz) + .Cases("s390x", "systemz", Triple::systemz) .Case("sparc", Triple::sparc) .Case("sparcel", Triple::sparcel) .Cases("sparcv9", "sparc64", Triple::sparcv9) diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index d51e2ccc8a35..361d21440a97 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -267,9 +267,9 @@ defm : int_cond_alias<"neg", 0b0110>; defm : int_cond_alias<"vc", 0b1111>; defm : int_cond_alias<"vs", 0b0111>; -defm : fp_cond_alias<"a", 0b0000>; -defm : fp_cond_alias<"", 0b0000>; // same as a; gnu asm, not in manual -defm : fp_cond_alias<"n", 0b1000>; +defm : fp_cond_alias<"a", 0b1000>; +defm : fp_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual +defm : fp_cond_alias<"n", 0b0000>; defm : fp_cond_alias<"u", 0b0111>; defm : fp_cond_alias<"g", 0b0110>; defm : fp_cond_alias<"ug", 0b0101>; diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 133bd0e1772a..135c32bf8c3b 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -69,19 +69,15 @@ public: class X86AsmBackend : public MCAsmBackend { const StringRef CPU; bool HasNopl; - uint64_t MaxNopLength; + const uint64_t MaxNopLength; public: - X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) { + X86AsmBackend(const Target &T, StringRef CPU) + : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) { HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" && CPU != "c3" && CPU != "c3-2"; - // Max length of true long nop instruction is 15 bytes. - // Max length of long nop replacement instruction is 7 bytes. - // Taking into account SilverMont architecture features max length of nops - // is reduced for it to achieve better performance. - MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15; } unsigned getNumFixupKinds() const override { @@ -299,7 +295,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { /// bytes. /// \return - true on success, false on failure bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - static const uint8_t TrueNops[10][10] = { + static const uint8_t Nops[10][10] = { // nop {0x90}, // xchg %ax,%ax @@ -322,31 +318,17 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, }; - // Alternative nop instructions for CPUs which don't support long nops. - static const uint8_t AltNops[7][10] = { - // nop - {0x90}, - // xchg %ax,%ax - {0x66, 0x90}, - // lea 0x0(%esi),%esi - {0x8d, 0x76, 0x00}, - // lea 0x0(%esi),%esi - {0x8d, 0x74, 0x26, 0x00}, - // nop + lea 0x0(%esi),%esi - {0x90, 0x8d, 0x74, 0x26, 0x00}, - // lea 0x0(%esi),%esi - {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 }, - // lea 0x0(%esi),%esi - {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00}, - }; - - // Select the right NOP table. - // FIXME: Can we get if CPU supports long nops from the subtarget somehow? - const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops; - assert(HasNopl || MaxNopLength <= 7); + // This CPU doesn't support long nops. If needed add more. + // FIXME: Can we get this from the subtarget somehow? + // FIXME: We could generated something better than plain 0x90. + if (!HasNopl) { + for (uint64_t i = 0; i < Count; ++i) + OW->write8(0x90); + return true; + } - // Emit as many largest nops as needed, then emit a nop of the remaining - // length. + // 15 is the longest single nop instruction. Emit as many 15-byte nops as + // needed, then emit a nop of the remaining length. do { const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 7f8ce4768c00..f5ffe0cf7e88 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -192,10 +192,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; } -static bool isEAXLiveIn(MachineFunction &MF) { - for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), - EE = MF.getRegInfo().livein_end(); II != EE; ++II) { - unsigned Reg = II->first; +static bool isEAXLiveIn(MachineBasicBlock &MBB) { + for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { + unsigned Reg = RegMask.PhysReg; if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || Reg == X86::AH || Reg == X86::AL) @@ -261,7 +260,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, // load the offset into a register and do one sub/add unsigned Reg = 0; - if (isSub && !isEAXLiveIn(*MBB.getParent())) + if (isSub && !isEAXLiveIn(MBB)) Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); else Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); @@ -1133,8 +1132,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { - // Check whether EAX is livein for this function. - bool isEAXAlive = isEAXLiveIn(MF); + // Check whether EAX is livein for this block. + bool isEAXAlive = isEAXLiveIn(MBB); if (isEAXAlive) { // Sanity check that EAX is not livein for this function. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 49be64883939..6f0199b015cd 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5896,7 +5896,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, def : Pat<(_.EltVT (OpNode (load addr:$src))), (!cast<Instruction>(NAME#SUFF#Zm) - (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; + (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>; } multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> { diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 3af4afb903fe..22a95fa03f7c 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -186,32 +187,8 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) { - SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); - SmallVector<OperandBundleDef, 1> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - - // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles, - "", II); - Call->takeName(II); - Call->setCallingConv(II->getCallingConv()); - Call->setAttributes(II->getAttributes()); - Call->setDebugLoc(II->getDebugLoc()); - - // Anything that used the value produced by the invoke instruction - // now uses the value produced by the call instruction. Note that we - // do this even for void functions and calls with no uses so that the - // callgraph edge is updated. - II->replaceAllUsesWith(Call); BasicBlock *UnwindBlock = II->getUnwindDest(); - UnwindBlock->removePredecessor(II->getParent()); - - // Insert a branch to the normal destination right before the - // invoke. - BranchInst::Create(II->getNormalDest(), II); - - // Finally, delete the invoke instruction! - BB->getInstList().pop_back(); + removeUnwindEdge(&*BB); // If the unwind block is now dead, nuke it. if (pred_empty(UnwindBlock)) @@ -251,23 +228,39 @@ void PruneEH::DeleteBasicBlock(BasicBlock *BB) { assert(pred_empty(BB) && "BB is not dead!"); CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + Instruction *TokenInst = nullptr; + CallGraphNode *CGN = CG[BB->getParent()]; for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { --I; - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<IntrinsicInst>(I)) - CGN->removeCallEdgeFor(CI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - CGN->removeCallEdgeFor(II); + + if (I->getType()->isTokenTy()) { + TokenInst = &*I; + break; + } + + if (auto CS = CallSite (&*I)) { + const Function *Callee = CS.getCalledFunction(); + if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID())) + CGN->removeCallEdgeFor(CS); + else if (!Callee->isIntrinsic()) + CGN->removeCallEdgeFor(CS); + } + if (!I->use_empty()) I->replaceAllUsesWith(UndefValue::get(I->getType())); } - // Get the list of successors of this block. - std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); + if (TokenInst) { + if (!isa<TerminatorInst>(TokenInst)) + changeToUnreachable(TokenInst->getNextNode(), /*UseLLVMTrap=*/false); + } else { + // Get the list of successors of this block. + std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); - for (unsigned i = 0, e = Succs.size(); i != e; ++i) - Succs[i]->removePredecessor(BB); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + Succs[i]->removePredecessor(BB); - BB->eraseFromParent(); + BB->eraseFromParent(); + } } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 2101225ed9f7..acfdec43d21a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4799,6 +4799,17 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n"); return; } + // Bail out if we have a PHI on an EHPad that gets a value from a + // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is + // no good place to stick any instructions. + if (auto *PN = dyn_cast<PHINode>(U.getUser())) { + auto *FirstNonPHI = PN->getParent()->getFirstNonPHI(); + if (isa<FuncletPadInst>(FirstNonPHI) || + isa<CatchSwitchInst>(FirstNonPHI)) + for (BasicBlock *PredBB : PN->blocks()) + if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI())) + return; + } } #ifndef NDEBUG diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2c0d317d16bc..17c25dfffc10 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1409,14 +1409,15 @@ private: /// different operations. class LoopVectorizationCostModel { public: - LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE, - LoopInfo *LI, LoopVectorizationLegality *Legal, + LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, + LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, - const LoopVectorizeHints *Hints) - : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), TheFunction(F), Hints(Hints) {} + const LoopVectorizeHints *Hints, + SmallPtrSetImpl<const Value *> &ValuesToIgnore) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), + TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} /// Information about vectorization costs struct VectorizationFactor { @@ -1464,9 +1465,6 @@ public: SmallVector<RegisterUsage, 8> calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs); - /// Collect values we want to ignore in the cost model. - void collectValuesToIgnore(); - private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -1498,8 +1496,8 @@ public: /// The loop that we evaluate. Loop *TheLoop; - /// Predicated scalar evolution analysis. - PredicatedScalarEvolution &PSE; + /// Scev analysis. + ScalarEvolution *SE; /// Loop Info analysis. LoopInfo *LI; /// Vectorization legality. @@ -1508,17 +1506,13 @@ public: const TargetTransformInfo &TTI; /// Target Library Info. const TargetLibraryInfo *TLI; - /// Demanded bits analysis. + /// Demanded bits analysis DemandedBits *DB; - /// Assumption cache. - AssumptionCache *AC; const Function *TheFunction; - /// Loop Vectorize Hint. + // Loop Vectorize Hint. const LoopVectorizeHints *Hints; - /// Values to ignore in the cost model. - SmallPtrSet<const Value *, 16> ValuesToIgnore; - /// Values to ignore in the cost model when VF > 1. - SmallPtrSet<const Value *, 16> VecValuesToIgnore; + // Values to ignore in the cost model. + const SmallPtrSetImpl<const Value *> &ValuesToIgnore; }; /// \brief This holds vectorization requirements that must be verified late in @@ -1763,10 +1757,19 @@ struct LoopVectorize : public FunctionPass { return false; } + // Collect values we want to ignore in the cost model. This includes + // type-promoting instructions we identified during reduction detection. + SmallPtrSet<const Value *, 32> ValuesToIgnore; + CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore); + for (auto &Reduction : *LVL.getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts(); + ValuesToIgnore.insert(Casts.begin(), Casts.end()); + } + // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, - &Hints); - CM.collectValuesToIgnore(); + LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC, + F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -4636,6 +4639,8 @@ void InterleavedAccessInfo::analyzeInterleaving( // Holds all interleaved store groups temporarily. SmallSetVector<InterleaveGroup *, 4> StoreGroups; + // Holds all interleaved load groups temporarily. + SmallSetVector<InterleaveGroup *, 4> LoadGroups; // Search the load-load/write-write pair B-A in bottom-up order and try to // insert B into the interleave group of A according to 3 rules: @@ -4663,6 +4668,8 @@ void InterleavedAccessInfo::analyzeInterleaving( if (A->mayWriteToMemory()) StoreGroups.insert(Group); + else + LoadGroups.insert(Group); for (auto II = std::next(I); II != E; ++II) { Instruction *B = II->first; @@ -4710,6 +4717,12 @@ void InterleavedAccessInfo::analyzeInterleaving( for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) releaseGroup(Group); + + // Remove interleaved load groups that don't have the first and last member. + // This guarantees that we won't do speculative out of bounds loads. + for (InterleaveGroup *Group : LoadGroups) + if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1)) + releaseGroup(Group); } LoopVectorizationCostModel::VectorizationFactor @@ -4734,7 +4747,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } // Find the trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); @@ -4936,7 +4949,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // Do not interleave loops with a relatively small trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; @@ -5164,15 +5177,15 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; + // Skip ignored values. + if (ValuesToIgnore.count(I)) + continue; + // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; for (unsigned int j = 0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // Skip ignored values. - if (ValuesToIgnore.count(I)) - continue; - // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { if (VFs[j] == 1) { @@ -5182,12 +5195,8 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Count the number of live intervals. unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. - if (VecValuesToIgnore.count(Inst)) - continue; + for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); - } MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } @@ -5331,7 +5340,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && MinBWs.count(I)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); Type *VectorTy = ToVectorTy(RetTy, VF); - auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -5633,79 +5641,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } -void LoopVectorizationCostModel::collectValuesToIgnore() { - // Ignore ephemeral values. - CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); - - // Ignore type-promoting instructions we identified during reduction - // detection. - for (auto &Reduction : *Legal->getReductionVars()) { - RecurrenceDescriptor &RedDes = Reduction.second; - SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts(); - VecValuesToIgnore.insert(Casts.begin(), Casts.end()); - } - - // Ignore induction phis that are only used in either GetElementPtr or ICmp - // instruction to exit loop. Induction variables usually have large types and - // can have big impact when estimating register usage. - // This is for when VF > 1. - for (auto &Induction : *Legal->getInductionVars()) { - auto *PN = Induction.first; - auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); - - // Check that the PHI is only used by the induction increment (UpdateV) or - // by GEPs. Then check that UpdateV is only used by a compare instruction or - // the loop header PHI. - // FIXME: Need precise def-use analysis to determine if this instruction - // variable will be vectorized. - if (std::all_of(PN->user_begin(), PN->user_end(), - [&](const User *U) -> bool { - return U == UpdateV || isa<GetElementPtrInst>(U); - }) && - std::all_of(UpdateV->user_begin(), UpdateV->user_end(), - [&](const User *U) -> bool { - return U == PN || isa<ICmpInst>(U); - })) { - VecValuesToIgnore.insert(PN); - VecValuesToIgnore.insert(UpdateV); - } - } - - // Ignore instructions that will not be vectorized. - // This is for when VF > 1. - for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; - ++bb) { - for (auto &Inst : **bb) { - switch (Inst.getOpcode()) { - case Instruction::GetElementPtr: { - // Ignore GEP if its last operand is an induction variable so that it is - // a consecutive load/store and won't be vectorized as scatter/gather - // pattern. - - GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst); - unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(Gep); - bool GepToIgnore = true; - - // Check that all of the gep indices are uniform except for the - // induction operand. - for (unsigned i = 0; i != NumOperands; ++i) { - if (i != InductionOperand && - !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), - TheLoop)) { - GepToIgnore = false; - break; - } - } - - if (GepToIgnore) - VecValuesToIgnore.insert(&Inst); - break; - } - } - } - } -} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { diff --git a/test/CodeGen/AArch64/fcopysign.ll b/test/CodeGen/AArch64/fcopysign.ll new file mode 100644 index 000000000000..6bda33cf76ef --- /dev/null +++ b/test/CodeGen/AArch64/fcopysign.ll @@ -0,0 +1,23 @@ +; RUN: llc -o - %s | FileCheck %s +; Check that selection dag legalization of fcopysign works in cases with +; different modes for the arguments. +target triple = "aarch64--" + +declare fp128 @llvm.copysign.f128(fp128, fp128) + +@val = global double zeroinitializer, align 8 + +; CHECK-LABEL: copysign0 +; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val] +; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000 +; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56 +; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7 +; CHECK: strb w[[LSRREGNUM]], +; CHECK: ldr q{{[0-9]+}}, +define fp128 @copysign0() { +entry: + %v = load double, double* @val, align 8 + %conv = fpext double %v to fp128 + %call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2 + ret fp128 %call +} diff --git a/test/CodeGen/WinEH/wineh-noret-cleanup.ll b/test/CodeGen/WinEH/wineh-noret-cleanup.ll new file mode 100644 index 000000000000..7d4d833aa9ba --- /dev/null +++ b/test/CodeGen/WinEH/wineh-noret-cleanup.ll @@ -0,0 +1,80 @@ +; RUN: sed -e s/.Cxx:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=CXX +; RUN: sed -e s/.Seh:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=SEH + +declare i32 @__CxxFrameHandler3(...) +declare i32 @__C_specific_handler(...) +declare void @dummy_filter() + +declare void @f(i32) + +;Cxx: define void @test() personality i32 (...)* @__CxxFrameHandler3 { +;Seh: define void @test() personality i32 (...)* @__C_specific_handler { +entry: + invoke void @f(i32 1) + to label %invoke.cont unwind label %catch.dispatch + +catch.dispatch: + %cs1 = catchswitch within none [label %catch.body] unwind label %catch.dispatch.2 + +catch.body: +;Cxx: %catch = catchpad within %cs1 [i8* null, i32 u0x40, i8* null] +;Seh: %catch = catchpad within %cs1 [void ()* @dummy_filter] + invoke void @f(i32 2) [ "funclet"(token %catch) ] + to label %unreachable unwind label %terminate + +terminate: + %cleanup = cleanuppad within %catch [] + call void @f(i32 3) [ "funclet"(token %cleanup) ] + unreachable + +unreachable: + unreachable + +invoke.cont: + ret void + +catch.dispatch.2: + %cs2 = catchswitch within none [label %catch.body.2] unwind to caller + +catch.body.2: +;Cxx: %catch2 = catchpad within %cs2 [i8* null, i32 u0x40, i8* null] +;Seh: %catch2 = catchpad within %cs2 [void ()* @dummy_filter] + unreachable +} + +; CXX-LABEL: test: +; CXX-LABEL: $ip2state$test: +; CXX-NEXT: .long .Lfunc_begin0@IMGREL +; CXX-NEXT: .long -1 +; CXX-NEXT: .long .Ltmp0@IMGREL+1 +; CXX-NEXT: .long 1 +; CXX-NEXT: .long .Ltmp1@IMGREL+1 +; CXX-NEXT: .long -1 +; CXX-NEXT: .long "?catch$3@?0?test@4HA"@IMGREL +; CXX-NEXT: .long 2 +; CXX-NEXT: .long .Ltmp2@IMGREL+1 +; CXX-NEXT: .long 3 +; CXX-NEXT: .long .Ltmp3@IMGREL+1 +; CXX-NEXT: .long 2 +; CXX-NEXT: .long "?catch$5@?0?test@4HA"@IMGREL +; CXX-NEXT: .long 4 + +; SEH-LABEL: test: +; SEH-LABEL: .Llsda_begin0: +; SEH-NEXT: .long .Ltmp0@IMGREL+1 +; SEH-NEXT: .long .Ltmp1@IMGREL+1 +; SEH-NEXT: .long dummy_filter@IMGREL +; SEH-NEXT: .long .LBB0_3@IMGREL +; SEH-NEXT: .long .Ltmp0@IMGREL+1 +; SEH-NEXT: .long .Ltmp1@IMGREL+1 +; SEH-NEXT: .long dummy_filter@IMGREL +; SEH-NEXT: .long .LBB0_5@IMGREL +; SEH-NEXT: .long .Ltmp2@IMGREL+1 +; SEH-NEXT: .long .Ltmp3@IMGREL+1 +; SEH-NEXT: .long "?dtor$2@?0?test@4HA"@IMGREL +; SEH-NEXT: .long 0 +; SEH-NEXT: .long .Ltmp2@IMGREL+1 +; SEH-NEXT: .long .Ltmp3@IMGREL+1 +; SEH-NEXT: .long dummy_filter@IMGREL +; SEH-NEXT: .long .LBB0_5@IMGREL +; SEH-NEXT: .Llsda_end0: diff --git a/test/CodeGen/X86/pr26625.ll b/test/CodeGen/X86/pr26625.ll new file mode 100644 index 000000000000..1b2e227bb59a --- /dev/null +++ b/test/CodeGen/X86/pr26625.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mcpu=i686 2>&1 | FileCheck %s +; PR26625 + +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386" + +define float @x0(float %f) #0 { +entry: + %call = tail call float @sqrtf(float %f) #1 + ret float %call +; CHECK-LABEL: x0: +; CHECK: flds +; CHECK-NEXT: fsqrt +; CHECK-NOT: vsqrtss +} + +declare float @sqrtf(float) #0 + +attributes #0 = { nounwind optsize readnone } +attributes #1 = { nounwind optsize readnone } diff --git a/test/CodeGen/X86/regalloc-spill-at-ehpad.ll b/test/CodeGen/X86/regalloc-spill-at-ehpad.ll new file mode 100644 index 000000000000..32cc9e781fb6 --- /dev/null +++ b/test/CodeGen/X86/regalloc-spill-at-ehpad.ll @@ -0,0 +1,75 @@ +; RUN: llc -regalloc=greedy -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck %s + +; This test checks for proper handling of a condition where the greedy register +; allocator encounters a very short interval that contains no uses but does +; contain an EH pad unwind edge, which requires spilling. Previously the +; register allocator marked a interval like this as unspillable, resulting in +; a compilation failure. + + +; The following checks that the value %p is reloaded within the catch handler. +; CHECK-LABEL: "?catch$8@?0?test@4HA": +; CHECK: .seh_endprologue +; CHECK: movq -16(%rbp), %rax +; CHECK: movb $0, (%rax) + +define i32* @test(i32* %a) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %call = call i32 @f() + %p = bitcast i32* %a to i8* + br i1 undef, label %if.end, label %if.else + +if.else: ; preds = %entry + br i1 undef, label %cond.false.i, label %if.else.else + +if.else.else: ; preds = %if.else + br i1 undef, label %cond.true.i, label %cond.false.i + +cond.true.i: ; preds = %if.else.else + br label %invoke.cont + +cond.false.i: ; preds = %if.else.else, %if.else + %call.i = invoke i32 @f() + to label %invoke.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %cond.false.i + %tmp0 = catchswitch within none [label %catch] unwind label %ehcleanup + +catch: ; preds = %catch.dispatch + %tmp1 = catchpad within %tmp0 [i8* null, i32 64, i8* null] + %p.0 = getelementptr inbounds i8, i8* %p, i64 0 + store i8 0, i8* %p.0, align 8 + invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) [ "funclet"(token %tmp1) ] + to label %noexc unwind label %ehcleanup + +noexc: ; preds = %catch + unreachable + +invoke.cont: ; preds = %cond.false.i, %cond.true.i + %cond.i = phi i32 [ %call, %cond.true.i ], [ %call.i, %cond.false.i ] + %cmp = icmp eq i32 %cond.i, -1 + %tmp3 = select i1 %cmp, i32 4, i32 0 + br label %if.end + +if.end: ; preds = %invoke.cont, %entry + %state.0 = phi i32 [ %tmp3, %invoke.cont ], [ 4, %entry ] + %p.1 = getelementptr inbounds i8, i8* %p, i64 0 + invoke void @g(i8* %p.1, i32 %state.0) + to label %invoke.cont.1 unwind label %ehcleanup + +invoke.cont.1: ; preds = %if.end + ret i32* %a + +ehcleanup: ; preds = %if.end, %catch, %catch.dispatch + %tmp4 = cleanuppad within none [] + cleanupret from %tmp4 unwind to caller +} + +%eh.ThrowInfo = type { i32, i32, i32, i32 } + +declare i32 @__CxxFrameHandler3(...) + +declare void @_CxxThrowException(i8*, %eh.ThrowInfo*) + +declare i32 @f() +declare void @g(i8*, i32) diff --git a/test/CodeGen/X86/shrink-wrap-chkstk.ll b/test/CodeGen/X86/shrink-wrap-chkstk.ll index c0b2b45e676f..aecae89aee56 100644 --- a/test/CodeGen/X86/shrink-wrap-chkstk.ll +++ b/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s ; chkstk cannot come before the usual prologue, since it adjusts ESP. +; If chkstk is used in the prologue, we also have to be careful about preserving +; EAX if it is used. target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" @@ -35,3 +37,36 @@ bb2: ; CHECK: retl declare void @inalloca_params(<{ %struct.S }>* inalloca) + +declare i32 @doSomething(i32, i32*) + +; In this test case, we force usage of EAX before the prologue, and have to +; compensate before calling __chkstk. It would also be valid for us to avoid +; shrink wrapping in this case. + +define x86_fastcallcc i32 @use_eax_before_prologue(i32 inreg %a, i32 inreg %b) { + %tmp = alloca i32, i32 1024, align 4 + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) + br label %false + +false: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] + ret i32 %tmp.0 +} + +; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue +; CHECK: movl %ecx, %eax +; CHECK: cmpl %edx, %eax +; CHECK: jge LBB1_2 +; CHECK: pushl %eax +; CHECK: movl $4100, %eax +; CHECK: calll __chkstk +; CHECK: movl 4100(%esp), %eax +; CHECK: calll _doSomething +; CHECK: LBB1_2: +; CHECK: retl diff --git a/test/MC/Sparc/sparc-ctrl-instructions.s b/test/MC/Sparc/sparc-ctrl-instructions.s index bcb625b17e61..ccfa36a0b143 100644 --- a/test/MC/Sparc/sparc-ctrl-instructions.s +++ b/test/MC/Sparc/sparc-ctrl-instructions.s @@ -124,6 +124,18 @@ ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 bvs .BB0 + ! CHECK: fba .BB0 ! encoding: [0x11,0b10AAAAAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 + fba .BB0 + + ! CHECK: fba .BB0 ! encoding: [0x11,0b10AAAAAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 + fb .BB0 + + ! CHECK: fbn .BB0 ! encoding: [0x01,0b10AAAAAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 + fbn .BB0 + ! CHECK: fbu .BB0 ! encoding: [0x0f,0b10AAAAAA,A,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 fbu .BB0 @@ -131,6 +143,7 @@ ! CHECK: fbg .BB0 ! encoding: [0x0d,0b10AAAAAA,A,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 fbg .BB0 + ! CHECK: fbug .BB0 ! encoding: [0x0b,0b10AAAAAA,A,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22 fbug .BB0 diff --git a/test/MC/Sparc/sparc64-ctrl-instructions.s b/test/MC/Sparc/sparc64-ctrl-instructions.s index 65bca2990f15..0e7ea25cab94 100644 --- a/test/MC/Sparc/sparc64-ctrl-instructions.s +++ b/test/MC/Sparc/sparc64-ctrl-instructions.s @@ -770,6 +770,18 @@ ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19 bpos,a,pt %xcc, .BB0 + ! CHECK: fba %fcc0, .BB0 ! encoding: [0x11,0b01001AAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19 + fba %fcc0, .BB0 + + ! CHECK: fba %fcc0, .BB0 ! encoding: [0x11,0b01001AAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19 + fb %fcc0, .BB0 + + ! CHECK: fbn %fcc0, .BB0 ! encoding: [0x01,0b01001AAA,A,A] + ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19 + fbn %fcc0, .BB0 + ! CHECK: fbu %fcc0, .BB0 ! encoding: [0x0f,0b01001AAA,A,A] ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19 fbu %fcc0, .BB0 diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s index feac4e4cf039..572487bfdaca 100644 --- a/test/MC/X86/x86_nop.s +++ b/test/MC/X86/x86_nop.s @@ -22,7 +22,13 @@ inc %eax inc %eax // CHECK: 0: 40 incl %eax -// CHECK: 1: 8d b4 26 00 00 00 00 leal (%esi), %esi +// CHECK: 1: 90 nop +// CHECK: 2: 90 nop +// CHECK: 3: 90 nop +// CHECK: 4: 90 nop +// CHECK: 5: 90 nop +// CHECK: 6: 90 nop +// CHECK: 7: 90 nop // CHECK: 8: 40 incl %eax diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll index 6d21f39d4fd6..cb364102c7f3 100644 --- a/test/Transforms/InstCombine/fprintf-1.ll +++ b/test/Transforms/InstCombine/fprintf-1.ll @@ -61,6 +61,15 @@ define void @test_simplify4(%FILE* %fp) { ; CHECK-IPRINTF-NEXT: ret void } +define void @test_simplify5(%FILE* %fp) { +; CHECK-LABEL: @test_simplify5( + %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0 + call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt) [ "deopt"() ] +; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp) [ "deopt"() ] + ret void +; CHECK-NEXT: ret void +} + define void @test_no_simplify1(%FILE* %fp) { ; CHECK-IPRINTF-LABEL: @test_no_simplify1( %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0 diff --git a/test/Transforms/LoopStrengthReduce/funclet.ll b/test/Transforms/LoopStrengthReduce/funclet.ll index 5d20646141c4..1bee3706cafa 100644 --- a/test/Transforms/LoopStrengthReduce/funclet.ll +++ b/test/Transforms/LoopStrengthReduce/funclet.ll @@ -214,3 +214,32 @@ try.cont.7: ; preds = %try.cont ; CHECK: catch.dispatch.2: ; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ] + +define i32 @test2() personality i32 (...)* @_except_handler3 { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %entry + %phi = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + invoke void @reserve() + to label %for.inc unwind label %catch.dispatch + +catch.dispatch: ; preds = %for.body + %tmp18 = catchswitch within none [label %catch.handler] unwind to caller + +catch.handler: ; preds = %catch.dispatch + %phi.lcssa = phi i32 [ %phi, %catch.dispatch ] + %tmp19 = catchpad within %tmp18 [i8* null] + catchret from %tmp19 to label %done + +done: + ret i32 %phi.lcssa + +for.inc: ; preds = %for.body + %inc = add i32 %phi, 1 + br label %for.body +} + +; CHECK-LABEL: define i32 @test2( +; CHECK: %phi.lcssa = phi i32 [ %phi, %catch.dispatch ] +; CHECK-NEXT: catchpad within diff --git a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll index 0cb845520246..f6f2609e8fbe 100644 --- a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll +++ b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll @@ -16,9 +16,15 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %0 = shl nsw i64 %indvars.iv, 1 + %odd.idx = add nsw i64 %0, 1 + %arrayidx = getelementptr inbounds double, double* %b, i64 %0 + %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx + %1 = load double, double* %arrayidx, align 8 - %add = fadd double %1, 1.000000e+00 + %2 = load double, double* %arrayidx.odd, align 8 + + %add = fadd double %1, %2 %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv store double %add, double* %arrayidx2, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll deleted file mode 100644 index 47a6e1029eda..000000000000 --- a/test/Transforms/LoopVectorize/X86/reg-usage.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s -; REQUIRES: asserts - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@a = global [1024 x i8] zeroinitializer, align 16 -@b = global [1024 x i8] zeroinitializer, align 16 - -define i32 @foo() { -; This function has a loop of SAD pattern. Here we check when VF = 16 the -; register usage doesn't exceed 16. -; -; CHECK-LABEL: foo -; CHECK: LV(REG): VF = 4 -; CHECK-NEXT: LV(REG): Found max usage: 4 -; CHECK: LV(REG): VF = 8 -; CHECK-NEXT: LV(REG): Found max usage: 7 -; CHECK: LV(REG): VF = 16 -; CHECK-NEXT: LV(REG): Found max usage: 13 - -entry: - br label %for.body - -for.cond.cleanup: - %add.lcssa = phi i32 [ %add, %for.body ] - ret i32 %add.lcssa - -for.body: - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 - %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 - %conv3 = zext i8 %1 to i32 - %sub = sub nsw i32 %conv, %conv3 - %ispos = icmp sgt i32 %sub, -1 - %neg = sub nsw i32 0, %sub - %2 = select i1 %ispos, i32 %sub, i32 %neg - %add = add nsw i32 %2, %s.015 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 - br i1 %exitcond, label %for.cond.cleanup, label %for.body -} - -define i64 @bar(i64* nocapture %a) { -; CHECK-LABEL: bar -; CHECK: LV(REG): VF = 2 -; CHECK: LV(REG): Found max usage: 4 -; -entry: - br label %for.body - -for.cond.cleanup: - %add2.lcssa = phi i64 [ %add2, %for.body ] - ret i64 %add2.lcssa - -for.body: - %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012 - %0 = load i64, i64* %arrayidx, align 8 - %add = add nsw i64 %0, %i.012 - store i64 %add, i64* %arrayidx, align 8 - %add2 = add nsw i64 %add, %s.011 - %inc = add nuw nsw i64 %i.012, 1 - %exitcond = icmp eq i64 %inc, 1024 - br i1 %exitcond, label %for.cond.cleanup, label %for.body -} diff --git a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll index fe9d59efc8b3..e6dc39c2afad 100644 --- a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll +++ b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll @@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu" ; -vectorizer-maximize-bandwidth is indicated. ; ; CHECK-label: foo -; CHECK: LV: Selecting VF: 32. +; CHECK: LV: Selecting VF: 16. define void @foo() { entry: br label %for.body diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll index d7237a5c27dc..54ce3e29293a 100644 --- a/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -292,10 +292,8 @@ for.body: ; preds = %for.body, %entry ; } ; CHECK-LABEL: @even_load( -; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 -; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1> +; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 +; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { entry: diff --git a/test/Transforms/PruneEH/pr26263.ll b/test/Transforms/PruneEH/pr26263.ll new file mode 100644 index 000000000000..17fafeb68505 --- /dev/null +++ b/test/Transforms/PruneEH/pr26263.ll @@ -0,0 +1,56 @@ +; RUN: opt -prune-eh -S < %s | FileCheck %s +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc" + +declare void @neverthrows() nounwind + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { + invoke void @neverthrows() + to label %try.cont unwind label %cleanuppad + +try.cont: + ret void + +cleanuppad: + %cp = cleanuppad within none [] + br label %cleanupret + +cleanupret: + cleanupret from %cp unwind to caller +} + +; CHECK-LABEL: define void @test1( +; CHECK: call void @neverthrows() + +; CHECK: %[[cp:.*]] = cleanuppad within none [] +; CHECK-NEXT: unreachable + +; CHECK: cleanupret from %[[cp]] unwind to caller + +define void @test2() personality i32 (...)* @__CxxFrameHandler3 { + invoke void @neverthrows() + to label %try.cont unwind label %catchswitch + +try.cont: + ret void + +catchswitch: + %cs = catchswitch within none [label %catchpad] unwind to caller + +catchpad: + %cp = catchpad within %cs [] + unreachable + +ret: + ret void +} + +; CHECK-LABEL: define void @test2( +; CHECK: call void @neverthrows() + +; CHECK: %[[cs:.*]] = catchswitch within none [label + +; CHECK: catchpad within %[[cs]] [] +; CHECK-NEXT: unreachable + +declare i32 @__CxxFrameHandler3(...) |