diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-07-29 21:25:18 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-07-29 21:25:18 +0000 |
commit | 3ad6a4b447326bc16c17df65637ca02330b8d090 (patch) | |
tree | 568321855815f8ca008258972e27d4a3ea487475 | |
parent | 93c91e39b29142dec1d03a30df9f6e757f56c193 (diff) |
Vendor import of llvm release_50 branch r309439:vendor/llvm/llvm-release_50-r309439
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=321691
svn path=/vendor/llvm/llvm-release_50-r309439/; revision=321692; tag=vendor/llvm/llvm-release_50-r309439
51 files changed, 1082 insertions, 1383 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 61ecfdf970d0..6af2cba10093 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX svn) + set(LLVM_VERSION_SUFFIX "") endif() if (POLICY CMP0048) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index ddb31acfd029..800a8ec115b3 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -71,6 +71,12 @@ Non-comprehensive list of changes in this release Changes to the LLVM IR ---------------------- +* The datalayout string may now indicate an address space to use for + the pointer type of alloca rather than the default of 0. + +* Added speculatable attribute indicating a function which has no + side-effects which could inhibit hoisting of calls. + Changes to the ARM Backend -------------------------- @@ -91,12 +97,30 @@ Changes to the PowerPC Target Changes to the X86 Target ------------------------- - During this release ... +* Added initial AMD Ryzen (znver1) scheduler support. + +* Added support for Intel Goldmont CPUs. + +* Add support for avx512vpopcntdq instructions. + +* Added heuristics to convert CMOV into branches when it may be profitable. + +* More aggressive inlining of memcmp calls. + +* Improve vXi64 shuffles on 32-bit targets. + +* Improved use of PMOVMSKB for any_of/all_of comparison reductions. + +* Improved Silvermont, Sandybridge, and Jaguar (btver2) schedulers. + +* Improved support for AVX512 vector rotations. + +* Added support for AMD Lightweight Profiling (LWP) instructions. Changes to the AMDGPU Target ----------------------------- - During this release ... 
+* Initial gfx9 support Changes to the AVR Target ----------------------------- diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index f1932d2471cb..ff44375e3921 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -145,6 +145,7 @@ public: waitFor = 0; int result = pthread_cond_init( &condition, nullptr ); + (void)result; assert( result == 0 ); result = pthread_mutex_init( &mutex, nullptr ); diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 59a4073646eb..1060d8fd667e 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -40,7 +40,8 @@ class TargetRegisterInfo; /// This is convenient because std::bitset does not have a constructor /// with an initializer list of set bits. /// -/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// Each InstructionSelector subclass should define a PredicateBitset class +/// with: /// const unsigned MAX_SUBTARGET_PREDICATES = 192; /// using PredicateBitset = PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>; /// and updating the constant to suit the target. Tablegen provides a suitable @@ -102,7 +103,8 @@ enum { /// - OpIdx - Operand index /// - Expected integer GIM_CheckConstantInt, - /// Check the operand is a specific literal integer (i.e. MO.isImm() or MO.isCImm() is true). + /// Check the operand is a specific literal integer (i.e. MO.isImm() or + /// MO.isCImm() is true). 
/// - InsnID - Instruction ID /// - OpIdx - Operand index /// - Expected integer diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 771b0a8c26a9..71d2f0293083 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -66,15 +66,12 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, void ParseEnvironmentOptions(const char *progName, const char *envvar, const char *Overview = ""); -// Function pointer type for printing version information. -using VersionPrinterTy = std::function<void(raw_ostream &)>; - ///===---------------------------------------------------------------------===// /// SetVersionPrinter - Override the default (LLVM specific) version printer /// used to print out the version when --version is given /// on the command line. This allows other systems using the /// CommandLine utilities to print their own version string. -void SetVersionPrinter(VersionPrinterTy func); +void SetVersionPrinter(void (*func)()); ///===---------------------------------------------------------------------===// /// AddExtraVersionPrinter - Add an extra printer to use in addition to the @@ -83,7 +80,7 @@ void SetVersionPrinter(VersionPrinterTy func); /// which will be called after the basic LLVM version /// printing is complete. Each can then add additional /// information specific to the tool. -void AddExtraVersionPrinter(VersionPrinterTy func); +void AddExtraVersionPrinter(void (*func)()); // PrintOptionValues - Print option values. // With -print-options print the difference between option values and defaults. 
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index 90d6c084ee95..8454b27b6f04 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -599,7 +599,7 @@ struct TargetRegistry { /// printRegisteredTargetsForVersion - Print the registered targets /// appropriately for inclusion in a tool's version output. - static void printRegisteredTargetsForVersion(raw_ostream &OS); + static void printRegisteredTargetsForVersion(); /// @name Registry Access /// @{ diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 1344285917ba..94e20b83754e 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -531,8 +531,10 @@ Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). +/// If OpValue is non-null, we only consider operations similar to OpValue +/// when intersecting. /// Flag set: NSW, NUW, exact, and all of fast-math. -void propagateIRFlags(Value *I, ArrayRef<Value *> VL); +void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr); } // end namespace llvm diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 45dc13d58de7..dc02a00e0fcc 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -4016,14 +4016,18 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, return true; } +// Max number of memory uses to look at before aborting the search to conserve +// compile time. +static constexpr int MaxMemoryUsesToScan = 20; + /// Recursively walk all the uses of I until we find a memory use. /// If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. 
static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, - SmallPtrSetImpl<Instruction *> &ConsideredInsts, - const TargetLowering &TLI, const TargetRegisterInfo &TRI) { + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, + const TargetRegisterInfo &TRI, int SeenInsts = 0) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -4036,8 +4040,12 @@ static bool FindAllMemoryUses( // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { - Instruction *UserI = cast<Instruction>(U.getUser()); + // Conservatively return true if we're seeing a large number or a deep chain + // of users. This avoids excessive compilation times in pathological cases. + if (SeenInsts++ >= MaxMemoryUsesToScan) + return true; + Instruction *UserI = cast<Instruction>(U.getUser()); if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); continue; @@ -4082,7 +4090,8 @@ static bool FindAllMemoryUses( continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, + SeenInsts)) return true; } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 4e6a3ec21866..eda4f74c7874 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -643,8 +643,11 @@ void InlineSpiller::reMaterializeAll() { Edit->eraseVirtReg(Reg); continue; } - assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) && - "Reg with empty interval has reference"); + + assert(LIS.hasInterval(Reg) && + (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) && + "Empty and not used live-range?!"); + RegsToSpill[ResultPos++] = Reg; } RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp 
index a7b7a9f8ab15..7b4fbace2c1c 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -133,18 +133,19 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); - for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); - I != E; ++I) { - LiveInterval *SplitVirtReg = &LIS->getInterval(*I); + for (unsigned Reg : SplitVRegs) { + assert(LIS->hasInterval(Reg)); + + LiveInterval *SplitVirtReg = &LIS->getInterval(Reg); assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + assert(SplitVirtReg->empty() && "Non-empty but used interval"); DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); aboutToRemoveInterval(*SplitVirtReg); LIS->removeInterval(SplitVirtReg->reg); continue; } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(!SplitVirtReg->empty() && "expecting non-empty interval"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d41054b15bbc..0cad20db0964 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2965,7 +2965,12 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { else if (N.getOpcode() == ISD::SIGN_EXTEND) N = N.getOperand(0); - return (N.getOpcode() == ISD::SETCC); + if (isLogicalMaskOp(N.getOpcode())) + return isSETCCorConvertedSETCC(N.getOperand(0)) && + isSETCCorConvertedSETCC(N.getOperand(1)); + + return (N.getOpcode() == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -2973,28 +2978,20 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { // to ToMaskVT if needed with vector extension or truncation. 
SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT) { - LLVMContext &Ctx = *DAG.getContext(); - // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. - unsigned InMaskOpc = InMask->getOpcode(); - // FIXME: This code seems to be too restrictive, we might consider // generalizing it or dropping it. - assert((InMaskOpc == ISD::SETCC || - ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) || - (isLogicalMaskOp(InMaskOpc) && - isSETCCorConvertedSETCC(InMask->getOperand(0)) && - isSETCCorConvertedSETCC(InMask->getOperand(1)))) && - "Unexpected mask argument."); + assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. SmallVector<SDValue, 4> Ops; for (unsigned i = 0; i < InMask->getNumOperands(); ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops); + SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. 
+ LLVMContext &Ctx = *DAG.getContext(); unsigned MaskScalarBits = MaskVT.getScalarSizeInBits(); unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits(); if (MaskScalarBits < ToMaskScalBits) { diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index f3b438e829d6..51c62d33f8e1 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -235,7 +235,9 @@ OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { continue; for (int I = 0; In.Prefixes[I]; I++) { - std::string S = std::string(In.Prefixes[I]) + std::string(In.Name); + std::string S = std::string(In.Prefixes[I]) + std::string(In.Name) + "\t"; + if (In.HelpText) + S += In.HelpText; if (StringRef(S).startswith(Cur)) Ret.push_back(S); } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 50173f5256bf..8eeb685a18a9 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -2039,9 +2039,9 @@ void CommandLineParser::printOptionValues() { Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); } -static VersionPrinterTy OverrideVersionPrinter = nullptr; +static void (*OverrideVersionPrinter)() = nullptr; -static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr; +static std::vector<void (*)()> *ExtraVersionPrinters = nullptr; namespace { class VersionPrinter { @@ -2081,7 +2081,7 @@ public: return; if (OverrideVersionPrinter != nullptr) { - OverrideVersionPrinter(outs()); + (*OverrideVersionPrinter)(); exit(0); } print(); @@ -2090,8 +2090,10 @@ public: // information. if (ExtraVersionPrinters != nullptr) { outs() << '\n'; - for (auto I : *ExtraVersionPrinters) - I(outs()); + for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(), + E = ExtraVersionPrinters->end(); + I != E; ++I) + (*I)(); } exit(0); @@ -2129,11 +2131,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) { /// Utility function for printing version number. 
void cl::PrintVersionMessage() { VersionPrinterInstance.print(); } -void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; } +void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; } -void cl::AddExtraVersionPrinter(VersionPrinterTy func) { +void cl::AddExtraVersionPrinter(void (*func)()) { if (!ExtraVersionPrinters) - ExtraVersionPrinters = new std::vector<VersionPrinterTy>; + ExtraVersionPrinters = new std::vector<void (*)()>; ExtraVersionPrinters->push_back(func); } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 2fd4f3ea0d45..fb8ae4c1cd5e 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -169,7 +169,8 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { // Don't call the normal error handler. It may allocate memory. Directly write // an OOM to stderr and abort. char OOMMessage[] = "LLVM ERROR: out of memory\n"; - (void)::write(2, OOMMessage, strlen(OOMMessage)); + ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage)); + (void)written; abort(); #endif } diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index b5c283253117..bed9ed64f802 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS, return LHS->first.compare(RHS->first); } -void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) { +void TargetRegistry::printRegisteredTargetsForVersion() { std::vector<std::pair<StringRef, const Target*> > Targets; size_t Width = 0; for (const auto &T : TargetRegistry::targets()) { @@ -123,6 +123,7 @@ void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) { } array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn); + raw_ostream &OS = outs(); OS << " Registered Targets:\n"; for (unsigned i = 0, e = Targets.size(); i != e; ++i) { OS << " " << 
Targets[i].first; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index c6150f9e5d1d..8c30c4410c09 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2889,9 +2889,12 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { - if (IsWin64) + if (IsWin64) { GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); - else + if (GPRSaveSize & 15) + // The extra size here, if triggered, will always be 8. + MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); + } else GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 7c31c8e397ba..a844081db5b2 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -297,6 +297,11 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, case AMDGPU::FLAT_SCR_HI: O << "flat_scratch_hi"; return; + case AMDGPU::FP_REG: + case AMDGPU::SP_REG: + case AMDGPU::SCRATCH_WAVE_OFFSET_REG: + case AMDGPU::PRIVATE_RSRC_REG: + llvm_unreachable("pseudo-register should not ever be emitted"); default: break; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index 54ea7805e18d..d097b78890e3 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -274,8 +274,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, 
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, - FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> { + SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> { let AllocationPriority = 7; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 0d021d67033e..0a72a4438218 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -61,14 +61,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_lo10: return Value & 0x3ff; - case Sparc::fixup_sparc_tls_ldo_hix22: - case Sparc::fixup_sparc_tls_le_hix22: - return (~Value >> 10) & 0x3fffff; - - case Sparc::fixup_sparc_tls_ldo_lox10: - case Sparc::fixup_sparc_tls_le_lox10: - return (~(~Value & 0x3ff)) & 0x1fff; - case Sparc::fixup_sparc_h44: return (Value >> 22) & 0x3fffff; @@ -84,6 +76,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Sparc::fixup_sparc_hm: return (Value >> 32) & 0x3ff; + case Sparc::fixup_sparc_tls_ldo_hix22: + case Sparc::fixup_sparc_tls_le_hix22: + case Sparc::fixup_sparc_tls_ldo_lox10: + case Sparc::fixup_sparc_tls_le_lox10: + assert(Value == 0 && "Sparc TLS relocs expect zero Value"); + return 0; + case Sparc::fixup_sparc_tls_gd_add: case Sparc::fixup_sparc_tls_gd_call: case Sparc::fixup_sparc_tls_ldm_add: diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td index f11177af91a5..698eb5627d19 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -455,10 +455,10 @@ def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>; def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>; def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>; def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>; -def : InstRW<[FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>; -def : InstRW<[FXa, Lat8, GroupAlone], (instregex "MGRK$")>; -def : InstRW<[FXa, LSU, 
Lat9, GroupAlone], (instregex "MSC$")>; -def : InstRW<[FXa, LSU, Lat11, GroupAlone], (instregex "MSGC$")>; +def : InstRW<[FXa, FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>; +def : InstRW<[FXa, FXa, Lat8, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[FXa, LSU, Lat9], (instregex "MSC$")>; +def : InstRW<[FXa, LSU, Lat11], (instregex "MSGC$")>; def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>; def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>; @@ -620,7 +620,7 @@ def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>; def : InstRW<[LSU], (instregex "LGG$")>; def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)GSC$")>; +def : InstRW<[LSU, Lat30], (instregex "(L|ST)GSC$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic @@ -708,7 +708,7 @@ def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>; // Processor assist //===----------------------------------------------------------------------===// -def : InstRW<[FXb], (instregex "PPA$")>; +def : InstRW<[FXb, GroupAlone], (instregex "PPA$")>; //===----------------------------------------------------------------------===// // Miscellaneous Instructions. 
@@ -1276,9 +1276,9 @@ def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>; def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>; def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>; -def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>; +def : InstRW<[VecXsPm], (instregex "VSLB$")>; def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>; -def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>; +def : InstRW<[VecXsPm], (instregex "VSR(A|L)B$")>; def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>; @@ -1435,9 +1435,9 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; // Vector: Packed-decimal instructions //===----------------------------------------------------------------------===// -def : InstRW<[VecDF, VecDF, Lat10, GroupAlone], (instregex "VLIP$")>; -def : InstRW<[VecDFX, LSU, Lat12, GroupAlone], (instregex "VPKZ$")>; -def : InstRW<[VecDFX, FXb, LSU, Lat12, GroupAlone], (instregex "VUPKZ$")>; +def : InstRW<[VecDF, VecDF, Lat10], (instregex "VLIP$")>; +def : InstRW<[VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>; +def : InstRW<[VecDFX, FXb, LSU, Lat12, BeginGroup], (instregex "VUPKZ$")>; def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>; def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>; def : InstRW<[VecDFX], (instregex "V(A|S)P$")>; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3c4589ab18f6..8f24f98be681 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1055,7 +1055,10 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // Scale the leading zero count down based on the actual size of the value. // Also scale it down based on the size of the shift. 
- MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; + unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; + if (MaskLZ < ScaleDown) + return true; + MaskLZ -= ScaleDown; // The final check is to ensure that any masked out high bits of X are // already known to be zero. Otherwise, the mask has a semantic impact diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 44eecd664714..ba8eb8656585 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1672,8 +1672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // TODO: These control memcmp expansion in CGP and could be raised higher, but // that needs to benchmarked and balanced with the potential use of vector - // load/store types (PR33329). - MaxLoadsPerMemcmp = 4; + // load/store types (PR33329, PR33914). + MaxLoadsPerMemcmp = 2; MaxLoadsPerMemcmpOptSize = 2; // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). @@ -22022,8 +22022,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); // i64 SRA needs to be performed as partial shifts. 
- if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) && - Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP()) + if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || + (Subtarget.hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA) return ArithmeticShiftRight64(ShiftAmt); if (VT == MVT::v16i8 || diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 4056cc5cb346..dc9143bebc45 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -64,6 +64,11 @@ ImplicationSearchThreshold( "condition to use to thread over a weaker condition"), cl::init(3), cl::Hidden); +static cl::opt<bool> PrintLVIAfterJumpThreading( + "print-lvi-after-jump-threading", + cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), + cl::Hidden); + namespace { /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. If one or more of the @@ -93,9 +98,10 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + if (PrintLVIAfterJumpThreading) + AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<LazyValueInfoWrapperPass>(); - AU.addPreserved<LazyValueInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } @@ -137,8 +143,14 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), + std::move(BPI)); + if (PrintLVIAfterJumpThreading) { + dbgs() << "LVI for function '" << F.getName() << "':\n"; + LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + dbgs()); + } + return Changed; } PreservedAnalyses 
JumpThreadingPass::run(Function &F, @@ -231,13 +243,15 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. - // We should not eliminate the loop header either, because eliminating - // a loop header might later prevent LoopSimplify from transforming nested - // loops into simplified form. + // We should not eliminate the loop header or latch either, because + // eliminating a loop header or latch might later prevent LoopSimplify + // from transforming nested loops into simplified form. We will rely on + // later passes in backend to clean up empty blocks. if (BI && BI->isUnconditional() && BB != &BB->getParent()->getEntryBlock() && // If the terminator is the only non-phi instruction, try to nuke it. - BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) { + BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) && + !LoopHeaders.count(BI->getSuccessor(0))) { // FIXME: It is always conservatively correct to drop the info // for a block even if it doesn't get erased. This isn't totally // awesome, but it allows us to use AssertingVH to prevent nasty diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 58b70be95d99..3c522786641a 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -1376,16 +1376,21 @@ Value *llvm::createTargetReduction(IRBuilder<> &Builder, } } -void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) { - if (auto *VecOp = dyn_cast<Instruction>(I)) { - if (auto *I0 = dyn_cast<Instruction>(VL[0])) { - // VecOVp is initialized to the 0th scalar, so start counting from index - // '1'. 
- VecOp->copyIRFlags(I0); - for (int i = 1, e = VL.size(); i < e; ++i) { - if (auto *Scalar = dyn_cast<Instruction>(VL[i])) - VecOp->andIRFlags(Scalar); - } - } +void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { + auto *VecOp = dyn_cast<Instruction>(I); + if (!VecOp) + return; + auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) + : dyn_cast<Instruction>(OpValue); + if (!Intersection) + return; + const unsigned Opcode = Intersection->getOpcode(); + VecOp->copyIRFlags(Intersection); + for (auto *V : VL) { + auto *Instr = dyn_cast<Instruction>(V); + if (!Instr) + continue; + if (OpValue == nullptr || Opcode == Instr->getOpcode()) + VecOp->andIRFlags(V); } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index dee658f98393..8784b9702141 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5656,20 +5656,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header - // (This is for early invocations before loop simplify and vectorization - // to keep canonical loop forms for nested loops. - // These blocks can be eliminated when the pass is invoked later - // in the back-end.) + // if LoopHeader is provided, check if the block or its successor is a loop + // header (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) 
+ bool NeedCanonicalLoop = + !LateSimplifyCFG && + (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - (!LoopHeaders || !LoopHeaders->count(BB)) && - TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; // If the only instruction in the block is a seteq/setne comparison diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index e797b377556e..41bb8c9c8201 100644 --- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -jump-threading -print-lazy-value-info -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -jump-threading -print-lvi-after-jump-threading -disable-output 2>&1 | FileCheck %s ; Testing LVI cache after jump-threading @@ -19,13 +19,10 @@ entry: ; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined ; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined ; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400> -; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400> ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] ; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401> -; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401> ; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 ; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined -; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: 
constantrange<0, -1> ; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 ; CHECK-NOT: loop loop: diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll index 1b2ed4b89521..2b4e438a13aa 100644 --- a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ entry: do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* +; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] diff --git a/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll b/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll index 2546e7c90ce5..89efe335e329 100644 --- a/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll +++ b/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll @@ -59,10 +59,12 @@ entry: } ; CHECK-LABEL: f7: -; CHECK: sub sp, sp, #16 -; CHECK: add x8, sp, #8 -; CHECK: add x0, sp, #8 -; CHECK: stp x8, x7, [sp], #16 +; CHECK: sub sp, sp, #32 +; CHECK: add x8, sp, #24 +; CHECK: str x7, [sp, #24] +; CHECK: add x0, sp, #24 +; CHECK: str x8, [sp, #8] +; CHECK: add sp, sp, #32 ; CHECK: ret define win64cc i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) 
nounwind { entry: diff --git a/test/CodeGen/AArch64/win64_vararg.ll b/test/CodeGen/AArch64/win64_vararg.ll index b760e4acd16a..3acc7e520c96 100644 --- a/test/CodeGen/AArch64/win64_vararg.ll +++ b/test/CodeGen/AArch64/win64_vararg.ll @@ -59,10 +59,12 @@ entry: } ; CHECK-LABEL: f7: -; CHECK: sub sp, sp, #16 -; CHECK: add x8, sp, #8 -; CHECK: add x0, sp, #8 -; CHECK: stp x8, x7, [sp], #16 +; CHECK: sub sp, sp, #32 +; CHECK: add x8, sp, #24 +; CHECK: str x7, [sp, #24] +; CHECK: add x0, sp, #24 +; CHECK: str x8, [sp, #8] +; CHECK: add sp, sp, #32 ; CHECK: ret define i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind { entry: @@ -79,9 +81,8 @@ entry: ; CHECK: stp x6, x7, [sp, #64] ; CHECK: stp x4, x5, [sp, #48] ; CHECK: stp x2, x3, [sp, #32] -; CHECK: stp x8, x1, [sp, #16] -; CHECK: str x8, [sp, #8] -; CHECK: add sp, sp, #80 +; CHECK: str x1, [sp, #24] +; CHECK: stp x8, x8, [sp], #80 ; CHECK: ret define void @copy1(i64 %a0, ...) nounwind { entry: @@ -93,3 +94,54 @@ entry: call void @llvm.va_copy(i8* %cp1, i8* %ap1) ret void } + +declare void @llvm.va_end(i8*) +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3 +declare i64* @__local_stdio_printf_options() local_unnamed_addr #4 + +; CHECK-LABEL: snprintf +; CHECK: sub sp, sp, #96 +; CHECK: stp x21, x20, [sp, #16] +; CHECK: stp x19, x30, [sp, #32] +; CHECK: add x8, sp, #56 +; CHECK: mov x19, x2 +; CHECK: mov x20, x1 +; CHECK: mov x21, x0 +; CHECK: stp x6, x7, [sp, #80] +; CHECK: stp x4, x5, [sp, #64] +; CHECK: str x3, [sp, #56] +; CHECK: str x8, [sp, #8] +; CHECK: bl __local_stdio_printf_options +; CHECK: ldr x8, [x0] +; CHECK: add x5, sp, #56 +; CHECK: mov x1, x21 +; CHECK: mov x2, x20 +; CHECK: orr x0, x8, #0x2 +; CHECK: mov x3, x19 +; CHECK: mov x4, xzr +; CHECK: bl __stdio_common_vsprintf +; CHECK: ldp x19, x30, [sp, #32] +; 
CHECK: ldp x21, x20, [sp, #16] +; CHECK: cmp w0, #0 +; CHECK: csinv w0, w0, wzr, ge +; CHECK: add sp, sp, #96 +; CHECK: ret +define i32 @snprintf(i8*, i64, i8*, ...) local_unnamed_addr #5 { + %4 = alloca i8*, align 8 + %5 = bitcast i8** %4 to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2 + call void @llvm.va_start(i8* nonnull %5) + %6 = load i8*, i8** %4, align 8 + %7 = call i64* @__local_stdio_printf_options() #2 + %8 = load i64, i64* %7, align 8 + %9 = or i64 %8, 2 + %10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2 + %11 = icmp sgt i32 %10, -1 + %12 = select i1 %11, i32 %10, i32 -1 + call void @llvm.va_end(i8* nonnull %5) + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2 + ret i32 %12 +} diff --git a/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir new file mode 100644 index 000000000000..bb58da7a59f6 --- /dev/null +++ b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -0,0 +1,74 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s +# https://bugs.llvm.org/show_bug.cgi?id=33620 + +--- +# This would assert due to the empty live interval created for %vreg9 +# on the last S_NOP with an undef subreg use. 
+ +# CHECK-LABEL: name: expecting_non_empty_interval + +# CHECK: undef %7.sub1 = V_MAC_F32_e32 0, undef %1, undef %7.sub1, implicit %exec +# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 8 into %stack.0, align 4) +# CHECK-NEXT: undef %5.sub1 = V_MOV_B32_e32 1786773504, implicit %exec +# CHECK-NEXT: dead %2 = V_MUL_F32_e32 0, %5.sub1, implicit %exec + +# CHECK: S_NOP 0, implicit %6.sub1 +# CHECK-NEXT: %8 = SI_SPILL_V64_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 8 from %stack.0, align 4) +# CHECK-NEXT: S_NOP 0, implicit %8.sub1 +# CHECK-NEXT: S_NOP 0, implicit undef %9.sub0 + +name: expecting_non_empty_interval +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_64, preferred-register: '' } + - { id: 1, class: vgpr_32, preferred-register: '' } + - { id: 2, class: vgpr_32, preferred-register: '' } + - { id: 3, class: vreg_64, preferred-register: '' } +body: | + bb.0: + successors: %bb.1 + undef %0.sub1 = V_MAC_F32_e32 0, undef %1, undef %0.sub1, implicit %exec + undef %3.sub1 = V_MOV_B32_e32 1786773504, implicit %exec + dead %2 = V_MUL_F32_e32 0, %3.sub1, implicit %exec + + bb.1: + S_NOP 0, implicit %3.sub1 + S_NOP 0, implicit %0.sub1 + S_NOP 0, implicit undef %0.sub0 + S_ENDPGM + +... + +# Similar assert which happens when trying to rematerialize. 
+# https://bugs.llvm.org/show_bug.cgi?id=33884 +--- +# CHECK-LABEL: name: rematerialize_empty_interval_has_reference + +# CHECK-NOT: MOV +# CHECK: undef %3.sub2 = V_MOV_B32_e32 1786773504, implicit %exec + +# CHECK: bb.1: +# CHECK-NEXT: S_NOP 0, implicit %3.sub2 +# CHECK-NEXT: S_NOP 0, implicit undef %6.sub0 +# CHECK-NEXT: undef %4.sub2 = V_MOV_B32_e32 0, implicit %exec +# CHECK-NEXT: S_NOP 0, implicit %4.sub2 +name: rematerialize_empty_interval_has_reference +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128, preferred-register: '' } + - { id: 1, class: vgpr_32, preferred-register: '' } + - { id: 2, class: vgpr_32, preferred-register: '' } + - { id: 3, class: vreg_128, preferred-register: '' } +body: | + bb.0: + successors: %bb.1 + + undef %0.sub2 = V_MOV_B32_e32 0, implicit %exec + undef %3.sub2 = V_MOV_B32_e32 1786773504, implicit %exec + + bb.1: + S_NOP 0, implicit %3.sub2 + S_NOP 0, implicit undef %0.sub0 + S_NOP 0, implicit %0.sub2 + +... diff --git a/test/CodeGen/X86/memcmp-minsize.ll b/test/CodeGen/X86/memcmp-minsize.ll index a7f42644ca2d..a55c40f5bda8 100644 --- a/test/CodeGen/X86/memcmp-minsize.ll +++ b/test/CodeGen/X86/memcmp-minsize.ll @@ -527,6 +527,93 @@ define i1 @length16_eq_const(i8* %X) nounwind minsize { ret i1 %c } +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(i8* %X, i8* %Y) nounwind minsize { +; X86-LABEL: length24: +; X86: # BB#0: +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: andl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $24, {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length24: +; X64: # BB#0: +; X64-NEXT: pushq $24 +; X64-NEXT: popq %rdx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(i8* %x, i8* 
%y) nounwind minsize { +; X86-LABEL: length24_eq: +; X86: # BB#0: +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: andl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $24, {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length24_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: pushq $24 +; X64-NEXT: popq %rdx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(i8* %X) nounwind minsize { +; X86-LABEL: length24_eq_const: +; X86: # BB#0: +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: andl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $24, {{[0-9]+}}(%esp) +; X86-NEXT: movl $.L.str, {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length24_eq_const: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: pushq $24 +; X64-NEXT: popq %rdx +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + define i32 @length32(i8* %X, i8* %Y) nounwind minsize { ; X86-LABEL: length32: ; X86: # BB#0: diff --git a/test/CodeGen/X86/memcmp-optsize.ll b/test/CodeGen/X86/memcmp-optsize.ll index 450205a966d2..4a5f30890513 100644 --- a/test/CodeGen/X86/memcmp-optsize.ll +++ b/test/CodeGen/X86/memcmp-optsize.ll @@ 
-699,6 +699,82 @@ define i1 @length16_eq_const(i8* %X) nounwind optsize { ret i1 %c } +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(i8* %X, i8* %Y) nounwind optsize { +; X86-LABEL: length24: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length24: +; X64: # BB#0: +; X64-NEXT: movl $24, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize { +; X86-LABEL: length24_eq: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length24_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(i8* %X) nounwind optsize { +; X86-LABEL: length24_eq_const: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length24_eq_const: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 
24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + define i32 @length32(i8* %X, i8* %Y) nounwind optsize { ; X86-LABEL: length32: ; X86: # BB#0: diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index 2e6782765462..889f6a74bf7f 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -475,25 +475,14 @@ define i1 @length8_eq_const(i8* %X) nounwind { define i1 @length12_eq(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length12_eq: -; X86: # BB#0: # %loadbb -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: cmpl (%eax), %edx -; X86-NEXT: jne .LBB14_1 -; X86-NEXT: # BB#2: # %loadbb1 -; X86-NEXT: movl 4(%ecx), %edx -; X86-NEXT: cmpl 4(%eax), %edx -; X86-NEXT: jne .LBB14_1 -; X86-NEXT: # BB#3: # %loadbb2 -; X86-NEXT: movl 8(%ecx), %edx -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl 8(%eax), %edx -; X86-NEXT: je .LBB14_4 -; X86-NEXT: .LBB14_1: # %res_block -; X86-NEXT: movl $1, %ecx -; X86-NEXT: .LBB14_4: # %endblock -; X86-NEXT: testl %ecx, %ecx +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $12 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax ; X86-NEXT: setne %al ; X86-NEXT: retl ; @@ -520,40 +509,13 @@ define i1 @length12_eq(i8* %X, i8* %Y) nounwind { define i32 @length12(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length12: -; X86: # BB#0: # %loadbb -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB15_1 -; X86-NEXT: # BB#2: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB15_1 -; X86-NEXT: # BB#3: # %loadbb2 
-; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB15_1 -; X86-NEXT: # BB#4: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB15_1: # %res_block -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $1, %eax -; X86-NEXT: cmovbl %ecx, %eax -; X86-NEXT: popl %esi +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $12 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp ; X86-NEXT: retl ; ; X64-LABEL: length12: @@ -588,47 +550,13 @@ define i32 @length12(i8* %X, i8* %Y) nounwind { define i32 @length16(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length16: -; X86: # BB#0: # %loadbb -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # BB#2: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # BB#3: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # BB#4: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx -; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # BB#5: # %endblock -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB16_1: # %res_block -; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $1, %eax -; X86-NEXT: cmovbl %ecx, %eax -; X86-NEXT: popl %esi +; 
X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $16 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp ; X86-NEXT: retl ; ; X64-LABEL: length16: @@ -660,32 +588,29 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { } define i1 @length16_eq(i8* %x, i8* %y) nounwind { -; X86-LABEL: length16_eq: -; X86: # BB#0: # %loadbb -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: cmpl (%eax), %edx -; X86-NEXT: jne .LBB17_1 -; X86-NEXT: # BB#2: # %loadbb1 -; X86-NEXT: movl 4(%ecx), %edx -; X86-NEXT: cmpl 4(%eax), %edx -; X86-NEXT: jne .LBB17_1 -; X86-NEXT: # BB#3: # %loadbb2 -; X86-NEXT: movl 8(%ecx), %edx -; X86-NEXT: cmpl 8(%eax), %edx -; X86-NEXT: jne .LBB17_1 -; X86-NEXT: # BB#4: # %loadbb3 -; X86-NEXT: movl 12(%ecx), %edx -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl 12(%eax), %edx -; X86-NEXT: je .LBB17_5 -; X86-NEXT: .LBB17_1: # %res_block -; X86-NEXT: movl $1, %ecx -; X86-NEXT: .LBB17_5: # %endblock -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length16_eq: +; X86-NOSSE: # BB#0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: setne %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: length16_eq: +; X86-SSE2: # BB#0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: movdqu (%eax), %xmm1 +; X86-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; X86-SSE2-NEXT: pmovmskb %xmm1, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: setne %al +; X86-SSE2-NEXT: retl ; ; X64-LABEL: length16_eq: ; X64: # BB#0: # %loadbb @@ -709,27 +634,27 @@ define i1 @length16_eq(i8* %x, 
i8* %y) nounwind { } define i1 @length16_eq_const(i8* %X) nounwind { -; X86-LABEL: length16_eq_const: -; X86: # BB#0: # %loadbb -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $858927408, (%eax) # imm = 0x33323130 -; X86-NEXT: jne .LBB18_1 -; X86-NEXT: # BB#2: # %loadbb1 -; X86-NEXT: cmpl $926299444, 4(%eax) # imm = 0x37363534 -; X86-NEXT: jne .LBB18_1 -; X86-NEXT: # BB#3: # %loadbb2 -; X86-NEXT: cmpl $825243960, 8(%eax) # imm = 0x31303938 -; X86-NEXT: jne .LBB18_1 -; X86-NEXT: # BB#4: # %loadbb3 -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl $892613426, 12(%eax) # imm = 0x35343332 -; X86-NEXT: je .LBB18_5 -; X86-NEXT: .LBB18_1: # %res_block -; X86-NEXT: movl $1, %ecx -; X86-NEXT: .LBB18_5: # %endblock -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-NOSSE-LABEL: length16_eq_const: +; X86-NOSSE: # BB#0: +; X86-NOSSE-NEXT: pushl $0 +; X86-NOSSE-NEXT: pushl $16 +; X86-NOSSE-NEXT: pushl $.L.str +; X86-NOSSE-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: calll memcmp +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: testl %eax, %eax +; X86-NOSSE-NEXT: sete %al +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: length16_eq_const: +; X86-SSE2: # BB#0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqu (%eax), %xmm0 +; X86-SSE2-NEXT: pcmpeqb {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pmovmskb %xmm0, %eax +; X86-SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; X86-SSE2-NEXT: sete %al +; X86-SSE2-NEXT: retl ; ; X64-LABEL: length16_eq_const: ; X64: # BB#0: # %loadbb @@ -752,6 +677,82 @@ define i1 @length16_eq_const(i8* %X) nounwind { ret i1 %c } +; PR33914 - https://bugs.llvm.org/show_bug.cgi?id=33914 + +define i32 @length24(i8* %X, i8* %Y) nounwind { +; X86-LABEL: length24: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: retl +; +; X64-LABEL: length24: +; X64: # BB#0: +; 
X64-NEXT: movl $24, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 24) nounwind + ret i32 %m +} + +define i1 @length24_eq(i8* %x, i8* %y) nounwind { +; X86-LABEL: length24_eq: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: length24_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind + %cmp = icmp eq i32 %call, 0 + ret i1 %cmp +} + +define i1 @length24_eq_const(i8* %X) nounwind { +; X86-LABEL: length24_eq_const: +; X86: # BB#0: +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $24 +; X86-NEXT: pushl $.L.str +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll memcmp +; X86-NEXT: addl $16, %esp +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: length24_eq_const: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $.L.str, %esi +; X64-NEXT: movl $24, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 24) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + define i32 @length32(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length32: ; X86: # BB#0: @@ -764,43 +765,9 @@ define i32 @length32(i8* %X, i8* %Y) nounwind { ; X86-NEXT: retl ; ; X64-LABEL: length32: -; X64: # BB#0: # %loadbb -; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # BB#2: # %loadbb1 -; X64-NEXT: 
movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # BB#3: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # BB#4: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx -; X64-NEXT: bswapq %rcx -; X64-NEXT: bswapq %rdx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # BB#5: # %endblock -; X64-NEXT: retq -; X64-NEXT: .LBB19_1: # %res_block -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax -; X64-NEXT: retq +; X64: # BB#0: +; X64-NEXT: movl $32, %edx +; X64-NEXT: jmp memcmp # TAILCALL %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind ret i32 %m } @@ -820,30 +787,25 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ; X86-NEXT: sete %al ; X86-NEXT: retl ; -; X64-LABEL: length32_eq: -; X64: # BB#0: # %loadbb -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: cmpq (%rsi), %rax -; X64-NEXT: jne .LBB20_1 -; X64-NEXT: # BB#2: # %loadbb1 -; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: cmpq 8(%rsi), %rax -; X64-NEXT: jne .LBB20_1 -; X64-NEXT: # BB#3: # %loadbb2 -; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: cmpq 16(%rsi), %rax -; X64-NEXT: jne .LBB20_1 -; X64-NEXT: # BB#4: # %loadbb3 -; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq 24(%rsi), %rcx -; X64-NEXT: je .LBB20_5 -; X64-NEXT: .LBB20_1: # %res_block -; X64-NEXT: movl $1, %eax -; X64-NEXT: .LBB20_5: # %endblock -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sete %al -; X64-NEXT: retq +; X64-SSE2-LABEL: length32_eq: +; X64-SSE2: # BB#0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: movl $32, %edx +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: sete %al +; 
X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-AVX2-LABEL: length32_eq: +; X64-AVX2: # BB#0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax +; X64-AVX2-NEXT: cmpl $-1, %eax +; X64-AVX2-NEXT: sete %al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind %cmp = icmp eq i32 %call, 0 ret i1 %cmp @@ -862,30 +824,26 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X86-NEXT: setne %al ; X86-NEXT: retl ; -; X64-LABEL: length32_eq_const: -; X64: # BB#0: # %loadbb -; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 -; X64-NEXT: cmpq %rax, (%rdi) -; X64-NEXT: jne .LBB21_1 -; X64-NEXT: # BB#2: # %loadbb1 -; X64-NEXT: movabsq $3833745473465760056, %rax # imm = 0x3534333231303938 -; X64-NEXT: cmpq %rax, 8(%rdi) -; X64-NEXT: jne .LBB21_1 -; X64-NEXT: # BB#3: # %loadbb2 -; X64-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736 -; X64-NEXT: cmpq %rax, 16(%rdi) -; X64-NEXT: jne .LBB21_1 -; X64-NEXT: # BB#4: # %loadbb3 -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: movabsq $3544395820347831604, %rcx # imm = 0x3130393837363534 -; X64-NEXT: cmpq %rcx, 24(%rdi) -; X64-NEXT: je .LBB21_5 -; X64-NEXT: .LBB21_1: # %res_block -; X64-NEXT: movl $1, %eax -; X64-NEXT: .LBB21_5: # %endblock -; X64-NEXT: testl %eax, %eax -; X64-NEXT: setne %al -; X64-NEXT: retq +; X64-SSE2-LABEL: length32_eq_const: +; X64-SSE2: # BB#0: +; X64-SSE2-NEXT: pushq %rax +; X64-SSE2-NEXT: movl $.L.str, %esi +; X64-SSE2-NEXT: movl $32, %edx +; X64-SSE2-NEXT: callq memcmp +; X64-SSE2-NEXT: testl %eax, %eax +; X64-SSE2-NEXT: setne %al +; X64-SSE2-NEXT: popq %rcx +; X64-SSE2-NEXT: retq +; +; X64-AVX2-LABEL: length32_eq_const: +; X64-AVX2: # BB#0: +; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; X64-AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpmovmskb %ymm0, %eax +; X64-AVX2-NEXT: cmpl $-1, %eax +; X64-AVX2-NEXT: setne 
%al +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 32) nounwind %c = icmp ne i32 %m, 0 ret i1 %c diff --git a/test/CodeGen/X86/pr33844.ll b/test/CodeGen/X86/pr33844.ll new file mode 100644 index 000000000000..2585945aa109 --- /dev/null +++ b/test/CodeGen/X86/pr33844.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@global = external global i32 +@global.1 = external global i64 + +define void @patatino() { +; CHECK-LABEL: patatino: +; CHECK: # BB#0: # %bb +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: andl $62, %ecx +; CHECK-NEXT: andl $-536870912, %eax # imm = 0xE0000000 +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: retq +bb: + %tmp = load i32, i32* @global + %tmp1 = lshr i32 %tmp, 31 + %tmp2 = add nuw nsw i32 %tmp1, 2147483647 + %tmp3 = load i64, i64* @global.1 + %tmp4 = shl i64 %tmp3, 23 + %tmp5 = add nsw i64 %tmp4, 8388639 + %tmp6 = trunc i64 %tmp5 to i32 + %tmp7 = lshr i32 %tmp2, %tmp6 + %tmp8 = load i32, i32* @global + %tmp9 = and i32 %tmp7, 62 + %tmp10 = and i32 %tmp8, -536870912 + %tmp11 = or i32 %tmp9, %tmp10 + store i32 %tmp11, i32* @global + ret void +} diff --git a/test/CodeGen/X86/pr33960.ll b/test/CodeGen/X86/pr33960.ll new file mode 100644 index 000000000000..fb9236d3ffa2 --- /dev/null +++ b/test/CodeGen/X86/pr33960.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | 
FileCheck %s --check-prefix=X64 + +@b = external local_unnamed_addr global i32, align 4 + +define void @PR33960() { +; X86-LABEL: PR33960: +; X86: # BB#0: # %entry +; X86-NEXT: movl $0, b +; X86-NEXT: retl +; +; X64-LABEL: PR33960: +; X64: # BB#0: # %entry +; X64-NEXT: movl $0, {{.*}}(%rip) +; X64-NEXT: retq +entry: + %tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3 + %predphi26 = insertelement <4 x i32> %tmp, i32 -7, i32 0 + %tmp1 = trunc <4 x i32> %predphi26 to <4 x i16> + %tmp2 = icmp eq <4 x i16> %tmp1, zeroinitializer + %tmp3 = icmp eq <4 x i32> undef, zeroinitializer + %tmp4 = and <4 x i1> %tmp2, %tmp3 + %predphi17 = select <4 x i1> %tmp4, <4 x i32> undef, <4 x i32> zeroinitializer + %tmp5 = shl <4 x i32> %predphi17, <i32 16, i32 16, i32 16, i32 16> + %tmp6 = ashr exact <4 x i32> %tmp5, <i32 16, i32 16, i32 16, i32 16> + %tmp7 = or <4 x i32> %tmp6, undef + %tmp8 = or <4 x i32> undef, %tmp7 + %tmp9 = or <4 x i32> undef, %tmp8 + %tmp10 = or <4 x i32> undef, %tmp9 + %tmp11 = or <4 x i32> undef, %tmp10 + %tmp12 = or <4 x i32> undef, %tmp11 + %bin.rdx = or <4 x i32> %tmp12, undef + %bin.rdx19 = or <4 x i32> %bin.rdx, undef + %tmp13 = extractelement <4 x i32> %bin.rdx19, i32 0 + %or = or i32 0, %tmp13 + store i32 %or, i32* @b, align 4 + ret void +} diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll index 5f2b18fc9c03..6bb57d8f5f71 100644 --- a/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -1699,10 +1699,9 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind { ; ; XOPAVX2-LABEL: splatconstant_shift_v4i64: ; XOPAVX2: # BB#0: +; XOPAVX2-NEXT: vpsrad $7, %ymm0, %ymm1 ; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72057594037927936,72057594037927936,72057594037927936,72057594037927936] -; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0 +; 
XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: splatconstant_shift_v4i64: diff --git a/test/MC/Sparc/sparc-tls-relocations.s b/test/MC/Sparc/sparc-tls-relocations.s new file mode 100644 index 000000000000..3d1b80b5f191 --- /dev/null +++ b/test/MC/Sparc/sparc-tls-relocations.s @@ -0,0 +1,83 @@ +! Testing Sparc TLS relocations emission +! (for now a couple local ones). +! +! RUN: llvm-mc %s -arch=sparc -show-encoding | FileCheck %s --check-prefix=ASM +! RUN: llvm-mc %s -arch=sparcv9 -show-encoding | FileCheck %s --check-prefix=ASM +! RUN: llvm-mc %s -arch=sparc -filetype=obj | llvm-readobj -r | FileCheck %s --check-prefix=REL +! RUN: llvm-mc %s -arch=sparcv9 -filetype=obj | llvm-readobj -r | FileCheck %s --check-prefix=REL +! RUN: llvm-mc %s -arch=sparc -filetype=obj | llvm-objdump -r -d - | FileCheck %s --check-prefix=OBJDUMP +! RUN: llvm-mc %s -arch=sparcv9 -filetype=obj | llvm-objdump -r -d - | FileCheck %s --check-prefix=OBJDUMP + +! REL: Arch: sparc +! REL: Relocations [ +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LE_HIX22 Head 0x0 +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LE_LOX10 Head 0x0 +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_HIX22 Head 0x0 +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_HI22 Head 0x0 +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_LO10 Head 0x0 +! REL: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_LOX10 Head 0x0 +! REL: ] + + +! OBJDUMP: foo: +foo: +! Here we use two different sequences to get the address of a static TLS variable 'Head' +! (note - there is no intent to have valid assembler function here, +! we just check how TLS relocations are emitted) +! +! First sequence uses LE_HIX22/LE_LOX10 + +! OBJDUMP: {{[0-9,a-f]+}}: 31 00 00 00 sethi 0, %i0 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LE_HIX22 Unknown +! ASM: sethi %tle_hix22(Head), %i0 ! encoding: [0x31,0x00,0x00,0x00] +! ASM: ! 
fixup A - offset: 0, value: %tle_hix22(Head), kind: fixup_sparc_tls_le_hix22 + sethi %tle_hix22(Head), %i0 + +! OBJDUMP: {{[0-9,a-f]+}}: b0 1e 20 00 xor %i0, 0, %i0 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LE_LOX10 Unknown +! ASM: xor %i0, %tle_lox10(Head), %i0 ! encoding: [0xb0,0x1e,0x20,0x00] +! ASM: ! fixup A - offset: 0, value: %tle_lox10(Head), kind: fixup_sparc_tls_le_lox10 + xor %i0, %tle_lox10(Head), %i0 + + +! Second sequence is for PIC, so it is more complicated. +! It uses LDO_HIX22/LDO_LOX10/LDO_ADD/LDM_HI22/LDM_LO10/LDM_ADD/LDM_CALL + +! OBJDUMP: {{[0-9,a-f]+}}: 33 00 00 00 sethi 0, %i1 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LDO_HIX22 Unknown +! ASM: sethi %tldo_hix22(Head), %i1 ! encoding: [0x33,0b00AAAAAA,A,A] +! ASM: ! fixup A - offset: 0, value: %tldo_hix22(Head), kind: fixup_sparc_tls_ldo_hix22 + sethi %tldo_hix22(Head), %i1 + +! OBJDUMP: {{[0-9,a-f]+}}: 35 00 00 00 sethi 0, %i2 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LDM_HI22 Unknown +! ASM: sethi %tldm_hi22(Head), %i2 ! encoding: [0x35,0b00AAAAAA,A,A] +! ASM: ! fixup A - offset: 0, value: %tldm_hi22(Head), kind: fixup_sparc_tls_ldm_hi22 + sethi %tldm_hi22(Head), %i2 + +! OBJDUMP: {{[0-9,a-f]+}}: b4 06 a0 00 add %i2, 0, %i2 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LDM_LO10 Unknown +! ASM: add %i2, %tldm_lo10(Head), %i2 ! encoding: [0xb4,0x06,0b101000AA,A] +! ASM: ! fixup A - offset: 0, value: %tldm_lo10(Head), kind: fixup_sparc_tls_ldm_lo10 + add %i2, %tldm_lo10(Head), %i2 + + ! ???error from llvm-mc on the next asm line??? + ! add %i0, %i2, %o0, %tldm_add(Head) + +! OBJDUMP: {{[0-9,a-f]+}}: b0 1e 60 00 xor %i1, 0, %i0 +! OBJDUMP: {{[0-9,a-f]+}}: R_SPARC_TLS_LDO_LOX10 Unknown +! ASM: xor %i1, %tldo_lox10(Head), %i0 ! encoding: [0xb0,0x1e,0b011000AA,A] +! ASM: ! fixup A - offset: 0, value: %tldo_lox10(Head), kind: fixup_sparc_tls_ldo_lox10 + xor %i1, %tldo_lox10(Head), %i0 + + ! ???error from llvm-mc on the next asm line??? + ! call __tls_get_addr, %tldm_call(Head) + ! nop + ! 
???error from llvm-mc on the next asm line??? + ! add %o0, %i0, %i0, %tldo_add(Head) + + .type Head,@object + .section .tbss,#alloc,#write,#tls +Head: + .word 0 + .size Head, 4 diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll index 1dfc08761965..c5281a9e5733 100644 --- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll +++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll @@ -238,91 +238,9 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp7( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2 -; X32-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] -; X32-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]]) -; X32-NEXT: [[TMP16:%.*]] = call i16 
@llvm.bswap.i16(i16 [[TMP14]]) -; X32-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32 -; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32 -; X32-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]] -; X32-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 6 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 6 -; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]] -; X32-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32 -; X32-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32 -; X32-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]] -; X32-NEXT: br label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] -; -; X64-LABEL: @cmp7( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64 -; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 -; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]] -; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP12:%.*]] = bitcast 
i8* [[Y]] to i16* -; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2 -; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2 -; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]] -; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]] -; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) -; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]]) -; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64 -; X64-NEXT: [[TMP20]] = zext i16 [[TMP18]] to i64 -; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]] -; X64-NEXT: br i1 [[TMP21]], label [[LOADBB2:%.*]], label [[RES_BLOCK]] -; X64: loadbb2: -; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[X]], i8 6 -; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[Y]], i8 6 -; X64-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP22]] -; X64-NEXT: [[TMP25:%.*]] = load i8, i8* [[TMP23]] -; X64-NEXT: [[TMP26:%.*]] = zext i8 [[TMP24]] to i32 -; X64-NEXT: [[TMP27:%.*]] = zext i8 [[TMP25]] to i32 -; X64-NEXT: [[TMP28:%.*]] = sub i32 [[TMP26]], [[TMP27]] -; X64-NEXT: br label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP28]], [[LOADBB2]] ], [ [[TMP10]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; ALL-LABEL: @cmp7( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) +; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7) ret i32 %call @@ -379,44 +297,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp9( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 
[[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2:%.*]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[X]], i8 8 -; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[Y]], i8 8 -; X32-NEXT: [[TMP20:%.*]] = load i8, i8* [[TMP18]] -; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]] -; X32-NEXT: [[TMP22:%.*]] = zext i8 [[TMP20]] to i32 -; X32-NEXT: [[TMP23:%.*]] = zext i8 [[TMP21]] to i32 -; X32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP22]], [[TMP23]] -; X32-NEXT: br label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP24]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9) +; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp9( ; X64-NEXT: loadbb: @@ -451,48 +333,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* 
nocapture readonly %y) { define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp10( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP26:%.*]], [[LOADBB2:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP27:%.*]], [[LOADBB2]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i16, i16* [[TMP18]], i16 4 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 4 -; X32-NEXT: 
[[TMP22:%.*]] = load i16, i16* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]] -; X32-NEXT: [[TMP24:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP22]]) -; X32-NEXT: [[TMP25:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP23]]) -; X32-NEXT: [[TMP26]] = zext i16 [[TMP24]] to i32 -; X32-NEXT: [[TMP27]] = zext i16 [[TMP25]] to i32 -; X32-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] -; X32-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10) +; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp10( ; X64-NEXT: loadbb: @@ -532,100 +374,9 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp11( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP26:%.*]], [[LOADBB2:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP27:%.*]], [[LOADBB2]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; 
X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i16, i16* [[TMP18]], i16 4 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 4 -; X32-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]] -; X32-NEXT: [[TMP24:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP22]]) -; X32-NEXT: [[TMP25:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP23]]) -; X32-NEXT: [[TMP26]] = zext i16 [[TMP24]] to i32 -; X32-NEXT: [[TMP27]] = zext i16 [[TMP25]] to i32 -; X32-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]] -; X32-NEXT: br i1 [[TMP28]], label [[LOADBB3:%.*]], label [[RES_BLOCK]] -; X32: loadbb3: -; X32-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[X]], i8 10 -; X32-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[Y]], i8 10 -; X32-NEXT: [[TMP31:%.*]] = load i8, i8* [[TMP29]] -; X32-NEXT: [[TMP32:%.*]] = load i8, i8* [[TMP30]] -; X32-NEXT: [[TMP33:%.*]] = zext i8 [[TMP31]] to i32 -; X32-NEXT: [[TMP34:%.*]] = zext i8 [[TMP32]] to i32 -; X32-NEXT: [[TMP35:%.*]] = sub i32 [[TMP33]], [[TMP34]] -; X32-NEXT: br label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP35]], [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] -; -; X64-LABEL: @cmp11( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* 
[[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 4 -; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4 -; X64-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]] -; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]]) -; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i64 -; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]] -; X64: loadbb2: -; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 10 -; X64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 10 -; X64-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]] -; X64-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]] -; X64-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32 -; X64-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32 -; X64-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]] 
-; X64-NEXT: br label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; ALL-LABEL: @cmp11( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) +; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) ret i32 %call @@ -633,46 +384,8 @@ define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp12( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: 
[[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2 -; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]] -; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]]) -; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]]) -; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]] -; X32-NEXT: br i1 [[TMP26]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12) +; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp12( ; X64-NEXT: loadbb: @@ -712,268 +425,27 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp13( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ] -; X32-NEXT: 
[[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2 -; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]] -; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]]) -; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]]) -; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]] -; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3:%.*]], label [[RES_BLOCK]] -; X32: loadbb3: -; X32-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[X]], i8 12 -; X32-NEXT: [[TMP28:%.*]] = getelementptr i8, i8* [[Y]], i8 12 -; X32-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP27]] -; X32-NEXT: [[TMP30:%.*]] = load i8, i8* [[TMP28]] -; X32-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 -; X32-NEXT: [[TMP32:%.*]] = zext i8 [[TMP30]] to i32 -; X32-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]] -; X32-NEXT: br label 
[[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP33]], [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] -; -; X64-LABEL: @cmp13( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2 -; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2 -; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64 -; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]] -; X64: loadbb2: -; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 12 -; X64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 12 -; X64-NEXT: [[TMP22:%.*]] 
= load i8, i8* [[TMP20]] -; X64-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]] -; X64-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32 -; X64-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32 -; X64-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]] -; X64-NEXT: br label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; ALL-LABEL: @cmp13( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) +; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) ret i32 %call } define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp14( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ], [ [[TMP35:%.*]], [[LOADBB3:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ], [ [[TMP36:%.*]], [[LOADBB3]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = 
getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2 -; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]] -; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]]) -; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]]) -; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]] -; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3]], label [[RES_BLOCK]] -; X32: loadbb3: -; X32-NEXT: [[TMP27:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP28:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP29:%.*]] = getelementptr i16, i16* [[TMP27]], i16 6 -; X32-NEXT: [[TMP30:%.*]] = getelementptr i16, i16* [[TMP28]], i16 6 -; X32-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP29]] -; X32-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP30]] -; X32-NEXT: [[TMP33:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP31]]) -; X32-NEXT: [[TMP34:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP32]]) -; X32-NEXT: [[TMP35]] = zext i16 [[TMP33]] to i32 -; X32-NEXT: [[TMP36]] = zext i16 [[TMP34]] to i32 -; X32-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP35]], [[TMP36]] -; X32-NEXT: br i1 [[TMP37]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] -; -; X64-LABEL: @cmp14( -; X64-NEXT: loadbb: -; X64-NEXT: 
[[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ], [ [[TMP28:%.*]], [[LOADBB2:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ], [ [[TMP29:%.*]], [[LOADBB2]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2 -; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2 -; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64 -; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2]], label [[RES_BLOCK]] -; X64: loadbb2: -; X64-NEXT: [[TMP20:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP21:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6 -; X64-NEXT: [[TMP23:%.*]] = getelementptr i16, i16* [[TMP21]], i16 6 -; X64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]] -; 
X64-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP23]] -; X64-NEXT: [[TMP26:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP24]]) -; X64-NEXT: [[TMP27:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP25]]) -; X64-NEXT: [[TMP28]] = zext i16 [[TMP26]] to i64 -; X64-NEXT: [[TMP29]] = zext i16 [[TMP27]] to i64 -; X64-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP28]], [[TMP29]] -; X64-NEXT: br i1 [[TMP30]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; ALL-LABEL: @cmp14( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) +; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) ret i32 %call } define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp15( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) -; X32-NEXT: ret i32 [[CALL]] -; -; X64-LABEL: @cmp15( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ], [ [[TMP28:%.*]], [[LOADBB2:%.*]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ], [ [[TMP29:%.*]], [[LOADBB2]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; 
X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2 -; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2 -; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64 -; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2]], label [[RES_BLOCK]] -; X64: loadbb2: -; X64-NEXT: [[TMP20:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP21:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6 -; X64-NEXT: [[TMP23:%.*]] = getelementptr i16, i16* [[TMP21]], i16 6 -; X64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]] -; X64-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP23]] -; X64-NEXT: [[TMP26:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP24]]) -; X64-NEXT: [[TMP27:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP25]]) -; X64-NEXT: [[TMP28]] = zext i16 [[TMP26]] to i64 -; X64-NEXT: [[TMP29]] = zext i16 [[TMP27]] to i64 -; X64-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP28]], [[TMP29]] -; X64-NEXT: br i1 [[TMP30]], label [[LOADBB3:%.*]], label [[RES_BLOCK]] -; X64: loadbb3: -; X64-NEXT: [[TMP31:%.*]] = getelementptr i8, i8* [[X]], i8 14 -; X64-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[Y]], i8 14 -; X64-NEXT: [[TMP33:%.*]] = load i8, i8* [[TMP31]] -; X64-NEXT: [[TMP34:%.*]] = load i8, i8* [[TMP32]] -; X64-NEXT: [[TMP35:%.*]] = zext i8 [[TMP33]] to i32 -; X64-NEXT: [[TMP36:%.*]] = zext i8 [[TMP34]] to i32 -; X64-NEXT: [[TMP37:%.*]] = sub i32 [[TMP35]], [[TMP36]] -; X64-NEXT: br label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP37]], 
[[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; ALL-LABEL: @cmp15( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) +; ALL-NEXT: ret i32 [[CALL]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) ret i32 %call @@ -981,57 +453,8 @@ define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp16( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ], [ [[TMP33:%.*]], [[LOADBB3:%.*]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ], [ [[TMP34:%.*]], [[LOADBB3]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 -; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] -; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) -; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 
[[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]] -; X32: loadbb2: -; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2 -; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2 -; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]] -; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]]) -; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]]) -; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]] -; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3]], label [[RES_BLOCK]] -; X32: loadbb3: -; X32-NEXT: [[TMP27:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP28:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP27]], i32 3 -; X32-NEXT: [[TMP30:%.*]] = getelementptr i32, i32* [[TMP28]], i32 3 -; X32-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP29]] -; X32-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP30]] -; X32-NEXT: [[TMP33]] = call i32 @llvm.bswap.i32(i32 [[TMP31]]) -; X32-NEXT: [[TMP34]] = call i32 @llvm.bswap.i32(i32 [[TMP32]]) -; X32-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP33]], [[TMP34]] -; X32-NEXT: br i1 [[TMP35]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X32-NEXT: ret i32 [[PHI_RES]] +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16) +; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp16( ; X64-NEXT: loadbb: @@ -1198,34 +621,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq7( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; 
ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; ALL: res_block: -; ALL-NEXT: br label [[ENDBLOCK:%.*]] -; ALL: loadbb1: -; ALL-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16* -; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16* -; ALL-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 2 -; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2 -; ALL-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]] -; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] -; ALL-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]] -; ALL-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; ALL: loadbb2: -; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 6 -; ALL-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 6 -; ALL-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]] -; ALL-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]] -; ALL-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]] -; ALL-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; ALL: endblock: -; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7) +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; ALL-NEXT: ret i32 [[CONV]] ; @@ -1280,34 +677,8 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq9( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 8 -; X32-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 8 -; X32-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]] -; X32-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]] -; X32-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]] -; X32-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -1342,36 +713,8 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq10( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 
[[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 4 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 4 -; X32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -1407,78 +750,11 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp_eq11( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] 
-; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 4 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 4 -; X32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]] -; X32: loadbb3: -; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 10 -; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 10 -; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]] -; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]] -; X32-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X32-NEXT: ret i32 [[CONV]] -; -; X64-LABEL: @cmp_eq11( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] 
= load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X64: res_block: -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 4 -; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4 -; X64-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]] -; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X64: loadbb2: -; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 10 -; X64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 10 -; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]] -; X64-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]] -; X64-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X64-NEXT: ret i32 [[CONV]] +; ALL-LABEL: @cmp_eq11( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11) +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; ALL-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11) %cmp = icmp eq i32 %call, 0 @@ -1488,36 +764,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq12( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* 
-; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2 -; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; @@ -1553,78 +801,11 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp_eq13( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* 
[[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2 -; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]] -; X32: loadbb3: -; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 12 -; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 12 -; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]] -; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]] -; X32-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]] -; X32-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X32-NEXT: ret i32 [[CONV]] -; -; X64-LABEL: @cmp_eq13( -; X64-NEXT: loadbb: -; X64-NEXT: 
[[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X64: res_block: -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2 -; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X64: loadbb2: -; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 12 -; X64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 12 -; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]] -; X64-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]] -; X64-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X64-NEXT: ret i32 [[CONV]] +; ALL-LABEL: @cmp_eq13( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13) +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; ALL-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13) %cmp = icmp eq i32 %call, 0 @@ -1633,82 +814,11 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp_eq14(i8* nocapture readonly %x, i8* 
nocapture readonly %y) { -; X32-LABEL: @cmp_eq14( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2 -; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]] -; X32: loadbb3: -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[X]] to i16* -; X32-NEXT: [[TMP20:%.*]] = bitcast i8* [[Y]] to i16* -; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 6 -; X32-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6 -; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]] -; X32-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]] -; X32-NEXT: [[TMP25:%.*]] = icmp ne i16 [[TMP23]], [[TMP24]] -; X32-NEXT: br i1 [[TMP25]], label [[RES_BLOCK]], label 
[[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X32-NEXT: ret i32 [[CONV]] -; -; X64-LABEL: @cmp_eq14( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X64: res_block: -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2 -; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X64: loadbb2: -; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 6 -; X64-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 6 -; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]] -; X64-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]] -; X64-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ] -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X64-NEXT: ret i32 [[CONV]] +; ALL-LABEL: @cmp_eq14( +; ALL-NEXT: [[CALL:%.*]] 
= tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14) +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; ALL-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14) %cmp = icmp eq i32 %call, 0 @@ -1717,52 +827,11 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) { } define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) { -; X32-LABEL: @cmp_eq15( -; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 -; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X32-NEXT: ret i32 [[CONV]] -; -; X64-LABEL: @cmp_eq15( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X64: res_block: -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2 -; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X64: loadbb2: -; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 6 -; X64-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 6 -; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]] -; X64-NEXT: 
[[TMP17:%.*]] = load i16, i16* [[TMP15]] -; X64-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]] -; X64-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]] -; X64: loadbb3: -; X64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 14 -; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 14 -; X64-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]] -; X64-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]] -; X64-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]] -; X64-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ] -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 -; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; X64-NEXT: ret i32 [[CONV]] +; ALL-LABEL: @cmp_eq15( +; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15) +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; ALL-NEXT: ret i32 [[CONV]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15) %cmp = icmp eq i32 %call, 0 @@ -1772,45 +841,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq16( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X32: res_block: -; X32-NEXT: br label [[ENDBLOCK:%.*]] -; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 -; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* 
[[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] -; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]] -; X32: loadbb2: -; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2 -; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2 -; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]] -; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]] -; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]] -; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]] -; X32: loadbb3: -; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP20:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 3 -; X32-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[TMP20]], i32 3 -; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]] -; X32-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP22]] -; X32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP23]], [[TMP24]] -; X32-NEXT: br i1 [[TMP25]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ] -; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16) +; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X32-NEXT: ret i32 [[CONV]] ; diff --git a/test/Transforms/JumpThreading/pr33605.ll b/test/Transforms/JumpThreading/pr33605.ll new file mode 100644 index 000000000000..eb8cab90fa50 --- /dev/null +++ b/test/Transforms/JumpThreading/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in 
simplifyCFG, +; when it can destroy canonical loop structure. + +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK: br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() diff --git a/test/Transforms/JumpThreading/pr33917.ll b/test/Transforms/JumpThreading/pr33917.ll new file mode 100644 index 000000000000..30652279a0e1 --- /dev/null +++ b/test/Transforms/JumpThreading/pr33917.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +;
RUN: opt -jump-threading -correlated-propagation %s -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i8* @foo() + +declare i32 @rust_eh_personality() unnamed_addr + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #0 + +define void @patatino() personality i32 ()* @rust_eh_personality { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: bb9: +; CHECK-NEXT: [[T9:%.*]] = invoke i8* @foo() +; CHECK-NEXT: to label [[GOOD:%.*]] unwind label [[BAD:%.*]] +; CHECK: bad: +; CHECK-NEXT: [[T10:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: resume { i8*, i32 } [[T10]] +; CHECK: good: +; CHECK-NEXT: [[T11:%.*]] = icmp ne i8* [[T9]], null +; CHECK-NEXT: [[T12:%.*]] = zext i1 [[T11]] to i64 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[T12]], 1 +; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[DONE:%.*]] +; CHECK: if_true: +; CHECK-NEXT: call void @llvm.assume(i1 [[T11]]) +; CHECK-NEXT: br label [[DONE]] +; CHECK: done: +; CHECK-NEXT: ret void +; +bb9: + %t9 = invoke i8* @foo() + to label %good unwind label %bad + +bad: + %t10 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } %t10 + +good: + %t11 = icmp ne i8* %t9, null + %t12 = zext i1 %t11 to i64 + %cond = icmp eq i64 %t12, 1 + br i1 %cond, label %if_true, label %done + +if_true: + call void @llvm.assume(i1 %t11) + br label %done + +done: + ret void +} + +attributes #0 = { nounwind } diff --git a/test/Transforms/JumpThreading/static-profile.ll b/test/Transforms/JumpThreading/static-profile.ll index d634a607eabf..505e849f4806 100644 --- a/test/Transforms/JumpThreading/static-profile.ll +++ b/test/Transforms/JumpThreading/static-profile.ll @@ -86,7 +86,7 @@ eq_1: ; Verify the new backedge: ; CHECK: check_2.thread: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_2: %cond2 = icmp eq i32 %v, 2 @@ -100,7 +100,7 @@ eq_2: ; Verify the new backedge: 
; CHECK: eq_2: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: br label %check_1 +; CHECK-NEXT: br label %check_3.thread check_3: %condE = icmp eq i32 %v, 3 diff --git a/test/Transforms/LoopUnroll/peel-loop.ll b/test/Transforms/LoopUnroll/peel-loop.ll index bf0801fc760a..3f7c64d8154c 100644 --- a/test/Transforms/LoopUnroll/peel-loop.ll +++ b/test/Transforms/LoopUnroll/peel-loop.ll @@ -18,9 +18,11 @@ ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ {{.*}}, %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ {{.*}}, %[[LOOP]] ] define void @basic(i32* %p, i32 %k) #0 { entry: @@ -65,9 +67,11 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2 ; CHECK: store i32 2, i32* %[[INC2]], align 4 ; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3 -; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]] +; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]] +; CHECK: [[LOOP_PH]]: +; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK: %[[IV:.*]] = phi i32 [ %[[IV:.*]], %[[LOOP]] ], [ 3, %[[NEXT2]] ] +; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ %[[IV:.*]], %[[LOOP]] ] ; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %[[IV]], %[[LOOP]] ] ; CHECK: ret i32 %ret define i32 @output(i32* %p, i32 %k) #0 { diff --git a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll index a215be9d4877..8652829bc511 100644 --- a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll +++ b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -16,7 +16,7 @@ 
for.body: ; preds = %for.inc, %for.body. %cmp1 = icmp eq i32 %a, 12345 br i1 %cmp1, label %if.then, label %if.else, !prof !0 ; CHECK: %cmp1 = icmp eq i32 %a, 12345 -; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0 +; CHECK-NEXT: br i1 %cmp1, label %for.body.preheader.split.us, label %for.body.preheader.split, !prof !0 if.then: ; preds = %for.body ; CHECK: for.body.us: ; CHECK: add nsw i32 %{{.*}}, 123 @@ -53,7 +53,7 @@ entry: br label %for.body ;CHECK: entry: ;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 -;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK-NEXT: br i1 %cmp1, label %entry.split, label %for.cond.cleanup.split, !prof !1 ;CHECK: for.body: for.body: ; preds = %for.inc, %entry %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll index 0aef9092a1fe..af8725b02a14 100644 --- a/test/Transforms/LoopUnswitch/infinite-loop.ll +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -6,7 +6,7 @@ ; Loop unswitching shouldn't trivially unswitch the true case of condition %a ; in the code here because it leads to an infinite loop. While this doesn't ; contain any instructions with side effects, it's still a kind of side effect. -; It can trivially unswitch on the false cas of condition %a though. +; It can trivially unswitch on the false case of condition %a though. 
; STATS: 2 loop-unswitch - Number of branches unswitched ; STATS: 2 loop-unswitch - Number of unswitches that are trivial @@ -16,7 +16,7 @@ ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split ; CHECK: entry.split: -; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split +; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split ; CHECK: for.body: ; CHECK-NEXT: br label %for.body diff --git a/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 31c564779fb2..bf455807c586 100644 --- a/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s +; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s ; This test checks auto-vectorization with FP induction variable. ; The FP operation is not "fast" and requires "fast-math" function attribute. 
diff --git a/test/Transforms/LoopVectorize/float-induction.ll b/test/Transforms/LoopVectorize/float-induction.ll index a7cc4530ceb3..cf6ec88478be 100644 --- a/test/Transforms/LoopVectorize/float-induction.ll +++ b/test/Transforms/LoopVectorize/float-induction.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s -; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s @fp_inc = common global float 0.000000e+00, align 4 diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 656a276969f3..536d1d85c3d8 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1322,8 +1322,8 @@ l6: ; Speculation depth must be limited to avoid a zero-cost instruction cycle. 
; CHECK-LABEL: @PR26308( -; CHECK: while.body: -; CHECK-NEXT: br label %while.body +; CHECK: cleanup4: +; CHECK-NEXT: br label %cleanup4 define i32 @PR26308(i1 %B, i64 %load) { entry: diff --git a/test/Transforms/SimplifyCFG/multiple-phis.ll b/test/Transforms/SimplifyCFG/multiple-phis.ll index a6eef09ae646..823e2500eac1 100644 --- a/test/Transforms/SimplifyCFG/multiple-phis.ll +++ b/test/Transforms/SimplifyCFG/multiple-phis.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; RUN: opt -latesimplifycfg -S < %s | FileCheck %s ; It's not worthwhile to if-convert one of the phi nodes and leave ; the other behind, because that still requires a branch. If diff --git a/test/Transforms/SimplifyCFG/pr33605.ll b/test/Transforms/SimplifyCFG/pr33605.ll new file mode 100644 index 000000000000..963b15991263 --- /dev/null +++ b/test/Transforms/SimplifyCFG/pr33605.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +; Skip simplifying unconditional branches from empty blocks in simplifyCFG, +; when it can destroy canonical loop structure. 
+ +; void foo(); +; bool test(int a, int b, int *c) { +; bool changed = false; +; for (unsigned int i = 2; i--;) { +; int r = a | b; +; if ( r != c[i]) { +; c[i] = r; +; foo(); +; changed = true; +; } +; } +; return changed; +; } + +; CHECK-LABEL: @test( +; CHECK: for.cond: +; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] +; CHECK: for.body: +; CHECK: br i1 %cmp, label %if.end, label %if.then +; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then +; CHECK: if.then: +; CHECK: br label %if.end +; CHECK-NOT: br label %for.cond +; CHECK: if.end: +; CHECK: br label %for.cond +define i1 @test(i32 %a, i32 %b, i32* %c) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() diff --git a/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll b/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll index 12a908b20f9e..e357104f2a66 100644 --- a/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll +++ b/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -simplifycfg -S < %s | FileCheck %s +; 
RUN: opt -latesimplifycfg -S < %s | FileCheck %s define void @test1(i32 %n) #0 { entry: diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh index 8ec3abb17551..02d8e7925f6e 100755 --- a/utils/release/test-release.sh +++ b/utils/release/test-release.sh @@ -562,7 +562,7 @@ for Flavor in $Flavors ; do # case there are build paths in the debug info. On some systems, # sed adds a newline to the output, so pass $p3 through sed too. if ! cmp -s \ - <(env LC_CTYPE=C sed -e 's,Phase2,Phase3,g' $p2) \ + <(env LC_CTYPE=C sed -e 's,Phase2,Phase3,g' -e 's,Phase1,Phase2,g' $p2) \ <(env LC_CTYPE=C sed -e '' $p3) 16 16; then echo "file `basename $p2` differs between phase 2 and phase 3" fi |