author:    Dimitry Andric <dim@FreeBSD.org>  2018-06-27 19:14:09 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2018-06-27 19:14:09 +0000
commit:    eb1edd4d5902fdc561fd68fa70400fbd11127998 (patch)
tree:      0b10ccde4b5d3acf243966db54f4f3afef10cf93
parent:    2ed8710148a921286717212737771dd31c518fb7 (diff)
Vendor import of llvm 6.0.1 release r335540 (vendor/llvm/llvm-release_601-r335540, vendor/llvm-60)
Notes:
svn path=/vendor/llvm/dist-release_60/; revision=335720
svn path=/vendor/llvm/llvm-release_601-r335540/; revision=335721; tag=vendor/llvm/llvm-release_601-r335540
133 files changed, 12793 insertions, 6621 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bf2c21a306f..f8da6cf92119 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 0c9110cbaa87..89210e16629e 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -449,6 +449,13 @@ public: /// Replace successor OLD with NEW and update probability info. void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Copy a successor (and any probability info) from original block to this + /// block's. Uses an iterator into the original blocks successors. + /// + /// This is useful when doing a partial clone of successors. Afterward, the + /// probabilities may need to be normalized. + void copySuccessor(MachineBasicBlock *Orig, succ_iterator I); + /// Transfers all the successors from MBB to this machine basic block (i.e., /// copies all the successors FromMBB and remove all the successors from /// FromMBB). diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 6b5404be35d3..57dee3bb44b3 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -421,7 +421,8 @@ public: /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of - /// the list is modeled as <Reg:SubReg, SubIdx>. + /// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef + /// flag are not added to this list. /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: /// - %1:sub1, sub0 @@ -446,7 +447,8 @@ public: /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). /// @@ -465,7 +467,8 @@ public: /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). /// diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 6321bb81b8cb..a302d5726aa3 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -36,8 +36,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // Intrinsics used to generate ctr-based loops. These should only be // generated by the PowerPC backend! + // The branch intrinsic is marked as NoDuplicate because loop rotation will + // attempt to duplicate it forming loops where a block reachable from one + // instance of it can contain another. 
def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>; - def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>; + def int_ppc_is_decremented_ctr_nonzero : + Intrinsic<[llvm_i1_ty], [], [IntrNoDuplicate]>; // Intrinsics for [double]word extended forms of divide instructions def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index daee93267f56..94306d0f54ad 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -502,6 +502,8 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { } FunctionInfo &FI = FunctionInfos[F]; + Handles.emplace_front(*this, F); + Handles.front().I = Handles.begin(); bool KnowNothing = false; // Collect the mod/ref properties due to called functions. We only compute diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 6e9368c49d65..09605f61fa93 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -153,9 +153,14 @@ public: if (IsCall != Other.IsCall) return false; - if (IsCall) - return CS.getCalledValue() == Other.CS.getCalledValue(); - return Loc == Other.Loc; + if (!IsCall) + return Loc == Other.Loc; + + if (CS.getCalledValue() != Other.CS.getCalledValue()) + return false; + + return CS.arg_size() == Other.CS.arg_size() && + std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin()); } private: @@ -179,12 +184,18 @@ template <> struct DenseMapInfo<MemoryLocOrCall> { } static unsigned getHashValue(const MemoryLocOrCall &MLOC) { - if (MLOC.IsCall) - return hash_combine(MLOC.IsCall, - DenseMapInfo<const Value *>::getHashValue( - MLOC.getCS().getCalledValue())); - return hash_combine( - MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + if (!MLOC.IsCall) + return hash_combine( + MLOC.IsCall, + DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + + hash_code hash = + hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( + MLOC.getCS().getCalledValue())); + + for (const Value *Arg : MLOC.getCS().args()) + hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); + return hash; } static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index a22ce0dab9c2..d8ce90e63a9d 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1714,20 +1714,25 @@ bool IfConverter::IfConvertDiamondCommon( } // Remove the duplicated instructions at the beginnings of both paths. - // Skip dbg_value instructions + // Skip dbg_value instructions. MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; // Skip past the dups on each side separately since there may be - // differing dbg_value entries. + // differing dbg_value entries. NumDups1 can include a "return" + // instruction, if it's not marked as "branch". 
for (unsigned i = 0; i < NumDups1; ++DI1) { + if (DI1 == MBB1.end()) + break; if (!DI1->isDebugValue()) ++i; } while (NumDups1 != 0) { ++DI2; + if (DI2 == MBB2.end()) + break; if (!DI2->isDebugValue()) --NumDups1; } @@ -1738,11 +1743,16 @@ bool IfConverter::IfConvertDiamondCommon( Redefs.stepForward(MI, Dummy); } } + BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); MBB2.erase(MBB2.begin(), DI2); - // The branches have been checked to match, so it is safe to remove the branch - // in BB1 and rely on the copy in BB2 + // The branches have been checked to match, so it is safe to remove the + // branch in BB1 and rely on the copy in BB2. The complication is that + // the blocks may end with a return instruction, which may or may not + // be marked as "branch". If it's not, then it could be included in + // "dups1", leaving the blocks potentially empty after moving the common + // duplicates. #ifndef NDEBUG // Unanalyzable branches must match exactly. Check that now. if (!BBI1->IsBrAnalyzable) @@ -1768,11 +1778,14 @@ bool IfConverter::IfConvertDiamondCommon( if (RemoveBranch) BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB); else { - do { - assert(DI2 != MBB2.begin()); - DI2--; - } while (DI2->isBranch() || DI2->isDebugValue()); - DI2++; + // Make DI2 point to the end of the range where the common "tail" + // instructions could be found. + while (DI2 != MBB2.begin()) { + MachineBasicBlock::iterator Prev = std::prev(DI2); + if (!Prev->isBranch() && !Prev->isDebugValue()) + break; + DI2 = Prev; + } } while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1833,11 +1846,15 @@ bool IfConverter::IfConvertDiamondCommon( // a non-predicated in BBI2, then we don't want to predicate the one from // BBI2. The reason is that if we merged these blocks, we would end up with // two predicated terminators in the same block. + // Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't + // predicate them either. They were checked to be identical, and so the + // same branch would happen regardless of which path was taken. if (!MBB2.empty() && (DI2 == MBB2.end())) { MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator(); MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator(); - if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) && - BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T)) + bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T); + bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T); + if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable)) --DI2; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 75e3d35169cf..4ffcffcea693 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -514,6 +514,39 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return false; } + // Detect invalid DBG_VALUE instructions, with a debug-use of a virtual + // register that hasn't been defined yet. If we do not remove those here, then + // the re-insertion of the DBG_VALUE instruction after register allocation + // will be incorrect. + // TODO: If earlier passes are corrected to generate sane debug information + // (and if the machine verifier is improved to catch this), then these checks + // could be removed or replaced by asserts. 
+ bool Discard = false; + if (MI.getOperand(0).isReg() && + TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { + const unsigned Reg = MI.getOperand(0).getReg(); + if (!LIS->hasInterval(Reg)) { + // The DBG_VALUE is described by a virtual register that does not have a + // live interval. Discard the DBG_VALUE. + Discard = true; + DEBUG(dbgs() << "Discarding debug info (no LIS interval): " + << Idx << " " << MI); + } else { + // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg + // is defined dead at Idx (where Idx is the slot index for the instruction + // preceeding the DBG_VALUE). + const LiveInterval &LI = LIS->getInterval(Reg); + LiveQueryResult LRQ = LI.Query(Idx); + if (!LRQ.valueOutOrDead()) { + // We have found a DBG_VALUE with the value in a virtual register that + // is not live. Discard the DBG_VALUE. + Discard = true; + DEBUG(dbgs() << "Discarding debug info (reg not live): " + << Idx << " " << MI); + } + } + } + // Get or create the UserValue for (variable,offset) here. bool IsIndirect = MI.getOperand(1).isImm(); if (IsIndirect) @@ -522,7 +555,13 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc()); - UV->addDef(Idx, MI.getOperand(0), IsIndirect); + if (!Discard) + UV->addDef(Idx, MI.getOperand(0), IsIndirect); + else { + MachineOperand MO = MachineOperand::CreateReg(0U, false); + MO.setIsDebug(); + UV->addDef(Idx, MO, false); + } return true; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 209abf34d885..cd67449e3acf 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -646,6 +646,14 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, removeSuccessor(OldI); } +void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig, + succ_iterator I) { + if (Orig->Probs.empty()) + addSuccessor(*I, Orig->getSuccProbability(I)); + else + addSuccessorWithoutProb(*I); +} + void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { Predecessors.push_back(Pred); } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 84c808ee7938..167135b56ec0 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -513,6 +513,11 @@ public: bool runOnMachineFunction(MachineFunction &F) override; + bool allowTailDupPlacement() const { + assert(F); + return TailDupPlacement && !F->getTarget().requiresStructuredCFG(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); @@ -1018,7 +1023,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( MachineBasicBlock *Succ1 = BestA.Dest; MachineBasicBlock *Succ2 = BestB.Dest; // Check to see if tail-duplication would be profitable. 
- if (TailDupPlacement && shouldTailDuplicate(Succ2) && + if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) && canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) && isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1), Chain, BlockFilter)) { @@ -1044,7 +1049,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( return Result; } -/// When the option TailDupPlacement is on, this method checks if the +/// When the option allowTailDupPlacement() is on, this method checks if the /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated /// into all of its unplaced, unfiltered predecessors, that are not BB. bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( @@ -1493,7 +1498,7 @@ MachineBlockPlacement::selectBestSuccessor( if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb, Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. - if (TailDupPlacement && shouldTailDuplicate(Succ)) + if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); continue; } @@ -1702,7 +1707,7 @@ void MachineBlockPlacement::buildChain( auto Result = selectBestSuccessor(BB, Chain, BlockFilter); MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; - if (TailDupPlacement) + if (allowTailDupPlacement()) ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); // If an immediate successor isn't available, look for the best viable @@ -1724,7 +1729,7 @@ void MachineBlockPlacement::buildChain( // Placement may have changed tail duplication opportunities. // Check for that now. - if (TailDupPlacement && BestSucc && ShouldTailDup) { + if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { // If the chosen successor was duplicated into all its predecessors, // don't bother laying it out, just go round the loop again with BB as // the chain end. @@ -2758,7 +2763,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TailDupSize = TailDupPlacementAggressiveThreshold; } - if (TailDupPlacement) { + if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); if (MF.getFunction().optForSize()) TailDupSize = 1; diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 11acbe687a31..1090550243f8 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -1882,6 +1882,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1925,6 +1927,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { } const MachineOperand &Src = Def->getOperand(SrcIdx); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -2093,6 +2097,10 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() { for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { auto &MO = Def->getOperand(i); assert(MO.isReg() && "Invalid PHI instruction"); + // We have no code to deal with undef operands. They shouldn't happen in + // normal programs anyway. 
+ if (MO.isUndef()) + return ValueTrackerResult(); Res.addSource(MO.getReg(), MO.getSubReg()); } @@ -2149,9 +2157,14 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { - Def = MRI.getVRegDef(Reg); - DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = Res.getSrcSubReg(0); + MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); + if (DI != MRI.def_end()) { + Def = DI->getParent(); + DefIdx = DI.getOperandNo(); + DefSubReg = Res.getSrcSubReg(0); + } else { + Def = nullptr; + } return Res; } } diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index db925f803db6..bd90ed5b55b8 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -1151,6 +1151,8 @@ bool TargetInstrInfo::getRegSequenceInputs( for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; OpIdx += 2) { const MachineOperand &MOReg = MI.getOperand(OpIdx); + if (MOReg.isUndef()) + continue; const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); @@ -1174,6 +1176,8 @@ bool TargetInstrInfo::getExtractSubregInputs( // Def = EXTRACT_SUBREG v0.sub1, sub0. assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); const MachineOperand &MOReg = MI.getOperand(1); + if (MOReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(2); assert(MOSubIdx.isImm() && "The subindex of the extract_subreg is not an immediate"); @@ -1198,6 +1202,8 @@ bool TargetInstrInfo::getInsertSubregInputs( assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(3); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index c0047d0cde6a..2c57eee191db 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1422,7 +1422,8 @@ RuntimeDyldELF::processRelocationRef( SectionEntry &Section = Sections[SectionID]; uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; - if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) { + bool IsExtern = Value.SymbolName || SymType == SymbolRef::ST_Unknown; + if (!IsExtern) { if (AbiVariant != 2) { // In the ELFv1 ABI, a function call may point to the .opd entry, // so the final symbol value is calculated based on the relocation @@ -1432,21 +1433,24 @@ RuntimeDyldELF::processRelocationRef( } else { // In the ELFv2 ABI, a function symbol may provide a local entry // point, which must be used for direct calls. 
- uint8_t SymOther = Symbol->getOther(); - Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); + if (Value.SectionID == SectionID){ + uint8_t SymOther = Symbol->getOther(); + Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); + } } uint8_t *RelocTarget = Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int64_t delta = static_cast<int64_t>(Target - RelocTarget); // If it is within 26-bits branch range, just set the branch target - if (SignExtend64<26>(delta) == delta) { + if (SignExtend64<26>(delta) != delta) { + RangeOverflow = true; + } else if ((AbiVariant != 2) || + (AbiVariant == 2 && Value.SectionID == SectionID)) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); addRelocationForSection(RE, Value.SectionID); - } else { - RangeOverflow = true; } } - if (Value.SymbolName || SymType == SymbolRef::ST_Unknown || + if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID) || RangeOverflow) { // It is an external symbol (either Value.SymbolName is set, or // SymType is SymbolRef::ST_Unknown) or out of range. @@ -1503,10 +1507,10 @@ RuntimeDyldELF::processRelocationRef( RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } - if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) { + if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) { // Restore the TOC for external calls if (AbiVariant == 2) - writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1) + writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1) else writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1) } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index d3c33edec186..743e3710fd68 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -359,11 +359,9 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) { return wrap(&unwrap(Ty)->getContext()); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void LLVMDumpType(LLVMTypeRef Ty) { - return unwrap(Ty)->dump(); +void LLVMDumpType(LLVMTypeRef Ty) { + return unwrap(Ty)->print(errs(), /*IsForDebug=*/true); } -#endif char *LLVMPrintTypeToString(LLVMTypeRef Ty) { std::string buf; @@ -658,7 +656,7 @@ void LLVMSetValueName(LLVMValueRef Val, const char *Name) { unwrap(Val)->setName(Name); } -LLVM_DUMP_METHOD void LLVMDumpValue(LLVMValueRef Val) { +void LLVMDumpValue(LLVMValueRef Val) { unwrap(Val)->print(errs(), /*IsForDebug=*/true); } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index a6b5c43f1d2a..328f000f37c9 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -289,6 +289,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { case Triple::mips64el: FDECFIEncoding = dwarf::DW_EH_PE_sdata8; break; + case Triple::ppc64: + case Triple::ppc64le: case Triple::x86_64: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index d968688911eb..6439d16a2a3f 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -13,8 +13,13 @@ elseif( CMAKE_HOST_UNIX ) if( HAVE_LIBDL ) set(system_libs ${system_libs} ${CMAKE_DL_LIBS}) endif() - if( HAVE_BACKTRACE ) - set(system_libs ${system_libs} ${Backtrace_LIBRARIES}) + if( HAVE_BACKTRACE AND NOT "${Backtrace_LIBRARIES}" STREQUAL "" ) + # On BSDs, CMake returns a fully qualified path to the backtrace library. + # We need to remove the path and the 'lib' prefix, to make it look like a + # regular short library name, suitable for appending to a -l link flag. 
+ get_filename_component(Backtrace_LIBFILE ${Backtrace_LIBRARIES} NAME_WE) + STRING(REGEX REPLACE "^lib" "" Backtrace_LIBFILE ${Backtrace_LIBFILE}) + set(system_libs ${system_libs} ${Backtrace_LIBFILE}) endif() if(LLVM_ENABLE_TERMINFO) if(HAVE_TERMINFO) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 3dc67ad782af..6e65b5e6c807 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1009,7 +1009,7 @@ StringRef sys::getHostCPUName() { #include "llvm/Support/X86TargetParser.def" // Now check types. -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ +#define X86_CPU_TYPE(ARCHNAME, ENUM) \ if (Type == X86::ENUM) \ return ARCHNAME; #include "llvm/Support/X86TargetParser.def" diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 2ff2ee347f56..6704fa27c86e 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -299,6 +299,11 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, printOffset(MO.getOffset(), O); break; } + case MachineOperand::MO_BlockAddress: { + MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress()); + Sym->print(O, MAI); + break; + } } } diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index d1ddb2e3ef70..0d00dab598d5 100644 --- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -46,6 +46,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <iterator> @@ -60,6 +61,8 @@ STATISTIC(NumCollisionsAvoided, "Number of HW prefetch tag collisions avoided"); STATISTIC(NumCollisionsNotAvoided, "Number of HW prefetch tag collisions not avoided due to lack of regsiters"); +DEBUG_COUNTER(FixCounter, "falkor-hwpf", + "Controls which tag collisions are avoided"); namespace { @@ -729,6 +732,21 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { bool Fixed = false; DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); + if (!DebugCounter::shouldExecute(FixCounter)) { + DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI); + continue; + } + + // Add the non-base registers of MI as live so we don't use them as + // scratch registers. + for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) { + if (OpI == static_cast<unsigned>(LdI.BaseRegIdx)) + continue; + MachineOperand &MO = MI.getOperand(OpI); + if (MO.isReg() && MO.readsReg()) + LR.addReg(MO.getReg()); + } + for (unsigned ScratchReg : AArch64::GPR64RegClass) { if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) continue; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index d66f7b59a4b5..789200b28445 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -917,6 +917,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); bool isFixed = MFI.isFixedObjectIndex(FI); + bool isCSR = !isFixed && MFI.getObjectOffset(FI) >= + -((int)AFI->getCalleeSavedStackSize()); // Use frame pointer to reference fixed objects. 
Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -930,6 +932,12 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); + } else if (isCSR && RegInfo->needsStackRealignment(MF)) { + // References to the CSR area must use FP if we're re-aligning the stack + // since the dynamically-sized alignment padding is between the SP/BP and + // the CSR area. + assert(hasFP(MF) && "Re-aligned stack must have frame pointer"); + UseFP = true; } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset @@ -947,9 +955,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } - assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) && "In the presence of dynamic stack pointer realignment, " - "non-argument objects cannot be accessed through the frame pointer"); + "non-argument/CSR objects cannot be accessed through the frame pointer"); if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 41ed24c329ef..233d6be247c2 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal( bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) { + if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 || + (VT == MVT::f16 && Subtarget->hasFullFP16()))) { DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n"); return true; } @@ -5066,7 +5067,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, // Table of Constraints // TODO: This is the current set of constraints supported by ARM for the -// compiler, not all of them may make sense, e.g. S may be difficult to support. +// compiler, not all of them may make sense. // // r - A general register // w - An FP/SIMD register of some size in the range v0-v31 @@ -5126,6 +5127,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; + case 'S': // A symbolic address + return C_Other; } } return TargetLowering::getConstraintType(Constraint); @@ -5250,6 +5253,23 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint( Result = DAG.getRegister(AArch64::WZR, MVT::i32); break; } + case 'S': { + // An absolute symbolic address or label reference. 
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), + GA->getValueType(0)); + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(Op)) { + Result = + DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0)); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Op)) { + Result = + DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0)); + } else + return; + break; + } case 'I': case 'J': @@ -9637,6 +9657,15 @@ static SDValue performPostLD1Combine(SDNode *N, if (LD->getOpcode() != ISD::LOAD) return SDValue(); + // The vector lane must be a constant in the LD1LANE opcode. + SDValue Lane; + if (IsLaneOp) { + Lane = N->getOperand(2); + auto *LaneC = dyn_cast<ConstantSDNode>(Lane); + if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements()) + return SDValue(); + } + LoadSDNode *LoadSDN = cast<LoadSDNode>(LD); EVT MemVT = LoadSDN->getMemoryVT(); // Check if memory operand is the same type as the vector element. @@ -9693,7 +9722,7 @@ static SDValue performPostLD1Combine(SDNode *N, Ops.push_back(LD->getOperand(0)); // Chain if (IsLaneOp) { Ops.push_back(Vector); // The vector to be inserted - Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector + Ops.push_back(Lane); // The lane to be inserted in the vector } Ops.push_back(Addr); Ops.push_back(Inc); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 79826ca2ed8d..040011d858e7 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">; // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, - Sched<[WriteF]>; + Sched<[WriteF]>, Requires<[HasFullFP16]>; def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2042dbf6d5e2..e09263b6fac9 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -147,6 +147,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeR600PacketizerPass(*PR); initializeR600ExpandSpecialInstrsPassPass(*PR); initializeR600VectorRegMergerPass(*PR); + initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelPass(*PR); initializeSILowerI1CopiesPass(*PR); initializeSIFixSGPRCopiesPass(*PR); diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 6d89aa6968e9..41ca7fe8bfaa 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -358,6 +358,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote); setOperationAction(ISD::CTLZ, MVT::i16, Promote); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote); + setOperationAction(ISD::CTPOP, MVT::i16, Promote); setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 9740a18b7248..8c02e8da8d79 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -726,6 +726,10 @@ def : 
GCNPat < (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; +def : GCNPat < + (i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)), + (V_BCNT_U32_B32_e64 $popcnt, $val) +>; /********** ============================================ **********/ /********** Extraction, Insertion, Building and Casting **********/ diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8c1727724a9e..cff24a10bb5f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -4864,12 +4864,14 @@ bool ARMBaseInstrInfo::getRegSequenceLikeInputs( // Populate the InputRegs accordingly. // rY const MachineOperand *MOReg = &MI.getOperand(1); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_0)); // rZ MOReg = &MI.getOperand(2); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_1)); return true; } llvm_unreachable("Target dependent opcode missing"); @@ -4888,6 +4890,8 @@ bool ARMBaseInstrInfo::getExtractSubregLikeInputs( // rX = EXTRACT_SUBREG dZ, ssub_0 // rY = EXTRACT_SUBREG dZ, ssub_1 const MachineOperand &MOReg = MI.getOperand(2); + if (MOReg.isUndef()) + return false; InputReg.Reg = MOReg.getReg(); InputReg.SubReg = MOReg.getSubReg(); InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; @@ -4907,6 +4911,8 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs( // dX = VSETLNi32 dY, rZ, imm const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOIndex = MI.getOperand(3); BaseReg.Reg = MOBaseReg.getReg(); BaseReg.SubReg = MOBaseReg.getSubReg(); diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp index 2e97b99b05a7..b263e9d86c42 100644 --- a/lib/Target/ARM/ARMComputeBlockSize.cpp +++ b/lib/Target/ARM/ARMComputeBlockSize.cpp @@ -35,6 +35,7 @@ mayOptimizeThumb2Instruction(const MachineInstr *MI) { case ARM::tBcc: // optimizeThumb2JumpTables. 
case ARM::t2BR_JT: + case ARM::tBR_JTr: return true; } return false; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 345b081500a4..f36a4317b1b9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -5136,6 +5136,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction // and registers Rd and Base for microMIPS lwp instruction case Mips::JALR_HB: + case Mips::JALR_HB64: case Mips::JALRC_HB_MMR6: case Mips::JALRC_MMR6: if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 6d2f098a6b32..3c67743947cb 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -225,6 +225,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, switch (Kind) { case Mips::fixup_Mips_NONE: return ELF::R_MIPS_NONE; + case FK_Data_1: + report_fatal_error("MIPS does not support one byte relocations"); case Mips::fixup_Mips_16: case FK_Data_2: return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16; diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index 3ff3f07654d9..326897dc5c63 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1886,6 +1886,12 @@ let AddedComplexity = 41 in { def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6; +def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6; + +def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase<JRC16_MMR6, + GPR32Opnd>, + ISA_MICROMIPS32R6; + def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 64fe55e9776b..1fef51fd69d0 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -1003,6 +1003,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6; +def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>, + ISA_MICROMIPS32_NOT_MIPS32R6; + +def PseudoIndirectBranch_MM : PseudoIndirectBranchBase<JR_MM, GPR32Opnd>, + ISA_MICROMIPS32_NOT_MIPS32R6; + let DecoderNamespace = "MicroMips" in { def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 6ceb05577538..f8e739497f4c 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -193,6 +193,10 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", "Disable use of the jal instruction">; +def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard", + "UseIndirectJumpsHazard", + "true", "Use indirect jump" + " guards to prevent certain speculation based attacks">; //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 62f045e77fdb..9e9e074875d0 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -1036,3 +1036,42 @@ def : MipsPat<(select i32:$cond, immz, i32:$f), (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; } + +// Pseudo instructions +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, + hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in { + class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, + PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>; +} + +class PseudoIndirectBranchBaseR6<Instruction JumpInst, Register RT, + RegisterOperand RO> : + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], + II_IndirectBranchPseudo>, + PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)> { + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let isBranch = 1; + let isIndirectBranch = 1; + bit isCTI = 1; +} + + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALLR6REG : TailCallRegR6<JALR, ZERO, GPR32Opnd>, ISA_MIPS32R6; + def PseudoIndirectBranchR6 : PseudoIndirectBranchBaseR6<JALR, ZERO, + GPR32Opnd>, + ISA_MIPS32R6; +} + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLHBR6REG : TailCallReg<JR_HB_R6, GPR32Opnd>, ISA_MIPS32R6; + def PseudoIndrectHazardBranchR6 : PseudoIndirectBranchBase<JR_HB_R6, + GPR32Opnd>, + ISA_MIPS32R6; +} + diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index e008aeafaa2b..828dd4f54223 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -240,13 +240,32 @@ let isCodeGenOnly = 1 in { def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; - def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; + let AdditionalPredicates = [NoIndirectJumpGuards] in + def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; } +let AdditionalPredicates = [NotInMicroMips], + DecoderNamespace = "Mips64" in { + def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; + def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2; +} +def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>; -def TAILCALLREG64 : TailCallReg<GPR64Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALLREG64 : TailCallReg<JR64, GPR64Opnd>, ISA_MIPS3_NOT_32R6_64R6, + PTR_64; + def PseudoIndirectBranch64 : PseudoIndirectBranchBase<JR64, GPR64Opnd>, + ISA_MIPS3_NOT_32R6_64R6; +} -def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>; -def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLREGHB64 : TailCallReg<JR_HB64, GPR64Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6, PTR_64; + def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64, + GPR64Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6; +} /// Multiply and Divide Instructions. 
let AdditionalPredicates = [NotInMicroMips] in { @@ -536,6 +555,10 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>, ISA_MIPS3; } + +let AdditionalPredicates = [UseIndirectJumpsHazard] in + def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -843,7 +866,8 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dext $rt, $rs, $pos, $size", (DEXTU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos, uimm5_plus1:$size), 0>, ISA_MIPS64R2; - + def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>, + ISA_MIPS64; // Two operand (implicit 0 selector) versions: def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 1cd43ee6f1c3..da743fbdee45 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -104,6 +104,16 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd, class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>; class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>; + +class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { + bit isBranch = 1; + bit isIndirectBranch = 1; + bit hasDelaySlot = 1; + bit isTerminator=1; + bit isBarrier=1; + bit isCTI = 1; + InstrItinClass Itinerary = II_JR_HB; +} //===----------------------------------------------------------------------===// // // Instruction Definitions @@ -136,6 +146,7 @@ def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6; let DecoderNamespace = "Mips32r6_64r6_GP64" in { def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64; def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64; + def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6; } let AdditionalPredicates = [NotInMicroMips], DecoderNamespace = "Mips32r6_64r6_PTR64" in { @@ -277,3 +288,22 @@ def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f), def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f), (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>, ISA_MIPS64R6; + +// Pseudo instructions + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in { + def TAILCALL64R6REG : TailCallRegR6<JALR64, ZERO_64, GPR64Opnd>, ISA_MIPS64R6; + def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6<JALR64, ZERO_64, + GPR64Opnd>, + ISA_MIPS64R6; +} + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLHB64R6REG : TailCallReg<JR_HB64_R6, GPR64Opnd>, + ISA_MIPS64R6; + def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase<JR_HB64_R6, + GPR64Opnd>, + ISA_MIPS64R6; +} diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index 0ceb1858fb09..2dcefdc789a5 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -53,7 +53,7 @@ class DSPInst<string opstr = ""> class PseudoDSP<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> - : MipsPseudo<outs, ins, pattern, itin>, PredicateControl { + : MipsPseudo<outs, ins, pattern, itin> { let InsnPredicates = [HasDSP]; } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 
8bbac3ed7cfb..d3048c7390e1 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <array> #include <cassert> #include <cstdint> @@ -1306,13 +1307,13 @@ bool MipsFastISel::fastLowerArguments() { return false; } - const ArrayRef<MCPhysReg> GPR32ArgRegs = {Mips::A0, Mips::A1, Mips::A2, - Mips::A3}; - const ArrayRef<MCPhysReg> FGR32ArgRegs = {Mips::F12, Mips::F14}; - const ArrayRef<MCPhysReg> AFGR64ArgRegs = {Mips::D6, Mips::D7}; - ArrayRef<MCPhysReg>::iterator NextGPR32 = GPR32ArgRegs.begin(); - ArrayRef<MCPhysReg>::iterator NextFGR32 = FGR32ArgRegs.begin(); - ArrayRef<MCPhysReg>::iterator NextAFGR64 = AFGR64ArgRegs.begin(); + std::array<MCPhysReg, 4> GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2, + Mips::A3}}; + std::array<MCPhysReg, 2> FGR32ArgRegs = {{Mips::F12, Mips::F14}}; + std::array<MCPhysReg, 2> AFGR64ArgRegs = {{Mips::D6, Mips::D7}}; + auto NextGPR32 = GPR32ArgRegs.begin(); + auto NextFGR32 = FGR32ArgRegs.begin(); + auto NextAFGR64 = AFGR64ArgRegs.begin(); struct AllocatedReg { const TargetRegisterClass *RC; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ba05b0f48df7..3d383b3dfe3e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3868,7 +3868,7 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, nullptr); case 'l': // use the `lo` register to store values // that are no bigger than a word - if (VT == MVT::i32) + if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass); return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass); case 'x': // use the concatenated `hi` and `lo` registers diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 817d9b44b9c2..516edef0556c 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -128,7 +128,7 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> : - MipsInst<outs, ins, "", pattern, itin, Pseudo> { + MipsInst<outs, ins, "", pattern, itin, Pseudo>, PredicateControl { let isCodeGenOnly = 1; let isPseudo = 1; } @@ -136,7 +136,7 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern, // Mips32/64 Pseudo Instruction Format class PseudoSE<dag outs, dag ins, list<dag> pattern, InstrItinClass itin = IIPseudo> : - MipsPseudo<outs, ins, pattern, itin>, PredicateControl { + MipsPseudo<outs, ins, pattern, itin> { let EncodingPredicates = [HasStdEnc]; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 51ddc0d44c00..2e30d271e130 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -298,7 +298,6 @@ unsigned MipsInstrInfo::getEquivalentCompactForm( case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: - case Mips::TAILCALLREG: canUseShortMicroMipsCTI = true; break; } @@ -377,18 +376,18 @@ unsigned MipsInstrInfo::getEquivalentCompactForm( // For MIPSR6, the instruction 'jic' can be used for these cases. Some // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'. 
case Mips::JR: + case Mips::PseudoIndirectBranchR6: case Mips::PseudoReturn: - case Mips::PseudoIndirectBranch: - case Mips::TAILCALLREG: + case Mips::TAILCALLR6REG: if (canUseShortMicroMipsCTI) return Mips::JRC16_MM; return Mips::JIC; case Mips::JALRPseudo: return Mips::JIALC; case Mips::JR64: + case Mips::PseudoIndirectBranch64R6: case Mips::PseudoReturn64: - case Mips::PseudoIndirectBranch64: - case Mips::TAILCALLREG64: + case Mips::TAILCALL64R6REG: return Mips::JIC64; case Mips::JALR64Pseudo: return Mips::JIALC64; @@ -617,6 +616,18 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI, return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 32, 64, 32, 64); case Mips::DEXTU: return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64); + case Mips::TAILCALLREG: + case Mips::PseudoIndirectBranch: + case Mips::JR: + case Mips::JR64: + case Mips::JALR: + case Mips::JALR64: + case Mips::JALRPseudo: + if (!Subtarget.useIndirectJumpsHazard()) + return true; + + ErrInfo = "invalid instruction when using jump guards!"; + return false; default: return true; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e0d818b749df..33a061e12a3f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -244,7 +244,10 @@ def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, AssemblerPredicate<"!FeatureMadd4">; def HasMT : Predicate<"Subtarget->hasMT()">, AssemblerPredicate<"FeatureMT">; - +def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">, + AssemblerPredicate<"FeatureUseIndirectJumpsHazard">; +def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">, + AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. // They are mutually exclusive. @@ -1540,8 +1543,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, PseudoSE<(outs), (ins calltarget:$target), [], II_J>, PseudoInstExpansion<(JumpInst Opnd:$target)>; - class TailCallReg<RegisterOperand RO> : - PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>; + class TailCallReg<Instruction JumpInst, RegisterOperand RO> : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, + PseudoInstExpansion<(JumpInst RO:$rs)>; } class BAL_BR_Pseudo<Instruction RealInst> : @@ -2068,7 +2072,7 @@ def B : UncondBranch<BEQ, brtarget>, AdditionalRequires<[NotInMicroMips]>; def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>; -let AdditionalPredicates = [NotInMicroMips] in { +let AdditionalPredicates = [NotInMicroMips, NoIndirectJumpGuards] in { def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>; } @@ -2088,24 +2092,28 @@ def BAL_BR : BAL_BR_Pseudo<BGEZAL>; let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in { def TAILCALL : TailCall<J, jmptarget>; } - -def TAILCALLREG : TailCallReg<GPR32Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in + def TAILCALLREG : TailCallReg<JR, GPR32Opnd>, ISA_MIPS1_NOT_32R6_64R6; // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. 
-class PseudoIndirectBranchBase<RegisterOperand RO> : +class PseudoIndirectBranchBase<Instruction JumpInst, RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], - II_IndirectBranchPseudo> { + II_IndirectBranchPseudo>, + PseudoInstExpansion<(JumpInst RO:$rs)> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; let isBranch = 1; let isIndirectBranch = 1; bit isCTI = 1; - let Predicates = [NotInMips16Mode]; } -def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>; +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + NoIndirectJumpGuards] in + def PseudoIndirectBranch : PseudoIndirectBranchBase<JR, GPR32Opnd>, + ISA_MIPS1_NOT_32R6_64R6; // Return instructions are matched as a RetRA instruction, then are expanded // into PseudoReturn/PseudoReturn64 after register allocation. Finally, @@ -2278,8 +2286,8 @@ class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { list<dag> Pattern = []; } -class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, - JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> { +class JR_HB_DESC<RegisterOperand RO> : + InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, JR_HB_DESC_BASE<"jr.hb", RO> { let isBranch=1; let isIndirectBranch=1; let hasDelaySlot=1; @@ -2288,8 +2296,9 @@ class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, bit isCTI = 1; } -class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, - JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> { +class JALR_HB_DESC<RegisterOperand RO> : + InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, JALR_HB_DESC_BASE<"jalr.hb", + RO> { let isIndirectBranch=1; let hasDelaySlot=1; bit isCTI = 1; @@ -2298,8 +2307,19 @@ class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, class JR_HB_ENC : JR_HB_FM<8>; class JALR_HB_ENC : JALR_HB_FM<9>; -def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; -def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32; +def JR_HB : JR_HB_DESC<GPR32Opnd>, JR_HB_ENC, ISA_MIPS32R2_NOT_32R6_64R6; +def JALR_HB : JALR_HB_DESC<GPR32Opnd>, JALR_HB_ENC, ISA_MIPS32; + +let AdditionalPredicates = [NotInMicroMips, UseIndirectJumpsHazard] in + def JALRHBPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR_HB, RA>; + + +let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips, + UseIndirectJumpsHazard] in { + def TAILCALLREGHB : TailCallReg<JR_HB, GPR32Opnd>, ISA_MIPS32_NOT_32R6_64R6; + def PseudoIndirectHazardBranch : PseudoIndirectBranchBase<JR_HB, GPR32Opnd>, + ISA_MIPS32R2_NOT_32R6_64R6; +} class TLB<string asmstr, InstrItinClass itin = NoItinerary> : InstSE<(outs), (ins), asmstr, [], itin, FrmOther, asmstr>; @@ -2433,7 +2453,8 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; } -def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32; +def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, + ISA_MIPS32; def : MipsInstAlias<"neg $rt, $rs", (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; def : MipsInstAlias<"neg $rt", diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index bbf2050ce1eb..e6ecbe9b5f66 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -371,11 +371,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // In NaCl, modifying the sp is not allowed in branch delay slot. // For MIPS32R6, we can skip using a delay slot branch. 
- if (Subtarget.isTargetNaCl() || Subtarget.hasMips32r6()) + if (Subtarget.isTargetNaCl() || + (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard())) BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) .addReg(Mips::SP).addImm(8); - if (Subtarget.hasMips32r6()) { + if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()) { const unsigned JICOp = Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC; BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp)) @@ -383,7 +384,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addImm(0); } else { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB) + : Mips::JR; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT); if (Subtarget.isTargetNaCl()) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); @@ -475,7 +480,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) .addReg(Mips::SP_64).addImm(0); - if (Subtarget.hasMips64r6()) { + if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard()) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) .addReg(Mips::SP_64) .addImm(16); @@ -483,7 +488,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::AT_64) .addImm(0); } else { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); + unsigned JROp = + Subtarget.useIndirectJumpsHazard() + ? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64) + : Mips::JR64; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) .addReg(Mips::SP_64) .addImm(16); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index f7d7e2af85e4..eee5b23117f6 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, + SelectionDAG &DAG, + const MipsSubtarget &Subtarget) { + // Estimate the number of operations the below transform will turn a + // constant multiply into. The number is approximately how many powers + // of two summed together that the constant can be broken down into. + + SmallVector<APInt, 16> WorkStack(1, C); + unsigned Steps = 0; + unsigned BitWidth = C.getBitWidth(); + + while (!WorkStack.empty()) { + APInt Val = WorkStack.pop_back_val(); + + if (Val == 0 || Val == 1) + continue; + + if (Val.isPowerOf2()) { + ++Steps; + continue; + } + + APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); + APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) + : APInt(BitWidth, 1) << C.ceilLogBase2(); + + if ((Val - Floor).ule(Ceil - Val)) { + WorkStack.push_back(Floor); + WorkStack.push_back(Val - Floor); + ++Steps; + continue; + } + + WorkStack.push_back(Ceil); + WorkStack.push_back(Ceil - Val); + ++Steps; + + // If we have taken more than 12[1] / 8[2] steps to attempt the + // optimization for a native sized value, it is more than likely that this + // optimization will make things worse. + // + // [1] MIPS64 requires 6 instructions at most to materialize any constant, + // multiplication requires at least 4 cycles, but another cycle (or two) + // to retrieve the result from the HI/LO registers. 
+ // + // [2] For MIPS32, more than 8 steps is expensive as the constant could be + // materialized in 2 instructions, multiplication requires at least 4 + // cycles, but another cycle (or two) to retrieve the result from the + // HI/LO registers. + + if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64())) + return false; + + if (Steps > 8 && Subtarget.isABI_O32()) + return false; + } + + // If the value being multiplied is not supported natively, we have to pay + // an additional legalization cost, conservatively assume an increase in the + // cost of 3 instructions per step. This values for this heuristic were + // determined experimentally. + unsigned RegisterSize = DAG.getTargetLoweringInfo() + .getRegisterType(*DAG.getContext(), VT) + .getSizeInBits(); + Steps *= (VT.getSizeInBits() != RegisterSize) * 3; + if (Steps > 27) + return false; + + return true; +} + static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Return 0. @@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, - const MipsSETargetLowering *TL) { + const MipsSETargetLowering *TL, + const MipsSubtarget &Subtarget) { EVT VT = N->getValueType(0); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) - if (!VT.isVector()) + if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( + C->getAPIntValue(), VT, DAG, Subtarget)) return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), DAG); @@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { Val = performORCombine(N, DAG, DCI, Subtarget); break; case ISD::MUL: - return performMULCombine(N, DAG, DCI, this); + return performMULCombine(N, DAG, DCI, this, Subtarget); case ISD::SHL: Val = performSHLCombine(N, DAG, DCI, Subtarget); break; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index f6af7e22e351..ddaa07ea9bc1 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -72,9 +72,10 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false), HasMT(false), - StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), - TSInfo(), InstrInfo(MipsInstrInfo::create( - initializeSubtargetDependencies(CPU, FS, TM))), + UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride), + TM(TM), TargetTriple(TT), TSInfo(), + InstrInfo( + MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { @@ -107,6 +108,15 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (hasMips64r6() && InMicroMipsMode) report_fatal_error("microMIPS64R6 is not supported", false); + + if (UseIndirectJumpsHazard) { + if (InMicroMipsMode) + report_fatal_error( + "cannot combine indirect jumps with hazard barriers and microMIPS"); + if (!hasMips32r2()) + report_fatal_error( + "indirect jumps with hazard barriers requires MIPS32R2 or later"); + } if (hasMips32r6()) { StringRef ISA = hasMips64r6() ? 
"MIPS64r6" : "MIPS32r6"; diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 8b10b0596e0e..ad2905c51601 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -152,6 +152,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasMT -- support MT ASE. bool HasMT; + // Use hazard variants of the jump register instructions for indirect + // function calls and jump tables. + bool UseIndirectJumpsHazard; + // Disable use of the `jal` instruction. bool UseLongCalls = false; @@ -272,6 +276,9 @@ public: bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } bool hasMT() const { return HasMT; } + bool useIndirectJumpsHazard() const { + return UseIndirectJumpsHazard && hasMips32r2(); + } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index d31e1cb5047b..cb8cc7bb347a 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -44,6 +44,14 @@ static cl::opt<bool> cl::desc("Disable load/store vectorizer"), cl::init(false), cl::Hidden); +// TODO: Remove this flag when we are confident with no regressions. +static cl::opt<bool> DisableRequireStructuredCFG( + "disable-nvptx-require-structured-cfg", + cl::desc("Transitional flag to turn off NVPTX's requirement on preserving " + "structured CFG. The requirement should be disabled only when " + "unexpected regressions happen."), + cl::init(false), cl::Hidden); + namespace llvm { void initializeNVVMIntrRangePass(PassRegistry&); @@ -108,6 +116,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, drvInterface = NVPTX::NVCL; else drvInterface = NVPTX::CUDA; + if (!DisableRequireStructuredCFG) + setRequiresStructuredCFG(true); initAsmInfo(); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f0e8b11a3d9c..26e9f13f9ff4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12264,6 +12264,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getValueType() == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getOperand(1).getValueType() == MVT::i64))) { + // STBRX can only handle simple types. + EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); + if (mVT.isExtended()) + break; + SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -12271,7 +12276,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // If the type of BSWAP operand is wider than stored memory width // it need to be shifted to the right side before STBRX. - EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); if (Op1VT.bitsGT(mVT)) { int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index fb16700a5e17..4ef71effd49b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2431,7 +2431,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // Use APInt's rotate function. int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); - APInt InVal(Opc == PPC::RLDICL ? 
64 : 32, SExtImm, true); + APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ? + 64 : 32, SExtImm, true); InVal = InVal.rotl(SH); uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; InVal &= Mask; @@ -2444,6 +2445,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, Is64BitLI = Opc != PPC::RLDICL_32; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLDICLo; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; break; } return false; @@ -2471,6 +2474,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; NewImm = InVal.getSExtValue(); SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; break; } return false; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 23ac9d9936ad..44400813094b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -31,6 +31,7 @@ set(sources X86FixupBWInsts.cpp X86FixupLEAs.cpp X86FixupSetCC.cpp + X86FlagsCopyLowering.cpp X86FloatingPoint.cpp X86FrameLowering.cpp X86InstructionSelector.cpp diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index c58254ae38c1..b3c491b3de5e 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -265,13 +265,10 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( /// @param reg - The Reg to append. static void translateRegister(MCInst &mcInst, Reg reg) { #define ENTRY(x) X86::x, - uint8_t llvmRegnums[] = { - ALL_REGS - 0 - }; + static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; #undef ENTRY - uint8_t llvmRegnum = llvmRegnums[reg]; + MCPhysReg llvmRegnum = llvmRegnums[reg]; mcInst.addOperand(MCOperand::createReg(llvmRegnum)); } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 361326824292..642dda8f4225 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -66,6 +66,9 @@ FunctionPass *createX86OptimizeLEAs(); /// Return a pass that transforms setcc + movzx pairs into xor + setcc. FunctionPass *createX86FixupSetCC(); +/// Return a pass that lowers EFLAGS copy pseudo instructions. +FunctionPass *createX86FlagsCopyLoweringPass(); + /// Return a pass that expands WinAlloca pseudo-instructions. FunctionPass *createX86WinAllocaExpander(); diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp index bc0f55f581ff..ffe176ad4770 100644 --- a/lib/Target/X86/X86DomainReassignment.cpp +++ b/lib/Target/X86/X86DomainReassignment.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Printable.h" #include <bitset> using namespace llvm; @@ -262,25 +263,6 @@ public: } }; -/// An Instruction Converter which completely deletes an instruction. -/// For example, IMPLICIT_DEF instructions can be deleted when converting from -/// GPR to mask. -class InstrDeleter : public InstrConverterBase { -public: - InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} - - bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, - MachineRegisterInfo *MRI) const override { - assert(isLegal(MI, TII) && "Cannot convert instruction"); - return true; - } - - double getExtraCost(const MachineInstr *MI, - MachineRegisterInfo *MRI) const override { - return 0; - } -}; - // Key type to be used by the Instruction Converters map. 
// A converter is identified by <destination domain, source opcode> typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; @@ -310,8 +292,12 @@ private: /// Domains which this closure can legally be reassigned to. std::bitset<NumDomains> LegalDstDomains; + /// An ID to uniquely identify this closure, even when it gets + /// moved around + unsigned ID; + public: - Closure(std::initializer_list<RegDomain> LegalDstDomainList) { + Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { for (RegDomain D : LegalDstDomainList) LegalDstDomains.set(D); } @@ -347,6 +333,27 @@ public: return Instrs; } + LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { + dbgs() << "Registers: "; + bool First = true; + for (unsigned Reg : Edges) { + if (!First) + dbgs() << ", "; + First = false; + dbgs() << printReg(Reg, MRI->getTargetRegisterInfo()); + } + dbgs() << "\n" << "Instructions:"; + for (MachineInstr *MI : Instrs) { + dbgs() << "\n "; + MI->print(dbgs()); + } + dbgs() << "\n"; + } + + unsigned getID() const { + return ID; + } + }; class X86DomainReassignment : public MachineFunctionPass { @@ -358,7 +365,7 @@ class X86DomainReassignment : public MachineFunctionPass { DenseSet<unsigned> EnclosedEdges; /// All instructions that are included in some closure. - DenseMap<MachineInstr *, Closure *> EnclosedInstrs; + DenseMap<MachineInstr *, unsigned> EnclosedInstrs; public: static char ID; @@ -435,14 +442,14 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg, void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { auto I = EnclosedInstrs.find(MI); if (I != EnclosedInstrs.end()) { - if (I->second != &C) + if (I->second != C.getID()) // Instruction already belongs to another closure, avoid conflicts between // closure and mark this closure as illegal. C.setAllIllegal(); return; } - EnclosedInstrs[MI] = &C; + EnclosedInstrs[MI] = C.getID(); C.addInstruction(MI); // Mark closure as illegal for reassignment to domains, if there is no @@ -587,7 +594,7 @@ void X86DomainReassignment::initConverters() { new InstrIgnore(TargetOpcode::PHI); Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = - new InstrDeleter(TargetOpcode::IMPLICIT_DEF); + new InstrIgnore(TargetOpcode::IMPLICIT_DEF); Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2); @@ -723,6 +730,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { std::vector<Closure> Closures; // Go over all virtual registers and calculate a closure. + unsigned ClosureID = 0; for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx); @@ -735,7 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Calculate closure starting with Reg. - Closure C({MaskDomain}); + Closure C(ClosureID++, {MaskDomain}); buildClosure(C, Reg); // Collect all closures that can potentially be converted. 
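// --- Editor's illustrative aside (not part of the imported patch) ---
// The X86DomainReassignment hunks above key EnclosedInstrs by a numeric
// closure ID instead of a Closure*. The patch's own comment notes that a
// closure "gets moved around"; once a Closure is moved (for example into the
// Closures vector), a pointer captured earlier no longer refers to the stored
// object, while an ID assigned at construction time still identifies it. A
// minimal standalone C++ sketch of that distinction (names are illustrative):

#include <cassert>
#include <utility>
#include <vector>

struct Closure {
  unsigned ID;
  explicit Closure(unsigned ID) : ID(ID) {}
};

int main() {
  std::vector<Closure> Closures;
  Closure C(/*ID=*/0);                     // built first, then moved into storage
  const Closure *Before = &C;              // identity by address, taken pre-move
  unsigned BeforeID = C.ID;                // identity by ID
  Closures.push_back(std::move(C));        // the closure "gets moved around"
  assert(&Closures.back() != Before);      // the old address no longer names it
  assert(Closures.back().ID == BeforeID);  // the ID still does
  return 0;
}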
@@ -743,15 +751,16 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { Closures.push_back(std::move(C)); } - for (Closure &C : Closures) + for (Closure &C : Closures) { + DEBUG(C.dump(MRI)); if (isReassignmentProfitable(C, MaskDomain)) { reassign(C, MaskDomain); ++NumClosuresConverted; Changed = true; } + } - for (auto I : Converters) - delete I.second; + DeleteContainerSeconds(Converters); DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n"); DEBUG(MF.print(dbgs())); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 80ce3c579fe0..dca6c592614c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1789,9 +1789,16 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0; const TargetRegisterClass *RC = nullptr; - assert(!I->getType()->isIntegerTy(8) && - "i8 shifts should be handled by autogenerated table"); - if (I->getType()->isIntegerTy(16)) { + if (I->getType()->isIntegerTy(8)) { + CReg = X86::CL; + RC = &X86::GR8RegClass; + switch (I->getOpcode()) { + case Instruction::LShr: OpReg = X86::SHR8rCL; break; + case Instruction::AShr: OpReg = X86::SAR8rCL; break; + case Instruction::Shl: OpReg = X86::SHL8rCL; break; + default: return false; + } + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -1836,10 +1843,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. - assert(CReg != X86::CL && "CReg should be a super register of CL"); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::KILL), X86::CL) - .addReg(CReg, RegState::Kill); + if (CReg != X86::CL) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::KILL), X86::CL) + .addReg(CReg, RegState::Kill); unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg) diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp new file mode 100644 index 000000000000..a6fccd134740 --- /dev/null +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -0,0 +1,935 @@ +//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual +/// flag bits. +/// +/// We have to do this by carefully analyzing and rewriting the usage of the +/// copied EFLAGS register because there is no general way to rematerialize the +/// entire EFLAGS register safely and efficiently. Using `popf` both forces +/// dynamic stack adjustment and can create correctness issues due to IF, TF, +/// and other non-status flags being overwritten. Using sequences involving +/// SAHF don't work on all x86 processors and are often quite slow compared to +/// directly testing a single status preserved in its own GPR. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> + +using namespace llvm; + +#define PASS_KEY "x86-flags-copy-lowering" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated"); +STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted"); +STATISTIC(NumTestsInserted, "Number of test instructions inserted"); +STATISTIC(NumAddsInserted, "Number of adds instructions inserted"); + +namespace llvm { + +void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); + +} // end namespace llvm + +namespace { + +// Convenient array type for storing registers associated with each condition. +using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>; + +class X86FlagsCopyLoweringPass : public MachineFunctionPass { +public: + X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { + initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; } + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Pass identification, replacement for typeid. 
+ static char ID; + +private: + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetRegisterClass *PromoteRC; + MachineDominatorTree *MDT; + + CondRegArray collectCondsInRegs(MachineBasicBlock &MBB, + MachineInstr &CopyDefI); + + unsigned promoteCondToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, X86::CondCode Cond); + std::pair<unsigned, bool> + getCondOrInverseInReg(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, + X86::CondCode Cond, CondRegArray &CondRegs); + void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, + DebugLoc Loc, unsigned Reg); + + void rewriteArithmetic(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, + MachineInstr &MI, MachineOperand &FlagUse, + CondRegArray &CondRegs); + void rewriteCMov(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, + MachineInstr &CMovI, MachineOperand &FlagUse, + CondRegArray &CondRegs); + void rewriteCondJmp(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, + MachineInstr &JmpI, CondRegArray &CondRegs); + void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, + MachineInstr &CopyDefI); + void rewriteSetCarryExtended(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, + MachineOperand &FlagUse, CondRegArray &CondRegs); + void rewriteSetCC(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, + MachineInstr &SetCCI, MachineOperand &FlagUse, + CondRegArray &CondRegs); +}; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE, + "X86 EFLAGS copy lowering", false, false) +INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE, + "X86 EFLAGS copy lowering", false, false) + +FunctionPass *llvm::createX86FlagsCopyLoweringPass() { + return new X86FlagsCopyLoweringPass(); +} + +char X86FlagsCopyLoweringPass::ID = 0; + +void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +namespace { +/// An enumeration of the arithmetic instruction mnemonics which have +/// interesting flag semantics. +/// +/// We can map instruction opcodes into these mnemonics to make it easy to +/// dispatch with specific functionality. 
+enum class FlagArithMnemonic { + ADC, + ADCX, + ADOX, + RCL, + RCR, + SBB, +}; +} // namespace + +static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { + switch (Opcode) { + default: + report_fatal_error("No support for lowering a copy into EFLAGS when used " + "by this instruction!"); + +#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \ + case X86::MNEMONIC##8##SUFFIX: \ + case X86::MNEMONIC##16##SUFFIX: \ + case X86::MNEMONIC##32##SUFFIX: \ + case X86::MNEMONIC##64##SUFFIX: + +#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ + case X86::MNEMONIC##8ri: \ + case X86::MNEMONIC##16ri8: \ + case X86::MNEMONIC##32ri8: \ + case X86::MNEMONIC##64ri8: \ + case X86::MNEMONIC##16ri: \ + case X86::MNEMONIC##32ri: \ + case X86::MNEMONIC##64ri32: \ + case X86::MNEMONIC##8mi: \ + case X86::MNEMONIC##16mi8: \ + case X86::MNEMONIC##32mi8: \ + case X86::MNEMONIC##64mi8: \ + case X86::MNEMONIC##16mi: \ + case X86::MNEMONIC##32mi: \ + case X86::MNEMONIC##64mi32: \ + case X86::MNEMONIC##8i8: \ + case X86::MNEMONIC##16i16: \ + case X86::MNEMONIC##32i32: \ + case X86::MNEMONIC##64i32: + + LLVM_EXPAND_ADC_SBB_INSTR(ADC) + return FlagArithMnemonic::ADC; + + LLVM_EXPAND_ADC_SBB_INSTR(SBB) + return FlagArithMnemonic::SBB; + +#undef LLVM_EXPAND_ADC_SBB_INSTR + + LLVM_EXPAND_INSTR_SIZES(RCL, rCL) + LLVM_EXPAND_INSTR_SIZES(RCL, r1) + LLVM_EXPAND_INSTR_SIZES(RCL, ri) + return FlagArithMnemonic::RCL; + + LLVM_EXPAND_INSTR_SIZES(RCR, rCL) + LLVM_EXPAND_INSTR_SIZES(RCR, r1) + LLVM_EXPAND_INSTR_SIZES(RCR, ri) + return FlagArithMnemonic::RCR; + +#undef LLVM_EXPAND_INSTR_SIZES + + case X86::ADCX32rr: + case X86::ADCX64rr: + case X86::ADCX32rm: + case X86::ADCX64rm: + return FlagArithMnemonic::ADCX; + + case X86::ADOX32rr: + case X86::ADOX64rr: + case X86::ADOX32rm: + case X86::ADOX64rm: + return FlagArithMnemonic::ADOX; + } +} + +static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB, + MachineInstr &SplitI, + const X86InstrInfo &TII) { + MachineFunction &MF = *MBB.getParent(); + + assert(SplitI.getParent() == &MBB && + "Split instruction must be in the split block!"); + assert(SplitI.isBranch() && + "Only designed to split a tail of branch instructions!"); + assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID && + "Must split on an actual jCC instruction!"); + + // Dig out the previous instruction to the split point. + MachineInstr &PrevI = *std::prev(SplitI.getIterator()); + assert(PrevI.isBranch() && "Must split after a branch!"); + assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID && + "Must split after an actual jCC instruction!"); + assert(!std::prev(PrevI.getIterator())->isTerminator() && + "Must only have this one terminator prior to the split!"); + + // Grab the one successor edge that will stay in `MBB`. + MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB(); + + // Analyze the original block to see if we are actually splitting an edge + // into two edges. This can happen when we have multiple conditional jumps to + // the same successor. 
+ bool IsEdgeSplit = + std::any_of(SplitI.getIterator(), MBB.instr_end(), + [&](MachineInstr &MI) { + assert(MI.isTerminator() && + "Should only have spliced terminators!"); + return llvm::any_of( + MI.operands(), [&](MachineOperand &MOp) { + return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc; + }); + }) || + MBB.getFallThrough() == &UnsplitSucc; + + MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(); + + // Insert the new block immediately after the current one. Any existing + // fallthrough will be sunk into this new block anyways. + MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB); + + // Splice the tail of instructions into the new block. + NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end()); + + // Copy the necessary succesors (and their probability info) into the new + // block. + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if (IsEdgeSplit || *SI != &UnsplitSucc) + NewMBB.copySuccessor(&MBB, SI); + // Normalize the probabilities if we didn't end up splitting the edge. + if (!IsEdgeSplit) + NewMBB.normalizeSuccProbs(); + + // Now replace all of the moved successors in the original block with the new + // block. This will merge their probabilities. + for (MachineBasicBlock *Succ : NewMBB.successors()) + if (Succ != &UnsplitSucc) + MBB.replaceSuccessor(Succ, &NewMBB); + + // We should always end up replacing at least one successor. + assert(MBB.isSuccessor(&NewMBB) && + "Failed to make the new block a successor!"); + + // Now update all the PHIs. + for (MachineBasicBlock *Succ : NewMBB.successors()) { + for (MachineInstr &MI : *Succ) { + if (!MI.isPHI()) + break; + + for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps; + OpIdx += 2) { + MachineOperand &OpV = MI.getOperand(OpIdx); + MachineOperand &OpMBB = MI.getOperand(OpIdx + 1); + assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!"); + if (OpMBB.getMBB() != &MBB) + continue; + + // Replace the operand for unsplit successors + if (!IsEdgeSplit || Succ != &UnsplitSucc) { + OpMBB.setMBB(&NewMBB); + + // We have to continue scanning as there may be multiple entries in + // the PHI. + continue; + } + + // When we have split the edge append a new successor. + MI.addOperand(MF, OpV); + MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB)); + break; + } + } + } + + return NewMBB; +} + +bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() + << " **********\n"); + + auto &Subtarget = MF.getSubtarget<X86Subtarget>(); + MRI = &MF.getRegInfo(); + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + PromoteRC = &X86::GR8RegClass; + + if (MF.begin() == MF.end()) + // Nothing to do for a degenerate empty function... + return false; + + SmallVector<MachineInstr *, 4> Copies; + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.getOpcode() == TargetOpcode::COPY && + MI.getOperand(0).getReg() == X86::EFLAGS) + Copies.push_back(&MI); + + for (MachineInstr *CopyI : Copies) { + MachineBasicBlock &MBB = *CopyI->getParent(); + + MachineOperand &VOp = CopyI->getOperand(1); + assert(VOp.isReg() && + "The input to the copy for EFLAGS should always be a register!"); + MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg()); + if (CopyDefI.getOpcode() != TargetOpcode::COPY) { + // FIXME: The big likely candidate here are PHI nodes. 
We could in theory + // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard + // enough that it is probably better to change every other part of LLVM + // to avoid creating them. The issue is that once we have PHIs we won't + // know which original EFLAGS value we need to capture with our setCCs + // below. The end result will be computing a complete set of setCCs that + // we *might* want, computing them in every place where we copy *out* of + // EFLAGS and then doing SSA formation on all of them to insert necessary + // PHI nodes and consume those here. Then hoping that somehow we DCE the + // unnecessary ones. This DCE seems very unlikely to be successful and so + // we will almost certainly end up with a glut of dead setCC + // instructions. Until we have a motivating test case and fail to avoid + // it by changing other parts of LLVM's lowering, we refuse to handle + // this complex case here. + DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: "; + CopyDefI.dump()); + report_fatal_error( + "Cannot lower EFLAGS copy unless it is defined in turn by a copy!"); + } + + auto Cleanup = make_scope_exit([&] { + // All uses of the EFLAGS copy are now rewritten, kill the copy into + // eflags and if dead the copy from. + CopyI->eraseFromParent(); + if (MRI->use_empty(CopyDefI.getOperand(0).getReg())) + CopyDefI.eraseFromParent(); + ++NumCopiesEliminated; + }); + + MachineOperand &DOp = CopyI->getOperand(0); + assert(DOp.isDef() && "Expected register def!"); + assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!"); + if (DOp.isDead()) + continue; + + MachineBasicBlock &TestMBB = *CopyDefI.getParent(); + auto TestPos = CopyDefI.getIterator(); + DebugLoc TestLoc = CopyDefI.getDebugLoc(); + + DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump()); + + // Scan for usage of newly set EFLAGS so we can rewrite them. We just buffer + // jumps because their usage is very constrained. + bool FlagsKilled = false; + SmallVector<MachineInstr *, 4> JmpIs; + + // Gather the condition flags that have already been preserved in + // registers. We do this from scratch each time as we expect there to be + // very few of them and we expect to not revisit the same copy definition + // many times. If either of those change sufficiently we could build a map + // of these up front instead. + CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI); + + // Collect the basic blocks we need to scan. Typically this will just be + // a single basic block but we may have to scan multiple blocks if the + // EFLAGS copy lives into successors. + SmallVector<MachineBasicBlock *, 2> Blocks; + SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks; + Blocks.push_back(&MBB); + VisitedBlocks.insert(&MBB); + + do { + MachineBasicBlock &UseMBB = *Blocks.pop_back_val(); + + // We currently don't do any PHI insertion and so we require that the + // test basic block dominates all of the use basic blocks. + // + // We could in theory do PHI insertion here if it becomes useful by just + // taking undef values in along every edge that we don't trace this + // EFLAGS copy along. This isn't as bad as fully general PHI insertion, + // but still seems like a great deal of complexity. + // + // Because it is theoretically possible that some earlier MI pass or + // other lowering transformation could induce this to happen, we do + // a hard check even in non-debug builds here. 
+ if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) { + DEBUG({ + dbgs() << "ERROR: Encountered use that is not dominated by our test " + "basic block! Rewriting this would require inserting PHI " + "nodes to track the flag state across the CFG.\n\nTest " + "block:\n"; + TestMBB.dump(); + dbgs() << "Use block:\n"; + UseMBB.dump(); + }); + report_fatal_error("Cannot lower EFLAGS copy when original copy def " + "does not dominate all uses."); + } + + for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator()) + : UseMBB.instr_begin(), + MIE = UseMBB.instr_end(); + MII != MIE;) { + MachineInstr &MI = *MII++; + MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS); + if (!FlagUse) { + if (MI.findRegisterDefOperand(X86::EFLAGS)) { + // If EFLAGS are defined, it's as-if they were killed. We can stop + // scanning here. + // + // NB!!! Many instructions only modify some flags. LLVM currently + // models this as clobbering all flags, but if that ever changes + // this will need to be carefully updated to handle that more + // complex logic. + FlagsKilled = true; + break; + } + continue; + } + + DEBUG(dbgs() << " Rewriting use: "; MI.dump()); + + // Check the kill flag before we rewrite as that may change it. + if (FlagUse->isKill()) + FlagsKilled = true; + + // Once we encounter a branch, the rest of the instructions must also be + // branches. We can't rewrite in place here, so we handle them below. + // + // Note that we don't have to handle tail calls here, even conditional + // tail calls, as those are not introduced into the X86 MI until post-RA + // branch folding or black placement. As a consequence, we get to deal + // with the simpler formulation of conditional branches followed by tail + // calls. + if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) { + auto JmpIt = MI.getIterator(); + do { + JmpIs.push_back(&*JmpIt); + ++JmpIt; + } while (JmpIt != UseMBB.instr_end() && + X86::getCondFromBranchOpc(JmpIt->getOpcode()) != + X86::COND_INVALID); + break; + } + + // Otherwise we can just rewrite in-place. + if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) { + rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); + } else if (X86::getCondFromSETOpc(MI.getOpcode()) != + X86::COND_INVALID) { + rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); + } else if (MI.getOpcode() == TargetOpcode::COPY) { + rewriteCopy(MI, *FlagUse, CopyDefI); + } else { + // We assume all other instructions that use flags also def them. + assert(MI.findRegisterDefOperand(X86::EFLAGS) && + "Expected a def of EFLAGS for this instruction!"); + + // NB!!! Several arithmetic instructions only *partially* update + // flags. Theoretically, we could generate MI code sequences that + // would rely on this fact and observe different flags independently. + // But currently LLVM models all of these instructions as clobbering + // all the flags in an undef way. We rely on that to simplify the + // logic. + FlagsKilled = true; + + switch (MI.getOpcode()) { + case X86::SETB_C8r: + case X86::SETB_C16r: + case X86::SETB_C32r: + case X86::SETB_C64r: + // Use custom lowering for arithmetic that is merely extending the + // carry flag. We model this as the SETB_C* pseudo instructions. + rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + + default: + // Generically handle remaining uses as arithmetic instructions. 
+ rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + } + break; + } + + // If this was the last use of the flags, we're done. + if (FlagsKilled) + break; + } + + // If the flags were killed, we're done with this block. + if (FlagsKilled) + break; + + // Otherwise we need to scan successors for ones where the flags live-in + // and queue those up for processing. + for (MachineBasicBlock *SuccMBB : UseMBB.successors()) + if (SuccMBB->isLiveIn(X86::EFLAGS) && + VisitedBlocks.insert(SuccMBB).second) + Blocks.push_back(SuccMBB); + } while (!Blocks.empty()); + + // Now rewrite the jumps that use the flags. These we handle specially + // because if there are multiple jumps in a single basic block we'll have + // to do surgery on the CFG. + MachineBasicBlock *LastJmpMBB = nullptr; + for (MachineInstr *JmpI : JmpIs) { + // Past the first jump within a basic block we need to split the blocks + // apart. + if (JmpI->getParent() == LastJmpMBB) + splitBlock(*JmpI->getParent(), *JmpI, *TII); + else + LastJmpMBB = JmpI->getParent(); + + rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs); + } + + // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if + // the copy's def operand is itself a kill. + } + +#ifndef NDEBUG + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.getOpcode() == TargetOpcode::COPY && + (MI.getOperand(0).getReg() == X86::EFLAGS || + MI.getOperand(1).getReg() == X86::EFLAGS)) { + DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump()); + llvm_unreachable("Unlowered EFLAGS copy!"); + } +#endif + + return true; +} + +/// Collect any conditions that have already been set in registers so that we +/// can re-use them rather than adding duplicates. +CondRegArray +X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB, + MachineInstr &CopyDefI) { + CondRegArray CondRegs = {}; + + // Scan backwards across the range of instructions with live EFLAGS. + for (MachineInstr &MI : llvm::reverse( + llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) { + X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode()); + if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() && + TRI->isVirtualRegister(MI.getOperand(0).getReg())) + CondRegs[Cond] = MI.getOperand(0).getReg(); + + // Stop scanning when we see the first definition of the EFLAGS as prior to + // this we would potentially capture the wrong flag state. 
+ if (MI.findRegisterDefOperand(X86::EFLAGS)) + break; + } + return CondRegs; +} + +unsigned X86FlagsCopyLoweringPass::promoteCondToReg( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, X86::CondCode Cond) { + unsigned Reg = MRI->createVirtualRegister(PromoteRC); + auto SetI = BuildMI(TestMBB, TestPos, TestLoc, + TII->get(X86::getSETFromCond(Cond)), Reg); + (void)SetI; + DEBUG(dbgs() << " save cond: "; SetI->dump()); + ++NumSetCCsInserted; + return Reg; +} + +std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) { + unsigned &CondReg = CondRegs[Cond]; + unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)]; + if (!CondReg && !InvCondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + + if (CondReg) + return {CondReg, false}; + else + return {InvCondReg, true}; +} + +void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, + MachineBasicBlock::iterator Pos, + DebugLoc Loc, unsigned Reg) { + // We emit test instructions as register/immediate test against -1. This + // allows register allocation to fold a memory operand if needed (that will + // happen often due to the places this code is emitted). But hopefully will + // also allow us to select a shorter encoding of `testb %reg, %reg` when that + // would be equivalent. + auto TestI = + BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg); + (void)TestI; + DEBUG(dbgs() << " test cond: "; TestI->dump()); + ++NumTestsInserted; +} + +void X86FlagsCopyLoweringPass::rewriteArithmetic( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse, + CondRegArray &CondRegs) { + // Arithmetic is either reading CF or OF. Figure out which condition we need + // to preserve in a register. + X86::CondCode Cond; + + // The addend to use to reset CF or OF when added to the flag value. + int Addend; + + switch (getMnemonicFromOpcode(MI.getOpcode())) { + case FlagArithMnemonic::ADC: + case FlagArithMnemonic::ADCX: + case FlagArithMnemonic::RCL: + case FlagArithMnemonic::RCR: + case FlagArithMnemonic::SBB: + Cond = X86::COND_B; // CF == 1 + // Set up an addend that when one is added will need a carry due to not + // having a higher bit available. + Addend = 255; + break; + + case FlagArithMnemonic::ADOX: + Cond = X86::COND_O; // OF == 1 + // Set up an addend that when one is added will turn from positive to + // negative and thus overflow in the signed domain. + Addend = 127; + break; + } + + // Now get a register that contains the value of the flag input to the + // arithmetic. We require exactly this flag to simplify the arithmetic + // required to materialize it back into the flag. + unsigned &CondReg = CondRegs[Cond]; + if (!CondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + + MachineBasicBlock &MBB = *MI.getParent(); + + // Insert an instruction that will set the flag back to the desired value. 
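// --- Editor's illustrative aside (not part of the imported patch) ---
// rewriteArithmetic() above re-creates the flag consumed by ADC/ADCX/SBB/RCL/
// RCR (which read CF) or ADOX (which reads OF) from a GR8 holding the saved
// condition as 0 or 1: it adds 255 when CF is wanted and 127 when OF is
// wanted. A quick standalone check of that arithmetic, assuming an 8-bit add:

#include <cassert>
#include <cstdint>

// CF after an 8-bit add: set iff the unsigned sum does not fit in 8 bits.
static bool carryAfterAdd8(uint8_t A, uint8_t B) {
  return unsigned(A) + unsigned(B) > 0xFFu;
}

// OF after an 8-bit add: set iff the signed sum does not fit in int8_t.
static bool overflowAfterAdd8(uint8_t A, uint8_t B) {
  int Sum = int(int8_t(A)) + int(int8_t(B));
  return Sum < -128 || Sum > 127;
}

int main() {
  // The saved condition is 0 or 1, so each addend regenerates exactly one flag.
  assert(!carryAfterAdd8(0, 255) && carryAfterAdd8(1, 255));        // CF == cond
  assert(!overflowAfterAdd8(0, 127) && overflowAfterAdd8(1, 127));  // OF == cond
  return 0;
}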
+ unsigned TmpReg = MRI->createVirtualRegister(PromoteRC); + auto AddI = + BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) + .addDef(TmpReg, RegState::Dead) + .addReg(CondReg) + .addImm(Addend); + (void)AddI; + DEBUG(dbgs() << " add cond: "; AddI->dump()); + ++NumAddsInserted; + FlagUse.setIsKill(true); +} + +void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, + MachineInstr &CMovI, + MachineOperand &FlagUse, + CondRegArray &CondRegs) { + // First get the register containing this specific condition. + X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode()); + unsigned CondReg; + bool Inverted; + std::tie(CondReg, Inverted) = + getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); + + MachineBasicBlock &MBB = *CMovI.getParent(); + + // Insert a direct test of the saved register. + insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); + + // Rewrite the CMov to use the !ZF flag from the test (but match register + // size and memory operand), and then kill its use of the flags afterward. + auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg()); + CMovI.setDesc(TII->get(X86::getCMovFromCond( + Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8, + !CMovI.memoperands_empty()))); + FlagUse.setIsKill(true); + DEBUG(dbgs() << " fixed cmov: "; CMovI.dump()); +} + +void X86FlagsCopyLoweringPass::rewriteCondJmp( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) { + // First get the register containing this specific condition. + X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode()); + unsigned CondReg; + bool Inverted; + std::tie(CondReg, Inverted) = + getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); + + MachineBasicBlock &JmpMBB = *JmpI.getParent(); + + // Insert a direct test of the saved register. + insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg); + + // Rewrite the jump to use the !ZF flag from the test, and kill its use of + // flags afterward. + JmpI.setDesc(TII->get( + X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE))); + const int ImplicitEFLAGSOpIdx = 1; + JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true); + DEBUG(dbgs() << " fixed jCC: "; JmpI.dump()); +} + +void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, + MachineOperand &FlagUse, + MachineInstr &CopyDefI) { + // Just replace this copy with the the original copy def. + MRI->replaceRegWith(MI.getOperand(0).getReg(), + CopyDefI.getOperand(0).getReg()); + MI.eraseFromParent(); +} + +void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse, + CondRegArray &CondRegs) { + // This routine is only used to handle pseudos for setting a register to zero + // or all ones based on CF. This is essentially the sign extended from 1-bit + // form of SETB and modeled with the SETB_C* pseudos. They require special + // handling as they aren't normal SETcc instructions and are lowered to an + // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that + // they are only provided in reg-defining forms. A complicating factor is that + // they can define many different register widths. 
+ assert(SetBI.getOperand(0).isReg() && + "Cannot have a non-register defined operand to this variant of SETB!"); + + // Little helper to do the common final step of replacing the register def'ed + // by this SETB instruction with a new register and removing the SETB + // instruction. + auto RewriteToReg = [&](unsigned Reg) { + MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg); + SetBI.eraseFromParent(); + }; + + // Grab the register class used for this particular instruction. + auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg()); + + MachineBasicBlock &MBB = *SetBI.getParent(); + auto SetPos = SetBI.getIterator(); + auto SetLoc = SetBI.getDebugLoc(); + + auto AdjustReg = [&](unsigned Reg) { + auto &OrigRC = *MRI->getRegClass(Reg); + if (&OrigRC == &SetBRC) + return Reg; + + unsigned NewReg; + + int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8; + int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8; + assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!"); + assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!"); + int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit, + X86::NoSubRegister, X86::sub_32bit}; + + // If the original size is smaller than the target *and* is smaller than 4 + // bytes, we need to explicitly zero extend it. We always extend to 4-bytes + // to maximize the chance of being able to CSE that operation and to avoid + // partial dependency stalls extending to 2-bytes. + if (OrigRegSize < TargetRegSize && OrigRegSize < 4) { + NewReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg) + .addReg(Reg); + if (&SetBRC == &X86::GR32RegClass) + return NewReg; + Reg = NewReg; + OrigRegSize = 4; + } + + NewReg = MRI->createVirtualRegister(&SetBRC); + if (OrigRegSize < TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG), + NewReg) + .addImm(0) + .addReg(Reg) + .addImm(SubRegIdx[OrigRegSize]); + } else if (OrigRegSize > TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG), + NewReg) + .addReg(Reg) + .addImm(SubRegIdx[TargetRegSize]); + } else { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + } + return NewReg; + }; + + unsigned &CondReg = CondRegs[X86::COND_B]; + if (!CondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B); + + // Adjust the condition to have the desired register width by zero-extending + // as needed. + // FIXME: We should use a better API to avoid the local reference and using a + // different variable here. + unsigned ExtCondReg = AdjustReg(CondReg); + + // Now we need to turn this into a bitmask. We do this by subtracting it from + // zero. 
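// --- Editor's illustrative aside (not part of the imported patch) ---
// rewriteSetCarryExtended() above handles the SETB_C* pseudos, which set a
// register to all ones or zero depending on CF. It rebuilds that result from
// the saved 0/1 condition by zero-extending it to the target width and then
// subtracting it from zero, which yields exactly the all-zeros / all-ones
// mask. A standalone check of that identity (32-bit case shown):

#include <cassert>
#include <cstdint>

static uint32_t maskFromCarry(bool CF) {
  uint32_t Cond = CF ? 1u : 0u;  // the saved SETB value after widening
  return 0u - Cond;              // what the inserted SUB of matching width computes
}

int main() {
  assert(maskFromCarry(false) == 0x00000000u);
  assert(maskFromCarry(true)  == 0xFFFFFFFFu);
  return 0;
}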
+ unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); + ZeroReg = AdjustReg(ZeroReg); + + unsigned Sub; + switch (SetBI.getOpcode()) { + case X86::SETB_C8r: + Sub = X86::SUB8rr; + break; + + case X86::SETB_C16r: + Sub = X86::SUB16rr; + break; + + case X86::SETB_C32r: + Sub = X86::SUB32rr; + break; + + case X86::SETB_C64r: + Sub = X86::SUB64rr; + break; + + default: + llvm_unreachable("Invalid SETB_C* opcode!"); + } + unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); + BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) + .addReg(ZeroReg) + .addReg(ExtCondReg); + return RewriteToReg(ResultReg); +} + +void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, + MachineInstr &SetCCI, + MachineOperand &FlagUse, + CondRegArray &CondRegs) { + X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode()); + // Note that we can't usefully rewrite this to the inverse without complex + // analysis of the users of the setCC. Largely we rely on duplicates which + // could have been avoided already being avoided here. + unsigned &CondReg = CondRegs[Cond]; + if (!CondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + + // Rewriting a register def is trivial: we just replace the register and + // remove the setcc. + if (!SetCCI.mayStore()) { + assert(SetCCI.getOperand(0).isReg() && + "Cannot have a non-register defined operand to SETcc!"); + MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg); + SetCCI.eraseFromParent(); + return; + } + + // Otherwise, we need to emit a store. + auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(), + SetCCI.getDebugLoc(), TII->get(X86::MOV8mr)); + // Copy the address operands. + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.add(SetCCI.getOperand(i)); + + MIB.addReg(CondReg); + + MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end()); + + SetCCI.eraseFromParent(); + return; +} diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 10e19f92b4a6..c1ddb771e2fa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -27781,11 +27781,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF)); - // Permit reads of the FLAGS register without it being defined. + // Permit reads of the EFLAGS and DF registers without them being defined. // This intrinsic exists to read external processor state in flags, such as // the trap flag, interrupt flag, and direction flag, none of which are // modeled by the backend. + assert(Push->getOperand(2).getReg() == X86::EFLAGS && + "Unexpected register in operand!"); Push->getOperand(2).setIsUndef(); + assert(Push->getOperand(3).getReg() == X86::DF && + "Unexpected register in operand!"); + Push->getOperand(3).setIsUndef(); BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg()); MI.eraseFromParent(); // The pseudo is gone now. @@ -37829,25 +37834,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { } } -/// This function checks if any of the users of EFLAGS copies the EFLAGS. 
We -/// know that the code that lowers COPY of EFLAGS has to use the stack, and if -/// we don't adjust the stack we clobber the first frame index. -/// See X86InstrInfo::copyPhysReg. -static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return any_of(MRI.reg_instructions(X86::EFLAGS), - [](const MachineInstr &RI) { return RI.isCopy(); }); -} - -void X86TargetLowering::finalizeLowering(MachineFunction &MF) const { - if (hasCopyImplyingStackAdjustment(MF)) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - MFI.setHasCopyImplyingStackAdjustment(true); - } - - TargetLoweringBase::finalizeLowering(MF); -} - /// This method query the target whether it is beneficial for dag combiner to /// promote the specified node. If true, it should return the desired promotion /// type by reference. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3aa9d01bff20..7820c3e032e5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1099,9 +1099,6 @@ namespace llvm { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; - - void finalizeLowering(MachineFunction &MF) const override; - protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d09deb5b7584..98cc8fb7439e 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1334,7 +1334,7 @@ let Predicates = [HasBMI2] in { } //===----------------------------------------------------------------------===// -// ADCX Instruction +// ADCX and ADOX Instructions // let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], Constraints = "$src0 = $dst", AddedComplexity = 10 in { @@ -1349,6 +1349,15 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], [(set GR64:$dst, EFLAGS, (X86adc_flag GR64:$src0, GR64:$src, EFLAGS))], IIC_BIN_CARRY_NONMEM>, T8PD; + + // We don't have patterns for ADOX yet. + let hasSideEffects = 0 in { + def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src0, GR32:$src), + "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; + + def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src0, GR64:$src), + "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; + } // hasSideEffects = 0 } // SchedRW let mayLoad = 1, SchedRW = [WriteALULd] in { @@ -1363,27 +1372,14 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS], [(set GR64:$dst, EFLAGS, (X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))], IIC_BIN_CARRY_MEM>, T8PD; - } -} -//===----------------------------------------------------------------------===// -// ADOX Instruction -// -let Predicates = [HasADX], hasSideEffects = 0, Defs = [EFLAGS], - Uses = [EFLAGS] in { - let SchedRW = [WriteALU] in { - def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; - - def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; - } // SchedRW - - let mayLoad = 1, SchedRW = [WriteALULd] in { - def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + // We don't have patterns for ADOX yet. 
+ let hasSideEffects = 0 in { + def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src0, i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; - def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src0, i64mem:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; } + } // hasSideEffects = 0 } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index d66d9258e96f..b3371c96cc29 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -473,7 +473,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [ESP, SSP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", @@ -493,7 +493,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [RSP, SSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", @@ -509,7 +509,7 @@ def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function // call. All other registers are preserved. -let Defs = [EAX, ECX, EFLAGS], +let Defs = [EAX, ECX, EFLAGS, DF], Uses = [ESP, SSP], usesCustomInserter = 1 in def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), @@ -522,7 +522,7 @@ def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), // %rdi. The lowering will do the right thing with RDI. // On return the address of the variable is in %rax. All other // registers are preserved. -let Defs = [RAX, EFLAGS], +let Defs = [RAX, EFLAGS, DF], Uses = [RSP, SSP], usesCustomInserter = 1 in def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7ca1c58184f6..11ada51a8704 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -5782,7 +5782,7 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, return false; } -static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { +X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; case X86::JE_1: return X86::COND_E; @@ -5805,7 +5805,7 @@ static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { } /// Return condition code of a SET opcode. 
-static X86::CondCode getCondFromSETOpc(unsigned Opc) { +X86::CondCode X86::getCondFromSETOpc(unsigned Opc) { switch (Opc) { default: return X86::COND_INVALID; case X86::SETAr: case X86::SETAm: return X86::COND_A; @@ -6130,7 +6130,7 @@ void X86InstrInfo::replaceBranchWithTailCall( if (!I->isBranch()) assert(0 && "Can't find the branch to replace!"); - X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); + X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode()); assert(BranchCond.size() == 1); if (CC != BranchCond[0].getImm()) continue; @@ -6237,7 +6237,7 @@ bool X86InstrInfo::AnalyzeBranchImpl( } // Handle conditional branches. - X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); + X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. @@ -6433,7 +6433,7 @@ unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, if (I->isDebugValue()) continue; if (I->getOpcode() != X86::JMP_1 && - getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) + X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. I->eraseFromParent(); @@ -6710,102 +6710,12 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - bool FromEFLAGS = SrcReg == X86::EFLAGS; - bool ToEFLAGS = DestReg == X86::EFLAGS; - int Reg = FromEFLAGS ? DestReg : SrcReg; - bool is32 = X86::GR32RegClass.contains(Reg); - bool is64 = X86::GR64RegClass.contains(Reg); - - if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { - int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; - int Push = is64 ? X86::PUSH64r : X86::PUSH32r; - int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; - int Pop = is64 ? X86::POP64r : X86::POP32r; - int PopF = is64 ? X86::POPF64 : X86::POPF32; - int AX = is64 ? X86::RAX : X86::EAX; - - if (!Subtarget.hasLAHFSAHF()) { - assert(Subtarget.is64Bit() && - "Not having LAHF/SAHF only happens on 64-bit."); - // Moving EFLAGS to / from another register requires a push and a pop. - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - usesTheStack. - if (FromEFLAGS) { - BuildMI(MBB, MI, DL, get(PushF)); - BuildMI(MBB, MI, DL, get(Pop), DestReg); - } - if (ToEFLAGS) { - BuildMI(MBB, MI, DL, get(Push)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(PopF)); - } - return; - } - - // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is - // inefficient. Instead: - // - Save the overflow flag OF into AL using SETO, and restore it using a - // signed 8-bit addition of AL and INT8_MAX. - // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH - // using LAHF/SAHF. - // - When RAX/EAX is live and isn't the destination register, make sure it - // isn't clobbered by PUSH/POP'ing it before and after saving/restoring - // the flags. - // This approach is ~2.25x faster than using PUSHF/POPF. - // - // This is still somewhat inefficient because we don't know which flags are - // actually live inside EFLAGS. Were we able to do a single SETcc instead of - // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster. - // - // PUSHF/POPF is also potentially incorrect because it affects other flags - // such as TF/IF/DF, which LLVM doesn't model. - // - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. - // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. 
- - const TargetRegisterInfo &TRI = getRegisterInfo(); - MachineBasicBlock::LivenessQueryResult LQR = - MBB.computeRegisterLiveness(&TRI, AX, MI); - // We do not want to save and restore AX if we do not have to. - // Moreover, if we do so whereas AX is dead, we would need to set - // an undef flag on the use of AX, otherwise the verifier will - // complain that we read an undef value. - // We do not want to change the behavior of the machine verifier - // as this is usually wrong to read an undef value. - if (MachineBasicBlock::LQR_Unknown == LQR) { - LivePhysRegs LPR(TRI); - LPR.addLiveOuts(MBB); - MachineBasicBlock::iterator I = MBB.end(); - while (I != MI) { - --I; - LPR.stepBackward(*I); - } - // AX contains the top most register in the aliasing hierarchy. - // It may not be live, but one of its aliases may be. - for (MCRegAliasIterator AI(AX, &TRI, true); - AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI) - LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live - : MachineBasicBlock::LQR_Dead; - } - bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR); - if (!AXDead) - BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); - if (FromEFLAGS) { - BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); - BuildMI(MBB, MI, DL, get(X86::LAHF)); - BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); - } - if (ToEFLAGS) { - BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) - .addReg(X86::AL) - .addImm(INT8_MAX); - BuildMI(MBB, MI, DL, get(X86::SAHF)); - } - if (!AXDead) - BuildMI(MBB, MI, DL, get(Pop), AX); - return; + if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) { + // FIXME: We use a fatal error here because historically LLVM has tried + // lower some of these physreg copies and we want to ensure we get + // reasonable bug reports if someone encounters a case no other testing + // found. This path should be removed after the LLVM 7 release. + report_fatal_error("Unable to copy EFLAGS physical register!"); } DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) @@ -7465,9 +7375,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (IsCmpZero || IsSwapped) { // We decode the condition code from opcode. if (Instr.isBranch()) - OldCC = getCondFromBranchOpc(Instr.getOpcode()); + OldCC = X86::getCondFromBranchOpc(Instr.getOpcode()); else { - OldCC = getCondFromSETOpc(Instr.getOpcode()); + OldCC = X86::getCondFromSETOpc(Instr.getOpcode()); if (OldCC != X86::COND_INVALID) OpcIsSET = true; else @@ -9413,8 +9323,9 @@ bool X86InstrInfo:: isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { // FIXME: Return false for x87 stack register classes for now. We can't // allow any loads of these registers before FpGet_ST0_80. - return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || - RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); + return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass || + RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass || + RC == &X86::RFP80RegClass); } /// Return a virtual register initialized with the diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 02a09c340cef..2b5ad934f9b1 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -77,6 +77,12 @@ unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, bool HasMemoryOperand = false); +// Turn jCC opcode into condition code. 
+CondCode getCondFromBranchOpc(unsigned Opc); + +// Turn setCC opcode into condition code. +CondCode getCondFromSETOpc(unsigned Opc); + // Turn CMov opcode into condition code. CondCode getCondFromCMovOpc(unsigned Opc); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a657b19c08c9..68f40c28d527 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1235,18 +1235,18 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, SchedRW = [WriteRMW] in { - let Defs = [ESP, EFLAGS], Uses = [ESP] in + let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src), [(int_x86_flags_write_u32 GR32:$src)]>, Requires<[Not64BitMode]>; - let Defs = [RSP, EFLAGS], Uses = [RSP] in + let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src), [(int_x86_flags_write_u64 GR64:$src)]>, Requires<[In64BitMode]>; } -let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, +let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0, SchedRW = [WriteLoad] in { def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize16; @@ -1254,7 +1254,7 @@ def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, OpSize32, Requires<[Not64BitMode]>; } -let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0, +let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0, SchedRW = [WriteStore] in { def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, OpSize16; @@ -1294,10 +1294,10 @@ def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm), Requires<[In64BitMode]>; } -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in +let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in +let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>; @@ -1382,8 +1382,7 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { +let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in { def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src), "movsb\t{$src, $dst|$dst, $src}", [], IIC_MOVS>; def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), @@ -1394,36 +1393,33 @@ def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src), "movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>; } -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in +let Defs = [EDI], Uses = [AL,EDI,DF] in def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst), "stosb\t{%al, $dst|$dst, al}", [], IIC_STOS>; -let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in +let Defs = [EDI], Uses = [AX,EDI,DF] in def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst), "stosw\t{%ax, $dst|$dst, ax}", [], IIC_STOS>, OpSize16; -let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in +let Defs = [EDI], Uses = [EAX,EDI,DF] in 
def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst), "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32; -let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in +let Defs = [RDI], Uses = [RAX,RDI,DF] in def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst), "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>; -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst), "scasb\t{$dst, %al|al, $dst}", [], IIC_SCAS>; -let Defs = [EDI,EFLAGS], Uses = [AX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst), "scasw\t{$dst, %ax|ax, $dst}", [], IIC_SCAS>, OpSize16; -let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst), "scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32; -let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst), "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>; -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in { +let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in { def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src), "cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>; def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), @@ -2070,8 +2066,7 @@ def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", [], IIC_NOP>, } // SchedRW // Repeat string operation instruction prefixes -// These use the DF flag in the EFLAGS register to inc or dec ECX -let Defs = [ECX], Uses = [ECX,EFLAGS], SchedRW = [WriteMicrocoded] in { +let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in { // Repeat (used with INS, OUTS, MOVS, LODS and STOS) def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; // Repeat while not equal (used with CMPS and SCAS) @@ -2080,24 +2075,22 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; // String manipulation instructions let SchedRW = [WriteMicrocoded] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [AL,ESI], Uses = [ESI,EFLAGS] in +let Defs = [AL,ESI], Uses = [ESI,DF] in def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src), "lodsb\t{$src, %al|al, $src}", [], IIC_LODS>; -let Defs = [AX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [AX,ESI], Uses = [ESI,DF] in def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src), "lodsw\t{$src, %ax|ax, $src}", [], IIC_LODS>, OpSize16; -let Defs = [EAX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [EAX,ESI], Uses = [ESI,DF] in def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src), "lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32; -let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [RAX,ESI], Uses = [ESI,DF] in def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src), "lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>; } let SchedRW = [WriteSystem] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [ESI], Uses = [DX,ESI,EFLAGS] in { +let Defs = [ESI], Uses = [DX,ESI,DF] in { def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src), "outsb\t{$src, %dx|dx, $src}", [], IIC_OUTS>; def OUTSW : I<0x6F, RawFrmSrc, 
(outs), (ins srcidx16:$src), @@ -2106,8 +2099,7 @@ def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src), "outs{l|d}\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize32; } -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI], Uses = [DX,EDI,EFLAGS] in { +let Defs = [EDI], Uses = [DX,EDI,DF] in { def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst), "insb\t{%dx, $dst|$dst, dx}", [], IIC_INS>; def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst), @@ -2117,19 +2109,22 @@ def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst), } } -// Flag instructions -let SchedRW = [WriteALU] in { -def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>; -def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>; -def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; -def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; +// EFLAGS management instructions. +let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in { +def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC_CMC_STC>; +def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_CLC_CMC_STC>; +def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CLC_CMC_STC>; +} + +// DF management instructions. +// FIXME: These are a bit more expensive than CLC and STC. We should consider +// adjusting their schedule bucket. +let SchedRW = [WriteALU], Defs = [DF] in { def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>; def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; -def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>; - -def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; } + // Table lookup instructions let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 40d2dca4f9ec..576f87b13ab4 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -693,6 +693,19 @@ let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in { } // SchedRW //===----------------------------------------------------------------------===// +// TS flag control instruction. +let SchedRW = [WriteSystem] in { +def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; +} + +//===----------------------------------------------------------------------===// +// IF (inside EFLAGS) management instructions. +let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in { +def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; +def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; +} + +//===----------------------------------------------------------------------===// // RDPID Instruction let SchedRW = [WriteSystem] in { def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins), diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 2341e1fb0fac..1a776dcd04eb 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -251,9 +251,19 @@ def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>; // Floating-point status word def FPSW : X86Reg<"fpsw", 0>; -// Status flags register +// Status flags register. +// +// Note that some flags that are commonly thought of as part of the status +// flags register are modeled separately. Typically this is due to instructions +// reading and updating those flags independently of all the others. 
We don't +// want to create false dependencies between these instructions and so we use +// a separate register to model them. def EFLAGS : X86Reg<"flags", 0>; +// The direction flag. +def DF : X86Reg<"DF", 0>; + + // Segment registers def CS : X86Reg<"cs", 1>; def DS : X86Reg<"ds", 3>; @@ -497,6 +507,10 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } +def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} // AVX-512 vector/mask registers. def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 2e21a97541b2..078d459634ce 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -608,12 +608,10 @@ def IIC_CMPXCHG_8B : InstrItinClass; def IIC_CMPXCHG_16B : InstrItinClass; def IIC_LODS : InstrItinClass; def IIC_OUTS : InstrItinClass; -def IIC_CLC : InstrItinClass; +def IIC_CLC_CMC_STC : InstrItinClass; def IIC_CLD : InstrItinClass; def IIC_CLI : InstrItinClass; -def IIC_CMC : InstrItinClass; def IIC_CLTS : InstrItinClass; -def IIC_STC : InstrItinClass; def IIC_STI : InstrItinClass; def IIC_STD : InstrItinClass; def IIC_XLAT : InstrItinClass; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index e052ad98104c..460b9823a7e7 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -514,12 +514,10 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >, InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >, InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >, - InstrItinData<IIC_CLC, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >, InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >, - InstrItinData<IIC_CMC, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >, - InstrItinData<IIC_STC, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >, InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >, InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ac242e1c00e0..e41e16d82d83 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -62,6 +62,7 @@ void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); void initializeX86ExecutionDepsFixPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); +void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); } // end namespace llvm @@ -80,6 +81,7 @@ extern "C" void LLVMInitializeX86Target() { initializeX86CmovConverterPassPass(PR); initializeX86ExecutionDepsFixPass(PR); initializeX86DomainReassignmentPass(PR); + initializeX86FlagsCopyLoweringPassPass(PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -415,6 +417,7 @@ void X86PassConfig::addPreRegAlloc() { addPass(createX86CallFrameOptimization()); } + addPass(createX86FlagsCopyLoweringPass()); addPass(createX86WinAllocaExpander()); } void X86PassConfig::addMachineSSAOptimization() { diff --git 
a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index b25cbcad3b9d..76c4a8fbc16e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -847,10 +847,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; + // Can't change signature of musttail callee + if (CS.isMustTailCall()) + return nullptr; + if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; } + // Can't change signature of musttail caller + // FIXME: Support promoting whole chain of musttail functions + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); AAResults &AAR = AARGetter(*F); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 5446541550e5..b2afa6f2c9cd 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -507,14 +507,28 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // MaybeLive. Initialized to a list of RetCount empty lists. RetUses MaybeLiveRetUses(RetCount); - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + bool HasMustTailCalls = false; + + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() != F.getFunctionType()->getReturnType()) { // We don't support old style multiple return values. MarkLive(F); return; } + } + + // If we have any returns of `musttail` results - the signature can't + // change + if (BB->getTerminatingMustTailCall() != nullptr) + HasMustTailCalls = true; + } + + if (HasMustTailCalls) { + DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName() + << " has musttail calls\n"); + } if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) { MarkLive(F); @@ -526,6 +540,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; + + bool HasMustTailCallers = false; + // Loop all uses of the function. for (const Use &U : F.uses()) { // If the function is PASSED IN as an argument, its address has been @@ -536,6 +553,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { return; } + // The number of arguments for `musttail` call must match the number of + // arguments of the caller + if (CS.isMustTailCall()) + HasMustTailCallers = true; + // If this use is anything other than a call site, the function is alive. const Instruction *TheCall = CS.getInstruction(); if (!TheCall) { // Not a direct call site? @@ -580,6 +602,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } } + if (HasMustTailCallers) { + DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName() + << " has musttail callers\n"); + } + // Now we've inspected all callers, record the liveness of our return values. 
for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); @@ -593,12 +620,19 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { for (Function::const_arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; ++AI, ++i) { Liveness Result; - if (F.getFunctionType()->isVarArg()) { + if (F.getFunctionType()->isVarArg() || HasMustTailCallers || + HasMustTailCalls) { // Variadic functions will already have a va_arg function expanded inside // them, making them potentially very sensitive to ABI changes resulting // from removing arguments entirely, so don't. For example AArch64 handles // register and stack HFAs very differently, and this is reflected in the // IR which has already been generated. + // + // `musttail` calls to this function restrict argument removal attempts. + // The signature of the caller must match the signature of the function. + // + // `musttail` calls in this function prevents us from changing its + // signature Result = Live; } else { // See what the effect of this use is (recording any uses that cause diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4bb2984e3b47..e0bbf45d316a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2099,8 +2099,31 @@ static void RemoveNestAttribute(Function *F) { /// GHC, or anyregcc. static bool isProfitableToMakeFastCC(Function *F) { CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? - return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; + if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) + return false; + + // FIXME: Change CC for the whole chain of musttail calls when possible. + // + // Can't change CC of the function that either has musttail calls, or is a + // musttail callee itself + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallInst* CI = dyn_cast<CallInst>(U); + if (!CI) + continue; + + if (CI->isMustTailCall()) + return false; + } + + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return false; + + return true; } static bool diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 76b90391fbb1..8886af90ba65 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -638,6 +638,19 @@ void MergeFunctions::filterInstsUnrelatedToPDI( DEBUG(dbgs() << " }\n"); } +// Don't merge tiny functions using a thunk, since it can just end up +// making the function larger. +static bool isThunkProfitable(Function * F) { + if (F->size() == 1) { + if (F->front().size() <= 2) { + DEBUG(dbgs() << "isThunkProfitable: " << F->getName() + << " is too small to bother creating a thunk for\n"); + return false; + } + } + return true; +} + // Replace G with a simple tail call to bitcast(F). Also (unless // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), // delete G. Under MergeFunctionsPDI, we use G itself for creating @@ -647,39 +660,6 @@ void MergeFunctions::filterInstsUnrelatedToPDI( // For better debugability, under MergeFunctionsPDI, we do not modify G's // call sites to point to F even when within the same translation unit. 
void MergeFunctions::writeThunk(Function *F, Function *G) { - if (!G->isInterposable() && !MergeFunctionsPDI) { - if (G->hasGlobalUnnamedAddr()) { - // G might have been a key in our GlobalNumberState, and it's illegal - // to replace a key in ValueMap<GlobalValue *> with a non-global. - GlobalNumbers.erase(G); - // If G's address is not significant, replace it entirely. - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - G->replaceAllUsesWith(BitcastF); - } else { - // Redirect direct callers of G to F. (See note on MergeFunctionsPDI - // above). - replaceDirectCallers(G, F); - } - } - - // If G was internal then we may have replaced all uses of G with F. If so, - // stop here and delete G. There's no need for a thunk. (See note on - // MergeFunctionsPDI above). - if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) { - G->eraseFromParent(); - return; - } - - // Don't merge tiny functions using a thunk, since it can just end up - // making the function larger. - if (F->size() == 1) { - if (F->front().size() <= 2) { - DEBUG(dbgs() << "writeThunk: " << F->getName() - << " is too small to bother creating a thunk for\n"); - return; - } - } - BasicBlock *GEntryBlock = nullptr; std::vector<Instruction *> PDIUnrelatedWL; BasicBlock *BB = nullptr; @@ -754,6 +734,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { if (F->isInterposable()) { assert(G->isInterposable()); + if (!isThunkProfitable(F)) { + return; + } + // Make them both thunks to the same internal function. Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", F->getParent()); @@ -770,11 +754,41 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { F->setAlignment(MaxAlignment); F->setLinkage(GlobalValue::PrivateLinkage); ++NumDoubleWeak; + ++NumFunctionsMerged; } else { + // For better debugability, under MergeFunctionsPDI, we do not modify G's + // call sites to point to F even when within the same translation unit. + if (!G->isInterposable() && !MergeFunctionsPDI) { + if (G->hasGlobalUnnamedAddr()) { + // G might have been a key in our GlobalNumberState, and it's illegal + // to replace a key in ValueMap<GlobalValue *> with a non-global. + GlobalNumbers.erase(G); + // If G's address is not significant, replace it entirely. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + G->replaceAllUsesWith(BitcastF); + } else { + // Redirect direct callers of G to F. (See note on MergeFunctionsPDI + // above). + replaceDirectCallers(G, F); + } + } + + // If G was internal then we may have replaced all uses of G with F. If so, + // stop here and delete G. There's no need for a thunk. (See note on + // MergeFunctionsPDI above). + if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) { + G->eraseFromParent(); + ++NumFunctionsMerged; + return; + } + + if (!isThunkProfitable(F)) { + return; + } + writeThunk(F, G); + ++NumFunctionsMerged; } - - ++NumFunctionsMerged; } /// Replace function F by function G. 
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index b332e75c7feb..8fa7d0684b94 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -34,6 +34,7 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm-c/Initialization.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -1946,13 +1947,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. + Value *ASCStrippedPtrOp = PtrOp; if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) { // X = bitcast A addrspace(1)* to B addrspace(1)* // Y = addrspacecast A addrspace(1)* to B addrspace(2)* // Z = gep Y, <...constant indices...> // Into an addrspacecasted GEP of the struct. if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0))) - PtrOp = BC; + ASCStrippedPtrOp = BC; } /// See if we can simplify: @@ -1960,7 +1962,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp index 4edea7cc3c82..7488cd5af8be 100644 --- a/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -201,6 +201,46 @@ static bool canSplitCallSite(CallSite CS) { return CallSiteBB->canSplitPredecessors(); } +static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before, + Value *V) { + Instruction *Copy = I->clone(); + Copy->setName(I->getName()); + Copy->insertBefore(Before); + if (V) + Copy->setOperand(0, V); + return Copy; +} + +/// Copy mandatory `musttail` return sequence that follows original `CI`, and +/// link it up to `NewCI` value instead: +/// +/// * (optional) `bitcast NewCI to ...` +/// * `ret bitcast or NewCI` +/// +/// Insert this sequence right before `SplitBB`'s terminator, which will be +/// cleaned up later in `splitCallSite` below. +static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI, + Instruction *NewCI) { + bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy(); + auto II = std::next(CI->getIterator()); + + BitCastInst *BCI = dyn_cast<BitCastInst>(&*II); + if (BCI) + ++II; + + ReturnInst *RI = dyn_cast<ReturnInst>(&*II); + assert(RI && "`musttail` call must be followed by `ret` instruction"); + + TerminatorInst *TI = SplitBB->getTerminator(); + Value *V = NewCI; + if (BCI) + V = cloneInstForMustTail(BCI, TI, V); + cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V); + + // FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug + // that prevents doing this now. 
+} + /// Return true if the CS is split into its new predecessors which are directly /// hooked to each of its original predecessors pointed by PredBB1 and PredBB2. /// CallInst1 and CallInst2 will be the new call-sites placed in the new @@ -245,6 +285,7 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, Instruction *CallInst1, Instruction *CallInst2) { Instruction *Instr = CS.getInstruction(); BasicBlock *TailBB = Instr->getParent(); + bool IsMustTailCall = CS.isMustTailCall(); assert(Instr == (TailBB->getFirstNonPHIOrDbg()) && "Unexpected call-site"); BasicBlock *SplitBlock1 = @@ -276,9 +317,14 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, ++ArgNo; } } + // Clone and place bitcast and return instructions before `TI` + if (IsMustTailCall) { + copyMustTailReturn(SplitBlock1, CS.getInstruction(), CallInst1); + copyMustTailReturn(SplitBlock2, CS.getInstruction(), CallInst2); + } // Replace users of the original call with a PHI mering call-sites split. - if (Instr->getNumUses()) { + if (!IsMustTailCall && Instr->getNumUses()) { PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", TailBB->getFirstNonPHI()); PN->addIncoming(CallInst1, SplitBlock1); @@ -290,8 +336,25 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, << "\n"); DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName() << "\n"); - Instr->eraseFromParent(); + NumCallSiteSplit++; + + // FIXME: remove TI in `copyMustTailReturn` + if (IsMustTailCall) { + // Remove superfluous `br` terminators from the end of the Split blocks + // NOTE: Removing terminator removes the SplitBlock from the TailBB's + // predecessors. Therefore we must get complete list of Splits before + // attempting removal. + SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB))); + assert(Splits.size() == 2 && "Expected exactly 2 splits!"); + for (unsigned i = 0; i < Splits.size(); i++) + Splits[i]->getTerminator()->eraseFromParent(); + + // Erase the tail block once done with musttail patching + TailBB->eraseFromParent(); + return; + } + Instr->eraseFromParent(); } // Return true if the call-site has an argument which is a PHI with only @@ -369,7 +432,17 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { Function *Callee = CS.getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; + + // Successful musttail call-site splits result in erased CI and erased BB. + // Check if such path is possible before attempting the splitting. + bool IsMustTail = CS.isMustTailCall(); + Changed |= tryToSplitCallSite(CS); + + // There're no interesting instructions after this. The call site + // itself might have been erased on splitting. 
+ if (IsMustTail) + break; } } return Changed; diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp index e383af89a384..e1bc590c5c9a 100644 --- a/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/lib/Transforms/Scalar/DivRemPairs.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -48,7 +50,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Insert all divide and remainder instructions into maps keyed by their // operands and opcode (signed or unsigned). - DenseMap<DivRemMapKey, Instruction *> DivMap, RemMap; + DenseMap<DivRemMapKey, Instruction *> DivMap; + // Use a MapVector for RemMap so that instructions are moved/inserted in a + // deterministic order. + MapVector<DivRemMapKey, Instruction *> RemMap; for (auto &BB : F) { for (auto &I : BB) { if (I.getOpcode() == Instruction::SDiv) @@ -67,14 +72,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // rare than division. for (auto &RemPair : RemMap) { // Find the matching division instruction from the division map. - Instruction *DivInst = DivMap[RemPair.getFirst()]; + Instruction *DivInst = DivMap[RemPair.first]; if (!DivInst) continue; // We have a matching pair of div/rem instructions. If one dominates the // other, hoist and/or replace one. NumPairs++; - Instruction *RemInst = RemPair.getSecond(); + Instruction *RemInst = RemPair.second; bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 141c9938bf8b..2f1645433fb8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1454,6 +1454,9 @@ FindMostPopularDest(BasicBlock *BB, if (PredToDest.second) DestPopularity[PredToDest.second]++; + if (DestPopularity.empty()) + return nullptr; + // Find the most popular dest. DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); BasicBlock *MostPopularDest = DPI->first; @@ -1629,8 +1632,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // threadable destination (the common case) we can avoid this. BasicBlock *MostPopularDest = OnlyDest; - if (MostPopularDest == MultipleDestSentinel) + if (MostPopularDest == MultipleDestSentinel) { + // Remove any loop headers from the Dest list, ThreadEdge conservatively + // won't process them, but we might have other destination that are eligible + // and we still want to process. + erase_if(PredToDestList, + [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) { + return LoopHeaders.count(PredToDest.second) != 0; + }); + + if (PredToDestList.empty()) + return false; + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + } // Now that we know what the most popular destination is, factor all // predecessors that will jump to it into a single predecessor. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 9dc550ceaeca..3e12649ddedc 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -223,6 +223,10 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { /// represented here for efficient lookup. 
SmallPtrSet<Function *, 16> MRVFunctionsTracked; + /// MustTailFunctions - Each function here is a callee of non-removable + /// musttail call site. + SmallPtrSet<Function *, 16> MustTailCallees; + /// TrackingIncomingArguments - This is the set of functions for whose /// arguments we make optimistic assumptions about and try to prove as /// constants. @@ -289,6 +293,18 @@ public: TrackedRetVals.insert(std::make_pair(F, LatticeVal())); } + /// AddMustTailCallee - If the SCCP solver finds that this function is called + /// from non-removable musttail call site. + void AddMustTailCallee(Function *F) { + MustTailCallees.insert(F); + } + + /// Returns true if the given function is called from non-removable musttail + /// call site. + bool isMustTailCallee(Function *F) { + return MustTailCallees.count(F); + } + void AddArgumentTrackedFunction(Function *F) { TrackingIncomingArguments.insert(F); } @@ -358,6 +374,12 @@ public: return MRVFunctionsTracked; } + /// getMustTailCallees - Get the set of functions which are called + /// from non-removable musttail call sites. + const SmallPtrSet<Function *, 16> getMustTailCallees() { + return MustTailCallees; + } + /// markOverdefined - Mark the specified value overdefined. This /// works with both scalars and structs. void markOverdefined(Value *V) { @@ -1672,6 +1694,23 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { IV.isConstant() ? IV.getConstant() : UndefValue::get(V->getType()); } assert(Const && "Constant is nullptr here!"); + + // Replacing `musttail` instructions with constant breaks `musttail` invariant + // unless the call itself can be removed + CallInst *CI = dyn_cast<CallInst>(V); + if (CI && CI->isMustTailCall() && !isInstructionTriviallyDead(CI)) { + CallSite CS(CI); + Function *F = CS.getCalledFunction(); + + // Don't zap returns of the callee + if (F) + Solver.AddMustTailCallee(F); + + DEBUG(dbgs() << " Can\'t treat the result of musttail call : " << *CI + << " as a constant\n"); + return false; + } + DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n'); // Replaces all of the uses of a variable with uses of the constant. @@ -1802,10 +1841,26 @@ static void findReturnsToZap(Function &F, if (!Solver.isArgumentTrackedFunction(&F)) return; - for (BasicBlock &BB : F) + // There is a non-removable musttail call site of this function. Zapping + // returns is not allowed. 
+ if (Solver.isMustTailCallee(&F)) { + DEBUG(dbgs() << "Can't zap returns of the function : " << F.getName() + << " due to present musttail call of it\n"); + return; + } + + for (BasicBlock &BB : F) { + if (CallInst *CI = BB.getTerminatingMustTailCall()) { + DEBUG(dbgs() << "Can't zap return of the block due to present " + << "musttail call : " << *CI << "\n"); + (void)CI; + return; + } + if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) ReturnsToZap.push_back(RI); + } } static bool runIPSCCP(Module &M, const DataLayout &DL, diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index bddcbd86e914..75539428b688 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -710,7 +710,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L, return Res; if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) return Res; - llvm_unreachable("InlineAsm blocks were not uniqued."); + assert(L->getFunctionType() != R->getFunctionType()); return 0; } diff --git a/test/Analysis/MemorySSA/pr36883.ll b/test/Analysis/MemorySSA/pr36883.ll new file mode 100644 index 000000000000..8411b0c228b8 --- /dev/null +++ b/test/Analysis/MemorySSA/pr36883.ll @@ -0,0 +1,38 @@ +; RUN: opt -basicaa -memoryssa -analyze < %s 2>&1 -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -S < %s 2>&1 | FileCheck %s +; +; We weren't properly considering the args in callsites in equality or hashing. + +target triple = "armv7-dcg-linux-gnueabi" + +; CHECK-LABEL: define <8 x i16> @vpx_idct32_32_neon +define <8 x i16> @vpx_idct32_32_neon(i8* %p, <8 x i16> %v) { +entry: +; CHECK: MemoryUse(liveOnEntry) + %load1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 2) #4 ; load CSE replacement + +; CHECK: 1 = MemoryDef(liveOnEntry) + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> %v, i32 2) #4 ; clobber + + %p_next = getelementptr inbounds i8, i8* %p, i32 16 +; CHECK: MemoryUse(liveOnEntry) + %load2 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p_next, i32 2) #4 ; non-aliasing load needed to trigger bug + +; CHECK: MemoryUse(1) + %load3 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 2) #4 ; load CSE removed + + %add = add <8 x i16> %load1, %load2 + %ret = add <8 x i16> %add, %load3 + ret <8 x i16> %ret +} + +; Function Attrs: argmemonly nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) #1 + +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind readonly } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll index 14beb1ae9c36..1032a6d620ba 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll @@ -28,6 +28,28 @@ return: ; preds = %if.then172, %cond.e ret void } +; Avoid an assert/bad codegen in LD1LANEPOST lowering by not forming +; LD1LANEPOST ISD nodes with a non-constant lane index. 
+define <4 x i32> @f2(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2, i32 %idx) { + %L0 = load i32, i32* %p + %p1 = getelementptr i32, i32* %p, i64 1 + %L1 = load i32, i32* %p1 + %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2 + %vret = insertelement <4 x i32> %v, i32 %L0, i32 %idx + store i32 %L1, i32 *%p + ret <4 x i32> %vret +} + +; Check that a cycle is avoided during isel between the LD1LANEPOST instruction and the load of %L1. +define <4 x i32> @f3(i32 *%p, <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2) { + %L0 = load i32, i32* %p + %p1 = getelementptr i32, i32* %p, i64 1 + %L1 = load i32, i32* %p1 + %v = select <4 x i1> %m, <4 x i32> %v1, <4 x i32> %v2 + %vret = insertelement <4 x i32> %v, i32 %L0, i32 %L1 + ret <4 x i32> %vret +} + ; Function Attrs: nounwind readnone declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1 diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 2fb9d3b2d030..664078fb7e94 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -1,27 +1,31 @@ -; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL +; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE +; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16 +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS -; rdar://11481771 -; rdar://13713797 +declare void @bar(half, float, double, <2 x double>) +declare void @bari(i32, i32) +declare void @barl(i64, i64) +declare void @barf(float, float) define void @t1() nounwind ssp { entry: ; ALL-LABEL: t1: ; ALL-NOT: fmov -; CYCLONE: fmov d0, xzr -; CYCLONE: fmov d1, xzr +; ALL: ldr h0,{{.*}} +; CYCLONE: fmov s1, wzr ; CYCLONE: fmov d2, xzr -; CYCLONE: fmov d3, xzr -; KRYO: movi v0.2d, #0000000000000000 -; KRYO: movi v1.2d, #0000000000000000 -; KRYO: movi v2.2d, #0000000000000000 -; KRYO: movi v3.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 -; FALKOR: movi v1.2d, #0000000000000000 -; FALKOR: movi v2.2d, #0000000000000000 -; FALKOR: movi v3.2d, #0000000000000000 - tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind +; CYCLONE: movi.16b v3, #0 +; CYCLONE-FULLFP16: fmov h0, wzr +; CYCLONE-FULLFP16: fmov s1, wzr +; CYCLONE-FULLFP16: fmov d2, xzr +; CYCLONE-FULLFP16: movi.16b v3, #0 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 + tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind ret void } @@ -29,8 +33,8 @@ define void @t2() nounwind ssp { entry: ; ALL-LABEL: t2: ; ALL-NOT: mov w0, wzr -; ALL: mov w0, #0 -; ALL: mov w1, #0 +; ALL: mov w{{[0-3]+}}, #0 
+; ALL: mov w{{[0-3]+}}, #0 tail call void @bari(i32 0, i32 0) nounwind ret void } @@ -39,8 +43,8 @@ define void @t3() nounwind ssp { entry: ; ALL-LABEL: t3: ; ALL-NOT: mov x0, xzr -; ALL: mov x0, #0 -; ALL: mov x1, #0 +; ALL: mov x{{[0-3]+}}, #0 +; ALL: mov x{{[0-3]+}}, #0 tail call void @barl(i64 0, i64 0) nounwind ret void } @@ -48,26 +52,21 @@ entry: define void @t4() nounwind ssp { ; ALL-LABEL: t4: ; ALL-NOT: fmov -; CYCLONE: fmov s0, wzr -; CYCLONE: fmov s1, wzr -; KRYO: movi v0.2d, #0000000000000000 -; KRYO: movi v1.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 -; FALKOR: movi v1.2d, #0000000000000000 +; CYCLONE: fmov s{{[0-3]+}}, wzr +; CYCLONE: fmov s{{[0-3]+}}, wzr +; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr +; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 +; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000 tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind ret void } -declare void @bar(double, double, double, double) -declare void @bari(i32, i32) -declare void @barl(i64, i64) -declare void @barf(float, float) - ; We used to produce spills+reloads for a Q register with zero cycle zeroing ; enabled. ; ALL-LABEL: foo: -; ALL-NOT: str {{q[0-9]+}} -; ALL-NOT: ldr {{q[0-9]+}} +; ALL-NOT: str q{{[0-9]+}} +; ALL-NOT: ldr q{{[0-9]+}} define double @foo(i32 %n) { entry: br label %for.body @@ -90,8 +89,7 @@ for.end: define <2 x i64> @t6() { ; ALL-LABEL: t6: ; CYCLONE: movi.16b v0, #0 -; KRYO: movi v0.2d, #0000000000000000 -; FALKOR: movi v0.2d, #0000000000000000 +; OTHERS: movi v0.2d, #0000000000000000 ret <2 x i64> zeroinitializer } diff --git a/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/test/CodeGen/AArch64/falkor-hwpf-fix.mir index 38622ae0e49a..28b19f877685 100644 --- a/test/CodeGen/AArch64/falkor-hwpf-fix.mir +++ b/test/CodeGen/AArch64/falkor-hwpf-fix.mir @@ -353,3 +353,28 @@ body: | bb.1: RET_ReallyLR ... +--- +# Check that non-base registers are considered live when finding a +# scratch register by making sure we don't use %x2 for the scratch +# register for the inserted ORRXrs. +# CHECK-LABEL: name: hwpf_offreg +# CHECK: %x3 = ORRXrs %xzr, %x1, 0 +# CHECK: %w10 = LDRWroX %x3, %x2, 0, 0 +name: hwpf_offreg +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x2, %x17, %x18 + + %w10 = LDRWroX %x1, %x2, 0, 0 :: ("aarch64-strided-access" load 4) + + %x2 = ORRXrs %xzr, %x10, 0 + %w26 = LDRWroX %x1, %x2, 0, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... 
diff --git a/test/CodeGen/AArch64/inlineasm-S-constraint.ll b/test/CodeGen/AArch64/inlineasm-S-constraint.ll new file mode 100644 index 000000000000..3fb2a3f32cea --- /dev/null +++ b/test/CodeGen/AArch64/inlineasm-S-constraint.ll @@ -0,0 +1,20 @@ +;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +@var = global i32 0 +define void @test_inline_constraint_S() { +; CHECK-LABEL: test_inline_constraint_S: + call void asm sideeffect "adrp x0, $0", "S"(i32* @var) + call void asm sideeffect "add x0, x0, :lo12:$0", "S"(i32* @var) +; CHECK: adrp x0, var +; CHECK: add x0, x0, :lo12:var + ret void +} +define i32 @test_inline_constraint_S_label(i1 %in) { +; CHECK-LABEL: test_inline_constraint_S_label: + call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) +; CHECK: adr x0, .Ltmp{{[0-9]+}} +br i1 %in, label %loc, label %loc2 +loc: + ret i32 0 +loc2: + ret i32 42 +} diff --git a/test/CodeGen/AArch64/spill-stack-realignment.mir b/test/CodeGen/AArch64/spill-stack-realignment.mir new file mode 100644 index 000000000000..fe85f4b64027 --- /dev/null +++ b/test/CodeGen/AArch64/spill-stack-realignment.mir @@ -0,0 +1,35 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s + +# Ensure references to scavenged stack slots in the CSR area use the +# FP as a base when the stack pointer must be aligned to something +# larger than required by the target. This is necessary because the +# alignment padding area is between the CSR area and the SP, so the SP +# cannot be used to reference the CSR area. +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 64 +# CHECK: stack: +# CHECK: id: 0, name: '', type: default, offset: -64, size: 4, alignment: 64 +# CHECK-NEXT: stack-id: 0 +# CHECK-NEXT: local-offset: -64 +# CHECK: id: 1, name: '', type: default, offset: -20, size: 4, alignment: 4 +# CHECK-NEXT: stack-id: 0 +# CHECK-NEXT: local-offset: -68 +stack: + - { id: 0, size: 4, alignment: 64, local-offset: -64 } + - { id: 1, size: 4, alignment: 4, local-offset: -68 } + +# CHECK: body: +# CHECK: %sp = ANDXri killed %{{x[0-9]+}}, 7865 +# CHECK: STRSui %s0, %sp, 0 +# CHECK: STURSi %s0, %fp, -4 +body: | + bb.0.entry: + liveins: %s0 + + STRSui %s0, %stack.0, 0 + STRSui %s0, %stack.1, 0 + ; Force preserve a CSR to create a hole in the CSR stack region. 
+ %x28 = IMPLICIT_DEF + RET_ReallyLR diff --git a/test/CodeGen/AMDGPU/ctpop16.ll b/test/CodeGen/AMDGPU/ctpop16.ll new file mode 100644 index 000000000000..8236ac07a680 --- /dev/null +++ b/test/CodeGen/AMDGPU/ctpop16.ll @@ -0,0 +1,334 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s + +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>) nounwind readnone +declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone +declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone + +declare i32 @llvm.r600.read.tidig.x() nounwind readnone + +; FUNC-LABEL: {{^}}s_ctpop_i16: +; GCN: s_load_dword [[SVAL:s[0-9]+]], +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_short [[VRESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @s_ctpop_i16(i16 addrspace(1)* noalias %out, i16 %val) nounwind { + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + store i16 %ctpop, i16 addrspace(1)* %out, align 4 + ret void +} + +; XXX - Why 0 in register? +; FUNC-LABEL: {{^}}v_ctpop_i16: +; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0 +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + store i16 %ctpop, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_add_chain_i16: +; SI: buffer_load_ushort [[VAL0:v[0-9]+]], +; SI: buffer_load_ushort [[VAL1:v[0-9]+]], +; VI: flat_load_ushort [[VAL0:v[0-9]+]], +; VI: flat_load_ushort [[VAL1:v[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid + %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid + %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4 + %val1 = load volatile i16, i16 addrspace(1)* %in1.gep, align 4 + %ctpop0 = call i16 @llvm.ctpop.i16(i16 %val0) nounwind readnone + %ctpop1 = call i16 @llvm.ctpop.i16(i16 %val1) nounwind readnone + %add = add i16 %ctpop0, %ctpop1 + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i16: +; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]], +; GCN: s_waitcnt +; GCN-NEXT: 
v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 %ctpop, %sval + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_v2i16: +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: s_endpgm + +; EG: BCNT_INT +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid + %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8 + %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone + store <2 x i16> %ctpop, <2 x i16> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_v4i16: +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: s_endpgm + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid + %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16 + %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone + store <4 x i16> %ctpop, <4 x i16> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_v8i16: +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: s_endpgm + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid + %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32 + %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone + store <8 x i16> %ctpop, <8 x i16> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_v16i16: +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: s_endpgm + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT 
+; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid + %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32 + %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone + store <16 x i16> %ctpop, <16 x i16> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_inline_constant: +; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 %ctpop, 4 + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_inline_constant_inv: +; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 4, %ctpop + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_literal: +; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; SI-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x3e7 +; VI-DAG: s_movk_i32 [[LIT:s[0-9]+]], 0x3e7 +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 %ctpop, 999 + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_var: +; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 %ctpop, %const + store i16 
%add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_var_inv: +; GCN-DAG: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %add = add i16 %const, %ctpop + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_ctpop_i16_add_vvar_inv: +; SI: buffer_load_ushort [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; SI: buffer_load_ushort [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAR]], [[VAL]] +; VI: flat_load_ushort [[VAR:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] +; VI: flat_load_ushort [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm + +; EG: BCNT_INT +define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone + %gep = getelementptr i16, i16 addrspace(1)* %constptr, i32 %tid + %const = load i16, i16 addrspace(1)* %gep, align 4 + %add = add i16 %const, %ctpop + store i16 %add, i16 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: We currently disallow SALU instructions in all branches, +; but there are some cases when they should be allowed. + +; FUNC-LABEL: {{^}}ctpop_i16_in_br: +; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34 +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_short [[RESULT]], +; GCN: s_endpgm +; EG: BCNT_INT +define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %ctpop_arg, i16 %cond) { +entry: + %tmp0 = icmp eq i16 %cond, 0 + br i1 %tmp0, label %if, label %else + +if: + %tmp2 = call i16 @llvm.ctpop.i16(i16 %ctpop_arg) + br label %endif + +else: + %tmp3 = getelementptr i16, i16 addrspace(1)* %in, i16 1 + %tmp4 = load i16, i16 addrspace(1)* %tmp3 + br label %endif + +endif: + %tmp5 = phi i16 [%tmp2, %if], [%tmp4, %else] + store i16 %tmp5, i16 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/ARM/peephole-phi.mir b/test/CodeGen/ARM/peephole-phi.mir index 30343654dea1..54ae0115840b 100644 --- a/test/CodeGen/ARM/peephole-phi.mir +++ b/test/CodeGen/ARM/peephole-phi.mir @@ -65,3 +65,39 @@ body: | %4:gpr = PHI %0, %bb.1, %2, %bb.2 %5:spr = VMOVSR %4, 14, %noreg ... + +# The current implementation doesn't perform any transformations if undef +# operands are involved. 
+# CHECK-LABEL: name: func-undefops +# CHECK: body: | +# CHECK: bb.0: +# CHECK: Bcc %bb.2, 1, undef %cpsr +# +# CHECK: bb.1: +# CHECK: %0:gpr = VMOVRS undef %1:spr, 14, %noreg +# CHECK: B %bb.3 +# +# CHECK: bb.2: +# CHECK: %2:gpr = VMOVRS undef %3:spr, 14, %noreg +# +# CHECK: bb.3: +# CHECK: %4:gpr = PHI %0, %bb.1, %2, %bb.2 +# CHECK: %5:spr = VMOVSR %4, 14, %noreg +--- +name: func-undefops +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %0:gpr = VMOVRS undef %1:spr, 14, %noreg + B %bb.3 + + bb.2: + %2:gpr = VMOVRS undef %3:spr, 14, %noreg + + bb.3: + %4:gpr = PHI %0, %bb.1, %2, %bb.2 + %5:spr = VMOVSR %4, 14, %noreg +... diff --git a/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir b/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir new file mode 100644 index 000000000000..e896d9aaa9a4 --- /dev/null +++ b/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir @@ -0,0 +1,25 @@ +# RUN: llc -march=hexagon -run-pass if-converter %s -o - | FileCheck %s + +# Make sure this gets if-converted and it doesn't crash. +# CHECK-LABEL: bb.0 +# CHECK: PS_jmpret %r31 +# CHECK-NOT: bb.{{[1-9]+}}: + +--- +name: fred +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %r0 + renamable %p0 = C2_cmpeqi killed renamable %r0, 0 + J2_jumpf killed renamable %p0, %bb.2, implicit-def dead %pc + + bb.1: + S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`) + PS_jmpret %r31, implicit-def dead %pc + + bb.2: + S4_storeiri_io undef renamable %r0, 0, 32768 :: (store 4 into `i32* undef`) + PS_jmpret %r31, implicit-def dead %pc +... diff --git a/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir b/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir new file mode 100644 index 000000000000..c63c055c3b31 --- /dev/null +++ b/test/CodeGen/MIR/PowerPC/ifcvt-diamond-ret.mir @@ -0,0 +1,34 @@ +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -run-pass=if-converter %s -o - | FileCheck %s +--- +name: foo +body: | + bb.0: + liveins: %x0, %x3 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt + %cr2lt = CROR %cr0gt, %cr0gt + BCn killed renamable %cr2lt, %bb.2 + B %bb.1 + + bb.1: + renamable %x3 = LIS8 4096 + MTLR8 %x0, implicit-def %lr8 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + + bb.2: + renamable %x3 = LIS8 4096 + MTLR8 %x0, implicit-def %lr8 + BLR8 implicit %lr8, implicit %rm, implicit %x3 +... + +# Diamond testcase with equivalent branches terminating in returns. 
+ +# CHECK: body: | +# CHECK: bb.0: +# CHECK: dead renamable %x3 = ANDIo8 killed renamable %x3, 1, implicit-def dead %cr0, implicit-def %cr0gt +# CHECK: %cr2lt = CROR %cr0gt, %cr0gt +# CHECK: renamable %x3 = LIS8 4096 +# CHECK: MTLR8 %x0, implicit-def %lr8 +# CHECK: BLR8 implicit %lr8, implicit %rm, implicit %x3 + diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll index 459aad61828c..dc4f2f9c862b 100644 --- a/test/CodeGen/Mips/const-mult.ll +++ b/test/CodeGen/Mips/const-mult.ll @@ -1,93 +1,626 @@ -; RUN: llc -march=mipsel < %s | FileCheck %s -; RUN: llc -march=mips64el < %s | FileCheck %s -check-prefixes=CHECK,CHECK64 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mipsel-mti-linux-gnu < %s | FileCheck %s -check-prefix=MIPS32 +; RUN: llc -mtriple=mips64el-mti-linux-gnu < %s | FileCheck %s -check-prefix=MIPS64 -; CHECK-LABEL: mul5_32: -; CHECK: sll $[[R0:[0-9]+]], $4, 2 -; CHECK: addu ${{[0-9]+}}, $[[R0]], $4 define i32 @mul5_32(i32 signext %a) { +; MIPS32-LABEL: mul5_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 2 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $1, $4 +; +; MIPS64-LABEL: mul5_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 2 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $1, $4 entry: %mul = mul nsw i32 %a, 5 ret i32 %mul } -; CHECK-LABEL: mul27_32: -; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2 -; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4 -; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5 -; CHECK: subu ${{[0-9]+}}, $[[R2]], $[[R1]] - define i32 @mul27_32(i32 signext %a) { +; MIPS32-LABEL: mul27_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 2 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 5 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: subu $2, $2, $1 +; +; MIPS64-LABEL: mul27_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 2 +; MIPS64-NEXT: addu $1, $1, $4 +; MIPS64-NEXT: sll $2, $4, 5 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: subu $2, $2, $1 entry: %mul = mul nsw i32 %a, 27 ret i32 %mul } -; CHECK-LABEL: muln2147483643_32: -; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2 -; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4 -; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31 -; CHECK: addu ${{[0-9]+}}, $[[R2]], $[[R1]] - define i32 @muln2147483643_32(i32 signext %a) { +; MIPS32-LABEL: muln2147483643_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 2 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 31 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: muln2147483643_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 2 +; MIPS64-NEXT: addu $1, $1, $4 +; MIPS64-NEXT: sll $2, $4, 31 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $2, $1 entry: %mul = mul nsw i32 %a, -2147483643 ret i32 %mul } -; CHECK64-LABEL: muln9223372036854775805_64: -; CHECK64-DAG: dsll $[[R0:[0-9]+]], $4, 1 -; CHECK64-DAG: daddu $[[R1:[0-9]+]], $[[R0]], $4 -; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63 -; CHECK64: daddu ${{[0-9]+}}, $[[R2]], $[[R1]] - define i64 @muln9223372036854775805_64(i64 signext %a) { +; MIPS32-LABEL: muln9223372036854775805_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 1 +; MIPS32-NEXT: addu $2, $1, $4 +; MIPS32-NEXT: sltu $1, $2, $1 +; MIPS32-NEXT: srl $3, $4, 31 +; MIPS32-NEXT: sll $6, $5, 1 +; MIPS32-NEXT: or $3, $6, $3 +; MIPS32-NEXT: addu $3, $3, $5 +; MIPS32-NEXT: addu $1, $3, $1 +; MIPS32-NEXT: sll $3, $4, 31 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $3, $3, $1 +; +; MIPS64-LABEL: 
muln9223372036854775805_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsll $1, $4, 1 +; MIPS64-NEXT: daddu $1, $1, $4 +; MIPS64-NEXT: dsll $2, $4, 63 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $2, $2, $1 entry: %mul = mul nsw i64 %a, -9223372036854775805 ret i64 %mul } -; CHECK64-LABEL: muln170141183460469231731687303715884105725_128: -; CHECK64-DAG: dsrl $[[R0:[0-9]+]], $4, 63 -; CHECK64-DAG: dsll $[[R1:[0-9]+]], $5, 1 -; CHECK64-DAG: or $[[R2:[0-9]+]], $[[R1]], $[[R0]] -; CHECK64-DAG: daddu $[[R3:[0-9]+]], $[[R2]], $5 -; CHECK64-DAG: dsll $[[R4:[0-9]+]], $4, 1 -; CHECK64-DAG: daddu $[[R5:[0-9]+]], $[[R4]], $4 -; CHECK64-DAG: sltu $[[R6:[0-9]+]], $[[R5]], $[[R4]] -; CHECK64-DAG: dsll $[[R7:[0-9]+]], $[[R6]], 32 -; CHECK64-DAG: dsrl $[[R8:[0-9]+]], $[[R7]], 32 -; CHECK64-DAG: daddu $[[R9:[0-9]+]], $[[R3]], $[[R8]] -; CHECK64-DAG: dsll $[[R10:[0-9]+]], $4, 63 -; CHECK64: daddu ${{[0-9]+}}, $[[R10]], $[[R9]] - define i128 @muln170141183460469231731687303715884105725_128(i128 signext %a) { +; MIPS32-LABEL: muln170141183460469231731687303715884105725_128: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 1 +; MIPS32-NEXT: addu $2, $1, $4 +; MIPS32-NEXT: sltu $1, $2, $1 +; MIPS32-NEXT: srl $3, $4, 31 +; MIPS32-NEXT: sll $8, $5, 1 +; MIPS32-NEXT: or $8, $8, $3 +; MIPS32-NEXT: addu $3, $8, $5 +; MIPS32-NEXT: addu $3, $3, $1 +; MIPS32-NEXT: sltu $9, $3, $8 +; MIPS32-NEXT: xor $8, $3, $8 +; MIPS32-NEXT: movz $9, $1, $8 +; MIPS32-NEXT: srl $1, $5, 31 +; MIPS32-NEXT: sll $5, $6, 1 +; MIPS32-NEXT: or $5, $5, $1 +; MIPS32-NEXT: addu $8, $5, $6 +; MIPS32-NEXT: addu $1, $8, $9 +; MIPS32-NEXT: sltu $5, $8, $5 +; MIPS32-NEXT: srl $6, $6, 31 +; MIPS32-NEXT: sll $9, $7, 1 +; MIPS32-NEXT: or $6, $9, $6 +; MIPS32-NEXT: addu $6, $6, $7 +; MIPS32-NEXT: addu $5, $6, $5 +; MIPS32-NEXT: sll $4, $4, 31 +; MIPS32-NEXT: sltu $6, $1, $8 +; MIPS32-NEXT: addu $5, $5, $6 +; MIPS32-NEXT: addu $5, $4, $5 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: move $4, $1 +; +; MIPS64-LABEL: muln170141183460469231731687303715884105725_128: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsrl $1, $4, 63 +; MIPS64-NEXT: dsll $2, $5, 1 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: daddu $1, $1, $5 +; MIPS64-NEXT: dsll $3, $4, 1 +; MIPS64-NEXT: daddu $2, $3, $4 +; MIPS64-NEXT: sltu $3, $2, $3 +; MIPS64-NEXT: dsll $3, $3, 32 +; MIPS64-NEXT: dsrl $3, $3, 32 +; MIPS64-NEXT: daddu $1, $1, $3 +; MIPS64-NEXT: dsll $3, $4, 63 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $3, $3, $1 entry: %mul = mul nsw i128 %a, -170141183460469231731687303715884105725 ret i128 %mul } -; CHECK64-LABEL: mul170141183460469231731687303715884105723_128: -; CHECK64-DAG: dsrl $[[R0:[0-9]+]], $4, 62 -; CHECK64-DAG: dsll $[[R1:[0-9]+]], $5, 2 -; CHECK64-DAG: or $[[R2:[0-9]+]], $[[R1]], $[[R0]] -; CHECK64-DAG: daddu $[[R3:[0-9]+]], $[[R2]], $5 -; CHECK64-DAG: dsll $[[R4:[0-9]+]], $4, 2 -; CHECK64-DAG: daddu $[[R5:[0-9]+]], $[[R4]], $4 -; CHECK64-DAG: sltu $[[R6:[0-9]+]], $[[R5]], $[[R4]] -; CHECK64-DAG: dsll $[[R7:[0-9]+]], $[[R6]], 32 -; CHECK64-DAG: dsrl $[[R8:[0-9]+]], $[[R7]], 32 -; CHECK64-DAG: daddu $[[R9:[0-9]+]], $[[R3]], $[[R8]] -; CHECK64-DAG: dsll $[[R10:[0-9]+]], $4, 63 -; CHECK64-DAG: dsubu $[[R11:[0-9]+]], $[[R10]], $[[R9]] -; CHECK64-DAG: sltu $[[R12:[0-9]+]], $zero, $[[R5]] -; CHECK64-DAG: dsll $[[R13:[0-9]+]], $[[R12]], 32 -; CHECK64-DAG: dsrl $[[R14:[0-9]+]], $[[R13]], 32 -; CHECK64-DAG: dsubu $[[R15:[0-9]+]], $[[R11]], $[[R14]] -; CHECK64: dnegu ${{[0-9]+}}, $[[R5]] - define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) { +; 
MIPS32-LABEL: mul170141183460469231731687303715884105723_128: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 2 +; MIPS32-NEXT: addu $2, $1, $4 +; MIPS32-NEXT: sltu $1, $2, $1 +; MIPS32-NEXT: srl $3, $4, 30 +; MIPS32-NEXT: sll $8, $5, 2 +; MIPS32-NEXT: or $3, $8, $3 +; MIPS32-NEXT: addu $8, $3, $5 +; MIPS32-NEXT: addu $8, $8, $1 +; MIPS32-NEXT: sltu $9, $8, $3 +; MIPS32-NEXT: xor $3, $8, $3 +; MIPS32-NEXT: sltu $10, $zero, $8 +; MIPS32-NEXT: sltu $11, $zero, $2 +; MIPS32-NEXT: movz $10, $11, $8 +; MIPS32-NEXT: movz $9, $1, $3 +; MIPS32-NEXT: srl $1, $5, 30 +; MIPS32-NEXT: sll $3, $6, 2 +; MIPS32-NEXT: or $1, $3, $1 +; MIPS32-NEXT: addu $3, $1, $6 +; MIPS32-NEXT: addu $5, $3, $9 +; MIPS32-NEXT: sll $4, $4, 31 +; MIPS32-NEXT: negu $9, $5 +; MIPS32-NEXT: sltu $12, $9, $10 +; MIPS32-NEXT: sltu $13, $5, $3 +; MIPS32-NEXT: sltu $1, $3, $1 +; MIPS32-NEXT: srl $3, $6, 30 +; MIPS32-NEXT: sll $6, $7, 2 +; MIPS32-NEXT: or $3, $6, $3 +; MIPS32-NEXT: addu $3, $3, $7 +; MIPS32-NEXT: addu $1, $3, $1 +; MIPS32-NEXT: addu $1, $1, $13 +; MIPS32-NEXT: subu $1, $4, $1 +; MIPS32-NEXT: sltu $3, $zero, $5 +; MIPS32-NEXT: subu $1, $1, $3 +; MIPS32-NEXT: subu $5, $1, $12 +; MIPS32-NEXT: subu $4, $9, $10 +; MIPS32-NEXT: negu $1, $8 +; MIPS32-NEXT: subu $3, $1, $11 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: negu $2, $2 +; +; MIPS64-LABEL: mul170141183460469231731687303715884105723_128: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsrl $1, $4, 62 +; MIPS64-NEXT: dsll $2, $5, 2 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: daddu $1, $1, $5 +; MIPS64-NEXT: dsll $2, $4, 2 +; MIPS64-NEXT: daddu $5, $2, $4 +; MIPS64-NEXT: sltu $2, $5, $2 +; MIPS64-NEXT: dsll $2, $2, 32 +; MIPS64-NEXT: dsrl $2, $2, 32 +; MIPS64-NEXT: daddu $1, $1, $2 +; MIPS64-NEXT: dsll $2, $4, 63 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: sltu $2, $zero, $5 +; MIPS64-NEXT: dsll $2, $2, 32 +; MIPS64-NEXT: dsrl $2, $2, 32 +; MIPS64-NEXT: dsubu $3, $1, $2 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: dnegu $2, $5 entry: %mul = mul nsw i128 %a, 170141183460469231731687303715884105723 ret i128 %mul } + +define i32 @mul42949673_32(i32 %a) { +; MIPS32-LABEL: mul42949673_32: +; MIPS32: # %bb.0: +; MIPS32-NEXT: sll $1, $4, 3 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 5 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 10 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 13 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 15 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 20 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 25 +; MIPS32-NEXT: sll $3, $4, 23 +; MIPS32-NEXT: addu $1, $3, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: mul42949673_32: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $1, 3 +; MIPS64-NEXT: addu $2, $2, $1 +; MIPS64-NEXT: sll $3, $1, 5 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 10 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 13 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 15 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 20 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 25 +; MIPS64-NEXT: sll $1, $1, 23 +; MIPS64-NEXT: addu $1, $1, $2 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $3, $1 + %b = mul i32 %a, 42949673 + ret i32 %b +} + +define i64 @mul42949673_64(i64 %a) { +; MIPS32-LABEL: mul42949673_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $1, 655 +; MIPS32-NEXT: ori $1, $1, 23593 +; MIPS32-NEXT: multu $4, $1 +; MIPS32-NEXT: 
mflo $2 +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: sll $3, $5, 3 +; MIPS32-NEXT: addu $3, $3, $5 +; MIPS32-NEXT: sll $4, $5, 5 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 10 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 13 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 15 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 20 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 25 +; MIPS32-NEXT: sll $5, $5, 23 +; MIPS32-NEXT: addu $3, $5, $3 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $3, $1, $3 +; +; MIPS64-LABEL: mul42949673_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsll $1, $4, 3 +; MIPS64-NEXT: daddu $1, $1, $4 +; MIPS64-NEXT: dsll $2, $4, 5 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 10 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 13 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 15 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 20 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 25 +; MIPS64-NEXT: dsll $3, $4, 23 +; MIPS64-NEXT: daddu $1, $3, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $2, $2, $1 +entry: + %b = mul i64 %a, 42949673 + ret i64 %b +} + +define i32 @mul22224078_32(i32 %a) { +; MIPS32-LABEL: mul22224078_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 1 +; MIPS32-NEXT: sll $2, $4, 4 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 6 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 8 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 10 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 13 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 16 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: sll $3, $4, 22 +; MIPS32-NEXT: sll $5, $4, 20 +; MIPS32-NEXT: sll $4, $4, 18 +; MIPS32-NEXT: subu $1, $4, $1 +; MIPS32-NEXT: addu $1, $5, $1 +; MIPS32-NEXT: addu $1, $3, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: mul22224078_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $1, 1 +; MIPS64-NEXT: sll $3, $1, 4 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 6 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 8 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 10 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 13 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 16 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 24 +; MIPS64-NEXT: sll $4, $1, 22 +; MIPS64-NEXT: sll $5, $1, 20 +; MIPS64-NEXT: sll $1, $1, 18 +; MIPS64-NEXT: subu $1, $1, $2 +; MIPS64-NEXT: addu $1, $5, $1 +; MIPS64-NEXT: addu $1, $4, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $3, $1 +entry: + %b = mul i32 %a, 22224078 + ret i32 %b +} + +define i64 @mul22224078_64(i64 %a) { +; MIPS32-LABEL: mul22224078_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $1, 339 +; MIPS32-NEXT: ori $1, $1, 7374 +; MIPS32-NEXT: multu $4, $1 +; MIPS32-NEXT: mflo $2 +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: sll $3, $5, 1 +; MIPS32-NEXT: sll $4, $5, 4 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 6 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 8 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 10 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 13 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 16 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 24 +; MIPS32-NEXT: sll 
$6, $5, 22 +; MIPS32-NEXT: sll $7, $5, 20 +; MIPS32-NEXT: sll $5, $5, 18 +; MIPS32-NEXT: subu $3, $5, $3 +; MIPS32-NEXT: addu $3, $7, $3 +; MIPS32-NEXT: addu $3, $6, $3 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $3, $1, $3 +; +; MIPS64-LABEL: mul22224078_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsll $1, $4, 1 +; MIPS64-NEXT: dsll $2, $4, 4 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 6 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 8 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 10 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 13 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 16 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 24 +; MIPS64-NEXT: dsll $3, $4, 22 +; MIPS64-NEXT: dsll $5, $4, 20 +; MIPS64-NEXT: dsll $4, $4, 18 +; MIPS64-NEXT: dsubu $1, $4, $1 +; MIPS64-NEXT: daddu $1, $5, $1 +; MIPS64-NEXT: daddu $1, $3, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $2, $2, $1 +entry: + %b = mul i64 %a, 22224078 + ret i64 %b +} + +define i32 @mul22245375_32(i32 %a) { +; MIPS32-LABEL: mul22245375_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 12 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 15 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 18 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 20 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 22 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: mul22245375_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $1, 12 +; MIPS64-NEXT: addu $2, $2, $1 +; MIPS64-NEXT: sll $3, $1, 15 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 18 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 20 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 22 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $1, $1, 24 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $1, $2 +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul22245375_64(i64 %a) { +; MIPS32-LABEL: mul22245375_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $1, 339 +; MIPS32-NEXT: ori $1, $1, 28671 +; MIPS32-NEXT: multu $4, $1 +; MIPS32-NEXT: mflo $2 +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: sll $3, $5, 12 +; MIPS32-NEXT: addu $3, $3, $5 +; MIPS32-NEXT: sll $4, $5, 15 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 18 +; MIPS32-NEXT: subu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 20 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 22 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: sll $4, $5, 24 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $3, $1, $3 +; +; MIPS64-LABEL: mul22245375_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsll $1, $4, 12 +; MIPS64-NEXT: daddu $1, $1, $4 +; MIPS64-NEXT: dsll $2, $4, 15 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 18 +; MIPS64-NEXT: dsubu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 20 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 22 +; MIPS64-NEXT: daddu $1, $2, $1 +; MIPS64-NEXT: dsll $2, $4, 24 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $2, $2, $1 +entry: + %b = mul i64 %a, 22245375 + ret i64 %b +} + +define i32 @mul25165824_32(i32 %a) { +; MIPS32-LABEL: mul25165824_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 12 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 15 +; MIPS32-NEXT: addu $1, $2, 
$1 +; MIPS32-NEXT: sll $2, $4, 18 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 20 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 22 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: mul25165824_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $1, 12 +; MIPS64-NEXT: addu $2, $2, $1 +; MIPS64-NEXT: sll $3, $1, 15 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 18 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 20 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 22 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $1, $1, 24 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $1, $2 +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul25165824_64(i64 %a) { +; MIPS32-LABEL: mul25165824_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: srl $1, $4, 9 +; MIPS32-NEXT: sll $2, $5, 23 +; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: srl $2, $4, 8 +; MIPS32-NEXT: sll $3, $5, 24 +; MIPS32-NEXT: or $2, $3, $2 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 23 +; MIPS32-NEXT: sll $3, $4, 24 +; MIPS32-NEXT: addu $2, $3, $2 +; MIPS32-NEXT: sltu $3, $2, $3 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $3, $1, $3 +; +; MIPS64-LABEL: mul25165824_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: dsll $1, $4, 23 +; MIPS64-NEXT: dsll $2, $4, 24 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddu $2, $2, $1 +entry: + %b = mul i64 %a, 25165824 + ret i64 %b +} + +define i32 @mul33554432_32(i32 %a) { +; MIPS32-LABEL: mul33554432_32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 12 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: sll $2, $4, 15 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 18 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 20 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 22 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addu $2, $2, $1 +; +; MIPS64-LABEL: mul33554432_32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: sll $1, $4, 0 +; MIPS64-NEXT: sll $2, $1, 12 +; MIPS64-NEXT: addu $2, $2, $1 +; MIPS64-NEXT: sll $3, $1, 15 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 18 +; MIPS64-NEXT: subu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 20 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $3, $1, 22 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: sll $1, $1, 24 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: addu $2, $1, $2 +entry: + %b = mul i32 %a, 22245375 + ret i32 %b +} + +define i64 @mul33554432_64(i64 %a) { +; MIPS32-LABEL: mul33554432_64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: srl $1, $4, 7 +; MIPS32-NEXT: sll $2, $5, 25 +; MIPS32-NEXT: or $3, $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: sll $2, $4, 25 +; +; MIPS64-LABEL: mul33554432_64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: dsll $2, $4, 25 +entry: + %b = mul i64 %a, 33554432 + ret i64 %b +} diff --git a/test/CodeGen/Mips/indirect-jump-hazard/calls.ll b/test/CodeGen/Mips/indirect-jump-hazard/calls.ll new file mode 100644 index 000000000000..20e89136d87c --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/calls.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s 
--check-prefix=MIPS32R2 +; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6 +; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2 +; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6 + +; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2 +; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6 +; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2 +; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6 + +define void @fooNonTail(void (i32)* nocapture %f1) nounwind { +; MIPS32R2-LABEL: fooNonTail: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: addiu $sp, $sp, -24 +; MIPS32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32R2-NEXT: move $1, $4 +; MIPS32R2-NEXT: move $25, $1 +; MIPS32R2-NEXT: jalr.hb $25 +; MIPS32R2-NEXT: addiu $4, $zero, 13 +; MIPS32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 24 +; +; MIPS32R6-LABEL: fooNonTail: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: addiu $sp, $sp, -24 +; MIPS32R6-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; MIPS32R6-NEXT: move $1, $4 +; MIPS32R6-NEXT: move $25, $1 +; MIPS32R6-NEXT: jalr.hb $25 +; MIPS32R6-NEXT: addiu $4, $zero, 13 +; MIPS32R6-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: addiu $sp, $sp, 24 +; +; MIPS64R2-LABEL: fooNonTail: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R2-NEXT: move $1, $4 +; MIPS64R2-NEXT: move $25, $1 +; MIPS64R2-NEXT: jalr.hb $25 +; MIPS64R2-NEXT: daddiu $4, $zero, 13 +; MIPS64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: fooNonTail: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64R6-NEXT: move $1, $4 +; MIPS64R6-NEXT: move $25, $1 +; MIPS64R6-NEXT: jalr.hb $25 +; MIPS64R6-NEXT: daddiu $4, $zero, 13 +; MIPS64R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; PIC-MIPS32R2-LABEL: fooNonTail: +; PIC-MIPS32R2: # %bb.0: # %entry +; PIC-MIPS32R2-NEXT: addiu $sp, $sp, -24 +; PIC-MIPS32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; PIC-MIPS32R2-NEXT: move $1, $4 +; PIC-MIPS32R2-NEXT: move $25, $1 +; PIC-MIPS32R2-NEXT: jalr.hb $25 +; 
PIC-MIPS32R2-NEXT: addiu $4, $zero, 13 +; PIC-MIPS32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; PIC-MIPS32R2-NEXT: jr $ra +; PIC-MIPS32R2-NEXT: addiu $sp, $sp, 24 +; +; PIC-MIPS32R6-LABEL: fooNonTail: +; PIC-MIPS32R6: # %bb.0: # %entry +; PIC-MIPS32R6-NEXT: addiu $sp, $sp, -24 +; PIC-MIPS32R6-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; PIC-MIPS32R6-NEXT: move $1, $4 +; PIC-MIPS32R6-NEXT: move $25, $1 +; PIC-MIPS32R6-NEXT: jalr.hb $25 +; PIC-MIPS32R6-NEXT: addiu $4, $zero, 13 +; PIC-MIPS32R6-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; PIC-MIPS32R6-NEXT: jr $ra +; PIC-MIPS32R6-NEXT: addiu $sp, $sp, 24 +; +; PIC-MIPS64R2-LABEL: fooNonTail: +; PIC-MIPS64R2: # %bb.0: # %entry +; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; PIC-MIPS64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; PIC-MIPS64R2-NEXT: move $1, $4 +; PIC-MIPS64R2-NEXT: move $25, $1 +; PIC-MIPS64R2-NEXT: jalr.hb $25 +; PIC-MIPS64R2-NEXT: daddiu $4, $zero, 13 +; PIC-MIPS64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; PIC-MIPS64R2-NEXT: jr $ra +; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; PIC-MIPS64R6-LABEL: fooNonTail: +; PIC-MIPS64R6: # %bb.0: # %entry +; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; PIC-MIPS64R6-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; PIC-MIPS64R6-NEXT: move $1, $4 +; PIC-MIPS64R6-NEXT: move $25, $1 +; PIC-MIPS64R6-NEXT: jalr.hb $25 +; PIC-MIPS64R6-NEXT: daddiu $4, $zero, 13 +; PIC-MIPS64R6-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; PIC-MIPS64R6-NEXT: jr $ra +; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, 16 +entry: + call void %f1(i32 13) nounwind + ret void +} + +define i32 @fooTail(i32 (i32)* nocapture %f1) nounwind { +; MIPS32R2-LABEL: fooTail: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: move $1, $4 +; MIPS32R2-NEXT: move $25, $1 +; MIPS32R2-NEXT: jr.hb $25 +; MIPS32R2-NEXT: addiu $4, $zero, 14 +; +; MIPS32R6-LABEL: fooTail: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: move $1, $4 +; MIPS32R6-NEXT: move $25, $1 +; MIPS32R6-NEXT: jr.hb $25 +; MIPS32R6-NEXT: addiu $4, $zero, 14 +; +; MIPS64R2-LABEL: fooTail: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: move $1, $4 +; MIPS64R2-NEXT: move $25, $1 +; MIPS64R2-NEXT: jr.hb $25 +; MIPS64R2-NEXT: daddiu $4, $zero, 14 +; +; MIPS64R6-LABEL: fooTail: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: move $1, $4 +; MIPS64R6-NEXT: move $25, $1 +; MIPS64R6-NEXT: jr.hb $25 +; MIPS64R6-NEXT: daddiu $4, $zero, 14 +; +; PIC-MIPS32R2-LABEL: fooTail: +; PIC-MIPS32R2: # %bb.0: # %entry +; PIC-MIPS32R2-NEXT: move $1, $4 +; PIC-MIPS32R2-NEXT: move $25, $1 +; PIC-MIPS32R2-NEXT: jr.hb $25 +; PIC-MIPS32R2-NEXT: addiu $4, $zero, 14 +; +; PIC-MIPS32R6-LABEL: fooTail: +; PIC-MIPS32R6: # %bb.0: # %entry +; PIC-MIPS32R6-NEXT: move $1, $4 +; PIC-MIPS32R6-NEXT: move $25, $1 +; PIC-MIPS32R6-NEXT: jr.hb $25 +; PIC-MIPS32R6-NEXT: addiu $4, $zero, 14 +; +; PIC-MIPS64R2-LABEL: fooTail: +; PIC-MIPS64R2: # %bb.0: # %entry +; PIC-MIPS64R2-NEXT: move $1, $4 +; PIC-MIPS64R2-NEXT: move $25, $1 +; PIC-MIPS64R2-NEXT: jr.hb $25 +; PIC-MIPS64R2-NEXT: daddiu $4, $zero, 14 +; +; PIC-MIPS64R6-LABEL: fooTail: +; PIC-MIPS64R6: # %bb.0: # %entry +; PIC-MIPS64R6-NEXT: move $1, $4 +; PIC-MIPS64R6-NEXT: move $25, $1 +; PIC-MIPS64R6-NEXT: jr.hb $25 +; PIC-MIPS64R6-NEXT: daddiu $4, $zero, 14 +entry: + %0 = tail call i32 %f1(i32 14) nounwind + ret i32 %0 +} diff --git a/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir new file mode 100644 index 000000000000..1c11d700b53e --- /dev/null +++ 
b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir @@ -0,0 +1,58 @@ +# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \ +# RUN: -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \ +# RUN: -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \ +# RUN: | FileCheck %s + +# Test that calls are checked when using indirect jump guards (hazard variant). + +# CHECK: Bad machine code: invalid instruction when using jump guards! +--- | + define i32 @fooTail(i32 (i32)* nocapture %f1) { + entry: + %0 = tail call i32 %f1(i32 14) + ret i32 %0 + } +... +--- +name: fooTail +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: gpr32, preferred-register: '' } +liveins: + - { reg: '%a0', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %a0 + + %0:gpr32 = COPY %a0 + %1:gpr32 = ADDiu $zero, 14 + %a0 = COPY %1 + TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0 + +... diff --git a/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir new file mode 100644 index 000000000000..00e22b934bbc --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir @@ -0,0 +1,59 @@ +# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \ +# RUN: -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \ +# RUN: -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \ +# RUN: | FileCheck %s + +# Test that tail calls are checked when using indirect jump guards (hazard variant). + +# CHECK: Bad machine code: invalid instruction when using jump guards! +--- | + define i32 @fooTail(i32 (i32)* nocapture %f1) { + entry: + %0 = tail call i32 %f1(i32 14) + ret i32 %0 + } + +... +--- +name: fooTail +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: gpr32, preferred-register: '' } +liveins: + - { reg: '%a0', virtual-reg: '%0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %a0 + + %0:gpr32 = COPY %a0 + %1:gpr32 = ADDiu $zero, 14 + %a0 = COPY %1 + TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0 + +... 
diff --git a/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll b/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll new file mode 100644 index 000000000000..c530dd614ef8 --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll @@ -0,0 +1,649 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R2 +; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6 +; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2 +; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6 + +; RUN: llc < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2 +; RUN: llc < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6 +; RUN: llc < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2 +; RUN: llc < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \ +; RUN: -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6 + +@.str = private unnamed_addr constant [2 x i8] c"A\00", align 1 +@.str.1 = private unnamed_addr constant [2 x i8] c"B\00", align 1 +@.str.2 = private unnamed_addr constant [2 x i8] c"C\00", align 1 +@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1 +@.str.4 = private unnamed_addr constant [2 x i8] c"E\00", align 1 +@.str.5 = private unnamed_addr constant [2 x i8] c"F\00", align 1 +@.str.6 = private unnamed_addr constant [2 x i8] c"G\00", align 1 +@.str.7 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +define i8* @_Z3fooi(i32 signext %Letter) { +; MIPS32R2-LABEL: _Z3fooi: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: addiu $sp, $sp, -16 +; MIPS32R2-NEXT: .cfi_def_cfa_offset 16 +; MIPS32R2-NEXT: sltiu $1, $4, 7 +; MIPS32R2-NEXT: beqz $1, $BB0_3 +; MIPS32R2-NEXT: sw $4, 4($sp) +; MIPS32R2-NEXT: $BB0_1: # %entry +; MIPS32R2-NEXT: sll $1, $4, 2 +; MIPS32R2-NEXT: lui $2, %hi($JTI0_0) +; MIPS32R2-NEXT: addu $1, $1, $2 +; MIPS32R2-NEXT: lw $1, %lo($JTI0_0)($1) +; MIPS32R2-NEXT: jr.hb $1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: $BB0_2: # %sw.bb +; MIPS32R2-NEXT: lui $1, %hi($.str) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_3: # %sw.epilog +; MIPS32R2-NEXT: lui $1, %hi($.str.7) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.7) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_4: # 
%sw.bb1 +; MIPS32R2-NEXT: lui $1, %hi($.str.1) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.1) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_5: # %sw.bb2 +; MIPS32R2-NEXT: lui $1, %hi($.str.2) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.2) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_6: # %sw.bb3 +; MIPS32R2-NEXT: lui $1, %hi($.str.3) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.3) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_7: # %sw.bb4 +; MIPS32R2-NEXT: lui $1, %hi($.str.4) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.4) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_8: # %sw.bb5 +; MIPS32R2-NEXT: lui $1, %hi($.str.5) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.5) +; MIPS32R2-NEXT: j $BB0_10 +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_9: # %sw.bb6 +; MIPS32R2-NEXT: lui $1, %hi($.str.6) +; MIPS32R2-NEXT: addiu $1, $1, %lo($.str.6) +; MIPS32R2-NEXT: sw $1, 8($sp) +; MIPS32R2-NEXT: $BB0_10: # %return +; MIPS32R2-NEXT: lw $2, 8($sp) +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 16 +; +; MIPS32R6-LABEL: _Z3fooi: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: addiu $sp, $sp, -16 +; MIPS32R6-NEXT: .cfi_def_cfa_offset 16 +; MIPS32R6-NEXT: sltiu $1, $4, 7 +; MIPS32R6-NEXT: beqz $1, $BB0_3 +; MIPS32R6-NEXT: sw $4, 4($sp) +; MIPS32R6-NEXT: $BB0_1: # %entry +; MIPS32R6-NEXT: sll $1, $4, 2 +; MIPS32R6-NEXT: lui $2, %hi($JTI0_0) +; MIPS32R6-NEXT: addu $1, $1, $2 +; MIPS32R6-NEXT: lw $1, %lo($JTI0_0)($1) +; MIPS32R6-NEXT: jr.hb $1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: $BB0_2: # %sw.bb +; MIPS32R6-NEXT: lui $1, %hi($.str) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_3: # %sw.epilog +; MIPS32R6-NEXT: lui $1, %hi($.str.7) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.7) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_4: # %sw.bb1 +; MIPS32R6-NEXT: lui $1, %hi($.str.1) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.1) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_5: # %sw.bb2 +; MIPS32R6-NEXT: lui $1, %hi($.str.2) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.2) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_6: # %sw.bb3 +; MIPS32R6-NEXT: lui $1, %hi($.str.3) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.3) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_7: # %sw.bb4 +; MIPS32R6-NEXT: lui $1, %hi($.str.4) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.4) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_8: # %sw.bb5 +; MIPS32R6-NEXT: lui $1, %hi($.str.5) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.5) +; MIPS32R6-NEXT: j $BB0_10 +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_9: # %sw.bb6 +; MIPS32R6-NEXT: lui $1, %hi($.str.6) +; MIPS32R6-NEXT: addiu $1, $1, %lo($.str.6) +; MIPS32R6-NEXT: sw $1, 8($sp) +; MIPS32R6-NEXT: $BB0_10: # %return +; MIPS32R6-NEXT: lw $2, 8($sp) +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: addiu $sp, $sp, 16 +; +; MIPS64R2-LABEL: _Z3fooi: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R2-NEXT: sw $4, 4($sp) +; MIPS64R2-NEXT: lwu $2, 4($sp) +; MIPS64R2-NEXT: sltiu $1, $2, 7 +; MIPS64R2-NEXT: beqz $1, .LBB0_3 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB0_1: # %entry +; MIPS64R2-NEXT: daddiu $1, $zero, 8 +; 
MIPS64R2-NEXT: dmult $2, $1 +; MIPS64R2-NEXT: mflo $1 +; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0) +; MIPS64R2-NEXT: daddiu $2, $2, %higher(.LJTI0_0) +; MIPS64R2-NEXT: dsll $2, $2, 16 +; MIPS64R2-NEXT: daddiu $2, $2, %hi(.LJTI0_0) +; MIPS64R2-NEXT: dsll $2, $2, 16 +; MIPS64R2-NEXT: daddu $1, $1, $2 +; MIPS64R2-NEXT: ld $1, %lo(.LJTI0_0)($1) +; MIPS64R2-NEXT: jr.hb $1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB0_2: # %sw.bb +; MIPS64R2-NEXT: lui $1, %highest(.L.str) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_3: # %sw.epilog +; MIPS64R2-NEXT: lui $1, %highest(.L.str.7) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.7) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.7) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.7) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_4: # %sw.bb1 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.1) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.1) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.1) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.1) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_5: # %sw.bb2 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.2) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.2) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.2) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.2) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_6: # %sw.bb3 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.3) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.3) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.3) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.3) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_7: # %sw.bb4 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.4) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.4) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.4) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.4) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_8: # %sw.bb5 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.5) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.5) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.5) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.5) +; MIPS64R2-NEXT: j .LBB0_10 +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_9: # %sw.bb6 +; MIPS64R2-NEXT: lui $1, %highest(.L.str.6) +; MIPS64R2-NEXT: daddiu $1, $1, %higher(.L.str.6) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %hi(.L.str.6) +; MIPS64R2-NEXT: dsll $1, $1, 16 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(.L.str.6) +; MIPS64R2-NEXT: sd $1, 8($sp) +; MIPS64R2-NEXT: .LBB0_10: # %return +; MIPS64R2-NEXT: ld $2, 8($sp) +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: _Z3fooi: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R6-NEXT: sw $4, 4($sp) +; MIPS64R6-NEXT: lwu $2, 4($sp) +; 
MIPS64R6-NEXT: sltiu $1, $2, 7 +; MIPS64R6-NEXT: beqzc $1, .LBB0_3 +; MIPS64R6-NEXT: .LBB0_1: # %entry +; MIPS64R6-NEXT: dsll $1, $2, 3 +; MIPS64R6-NEXT: lui $2, %highest(.LJTI0_0) +; MIPS64R6-NEXT: daddiu $2, $2, %higher(.LJTI0_0) +; MIPS64R6-NEXT: dsll $2, $2, 16 +; MIPS64R6-NEXT: daddiu $2, $2, %hi(.LJTI0_0) +; MIPS64R6-NEXT: dsll $2, $2, 16 +; MIPS64R6-NEXT: daddu $1, $1, $2 +; MIPS64R6-NEXT: ld $1, %lo(.LJTI0_0)($1) +; MIPS64R6-NEXT: jr.hb $1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: .LBB0_2: # %sw.bb +; MIPS64R6-NEXT: lui $1, %highest(.L.str) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_3: # %sw.epilog +; MIPS64R6-NEXT: lui $1, %highest(.L.str.7) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.7) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.7) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.7) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_4: # %sw.bb1 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.1) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.1) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.1) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.1) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_5: # %sw.bb2 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.2) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.2) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.2) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.2) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_6: # %sw.bb3 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.3) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.3) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.3) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.3) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_7: # %sw.bb4 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.4) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.4) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.4) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.4) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_8: # %sw.bb5 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.5) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.5) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.5) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.5) +; MIPS64R6-NEXT: j .LBB0_10 +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_9: # %sw.bb6 +; MIPS64R6-NEXT: lui $1, %highest(.L.str.6) +; MIPS64R6-NEXT: daddiu $1, $1, %higher(.L.str.6) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %hi(.L.str.6) +; MIPS64R6-NEXT: dsll $1, $1, 16 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(.L.str.6) +; MIPS64R6-NEXT: sd $1, 8($sp) +; MIPS64R6-NEXT: .LBB0_10: # %return +; MIPS64R6-NEXT: ld $2, 8($sp) +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; PIC-MIPS32R2-LABEL: _Z3fooi: +; PIC-MIPS32R2: # %bb.0: # %entry +; PIC-MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; 
PIC-MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; PIC-MIPS32R2-NEXT: addiu $sp, $sp, -16 +; PIC-MIPS32R2-NEXT: .cfi_def_cfa_offset 16 +; PIC-MIPS32R2-NEXT: addu $2, $2, $25 +; PIC-MIPS32R2-NEXT: sltiu $1, $4, 7 +; PIC-MIPS32R2-NEXT: beqz $1, $BB0_3 +; PIC-MIPS32R2-NEXT: sw $4, 4($sp) +; PIC-MIPS32R2-NEXT: $BB0_1: # %entry +; PIC-MIPS32R2-NEXT: sll $1, $4, 2 +; PIC-MIPS32R2-NEXT: lw $3, %got($JTI0_0)($2) +; PIC-MIPS32R2-NEXT: addu $1, $1, $3 +; PIC-MIPS32R2-NEXT: lw $1, %lo($JTI0_0)($1) +; PIC-MIPS32R2-NEXT: addu $1, $1, $2 +; PIC-MIPS32R2-NEXT: jr.hb $1 +; PIC-MIPS32R2-NEXT: nop +; PIC-MIPS32R2-NEXT: $BB0_2: # %sw.bb +; PIC-MIPS32R2-NEXT: lw $1, %got($.str)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_3: # %sw.epilog +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.7)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.7) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_4: # %sw.bb1 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.1)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.1) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_5: # %sw.bb2 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.2)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.2) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_6: # %sw.bb3 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.3)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.3) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_7: # %sw.bb4 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.4)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.4) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_8: # %sw.bb5 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.5)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.5) +; PIC-MIPS32R2-NEXT: b $BB0_10 +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_9: # %sw.bb6 +; PIC-MIPS32R2-NEXT: lw $1, %got($.str.6)($2) +; PIC-MIPS32R2-NEXT: addiu $1, $1, %lo($.str.6) +; PIC-MIPS32R2-NEXT: sw $1, 8($sp) +; PIC-MIPS32R2-NEXT: $BB0_10: # %return +; PIC-MIPS32R2-NEXT: lw $2, 8($sp) +; PIC-MIPS32R2-NEXT: jr $ra +; PIC-MIPS32R2-NEXT: addiu $sp, $sp, 16 +; +; PIC-MIPS32R6-LABEL: _Z3fooi: +; PIC-MIPS32R6: # %bb.0: # %entry +; PIC-MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; PIC-MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; PIC-MIPS32R6-NEXT: addiu $sp, $sp, -16 +; PIC-MIPS32R6-NEXT: .cfi_def_cfa_offset 16 +; PIC-MIPS32R6-NEXT: addu $2, $2, $25 +; PIC-MIPS32R6-NEXT: sltiu $1, $4, 7 +; PIC-MIPS32R6-NEXT: beqz $1, $BB0_3 +; PIC-MIPS32R6-NEXT: sw $4, 4($sp) +; PIC-MIPS32R6-NEXT: $BB0_1: # %entry +; PIC-MIPS32R6-NEXT: sll $1, $4, 2 +; PIC-MIPS32R6-NEXT: lw $3, %got($JTI0_0)($2) +; PIC-MIPS32R6-NEXT: addu $1, $1, $3 +; PIC-MIPS32R6-NEXT: lw $1, %lo($JTI0_0)($1) +; PIC-MIPS32R6-NEXT: addu $1, $1, $2 +; PIC-MIPS32R6-NEXT: jr.hb $1 +; PIC-MIPS32R6-NEXT: nop +; PIC-MIPS32R6-NEXT: $BB0_2: # %sw.bb +; PIC-MIPS32R6-NEXT: lw $1, %got($.str)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_3: # %sw.epilog +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.7)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.7) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_4: # %sw.bb1 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.1)($2) +; PIC-MIPS32R6-NEXT: addiu 
$1, $1, %lo($.str.1) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_5: # %sw.bb2 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.2)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.2) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_6: # %sw.bb3 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.3)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.3) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_7: # %sw.bb4 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.4)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.4) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_8: # %sw.bb5 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.5)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.5) +; PIC-MIPS32R6-NEXT: b $BB0_10 +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_9: # %sw.bb6 +; PIC-MIPS32R6-NEXT: lw $1, %got($.str.6)($2) +; PIC-MIPS32R6-NEXT: addiu $1, $1, %lo($.str.6) +; PIC-MIPS32R6-NEXT: sw $1, 8($sp) +; PIC-MIPS32R6-NEXT: $BB0_10: # %return +; PIC-MIPS32R6-NEXT: lw $2, 8($sp) +; PIC-MIPS32R6-NEXT: jr $ra +; PIC-MIPS32R6-NEXT: addiu $sp, $sp, 16 +; +; PIC-MIPS64R2-LABEL: _Z3fooi: +; PIC-MIPS64R2: # %bb.0: # %entry +; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; PIC-MIPS64R2-NEXT: .cfi_def_cfa_offset 16 +; PIC-MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi))) +; PIC-MIPS64R2-NEXT: daddu $1, $1, $25 +; PIC-MIPS64R2-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi))) +; PIC-MIPS64R2-NEXT: sw $4, 4($sp) +; PIC-MIPS64R2-NEXT: lwu $3, 4($sp) +; PIC-MIPS64R2-NEXT: sltiu $1, $3, 7 +; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3 +; PIC-MIPS64R2-NEXT: nop +; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry +; PIC-MIPS64R2-NEXT: daddiu $1, $zero, 8 +; PIC-MIPS64R2-NEXT: dmult $3, $1 +; PIC-MIPS64R2-NEXT: mflo $1 +; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2) +; PIC-MIPS64R2-NEXT: daddu $1, $1, $3 +; PIC-MIPS64R2-NEXT: ld $1, %got_ofst(.LJTI0_0)($1) +; PIC-MIPS64R2-NEXT: daddu $1, $1, $2 +; PIC-MIPS64R2-NEXT: jr.hb $1 +; PIC-MIPS64R2-NEXT: nop +; PIC-MIPS64R2-NEXT: .LBB0_2: # %sw.bb +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_3: # %sw.epilog +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.7)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.7) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_4: # %sw.bb1 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.1)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.1) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_5: # %sw.bb2 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.2)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.2) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_6: # %sw.bb3 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.3)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.3) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_7: # %sw.bb4 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.4)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.4) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_8: # %sw.bb5 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.5)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, 
$1, %got_ofst(.L.str.5) +; PIC-MIPS64R2-NEXT: b .LBB0_10 +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_9: # %sw.bb6 +; PIC-MIPS64R2-NEXT: ld $1, %got_page(.L.str.6)($2) +; PIC-MIPS64R2-NEXT: daddiu $1, $1, %got_ofst(.L.str.6) +; PIC-MIPS64R2-NEXT: sd $1, 8($sp) +; PIC-MIPS64R2-NEXT: .LBB0_10: # %return +; PIC-MIPS64R2-NEXT: ld $2, 8($sp) +; PIC-MIPS64R2-NEXT: jr $ra +; PIC-MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; PIC-MIPS64R6-LABEL: _Z3fooi: +; PIC-MIPS64R6: # %bb.0: # %entry +; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; PIC-MIPS64R6-NEXT: .cfi_def_cfa_offset 16 +; PIC-MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi))) +; PIC-MIPS64R6-NEXT: daddu $1, $1, $25 +; PIC-MIPS64R6-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi))) +; PIC-MIPS64R6-NEXT: sw $4, 4($sp) +; PIC-MIPS64R6-NEXT: lwu $3, 4($sp) +; PIC-MIPS64R6-NEXT: sltiu $1, $3, 7 +; PIC-MIPS64R6-NEXT: beqzc $1, .LBB0_3 +; PIC-MIPS64R6-NEXT: .LBB0_1: # %entry +; PIC-MIPS64R6-NEXT: dsll $1, $3, 3 +; PIC-MIPS64R6-NEXT: ld $3, %got_page(.LJTI0_0)($2) +; PIC-MIPS64R6-NEXT: daddu $1, $1, $3 +; PIC-MIPS64R6-NEXT: ld $1, %got_ofst(.LJTI0_0)($1) +; PIC-MIPS64R6-NEXT: daddu $1, $1, $2 +; PIC-MIPS64R6-NEXT: jr.hb $1 +; PIC-MIPS64R6-NEXT: nop +; PIC-MIPS64R6-NEXT: .LBB0_2: # %sw.bb +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_3: # %sw.epilog +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.7)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.7) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_4: # %sw.bb1 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.1)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.1) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_5: # %sw.bb2 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.2)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.2) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_6: # %sw.bb3 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.3)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.3) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_7: # %sw.bb4 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.4)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.4) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_8: # %sw.bb5 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.5)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.5) +; PIC-MIPS64R6-NEXT: b .LBB0_10 +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_9: # %sw.bb6 +; PIC-MIPS64R6-NEXT: ld $1, %got_page(.L.str.6)($2) +; PIC-MIPS64R6-NEXT: daddiu $1, $1, %got_ofst(.L.str.6) +; PIC-MIPS64R6-NEXT: sd $1, 8($sp) +; PIC-MIPS64R6-NEXT: .LBB0_10: # %return +; PIC-MIPS64R6-NEXT: ld $2, 8($sp) +; PIC-MIPS64R6-NEXT: jr $ra +; PIC-MIPS64R6-NEXT: daddiu $sp, $sp, 16 +entry: + %retval = alloca i8*, align 8 + %Letter.addr = alloca i32, align 4 + store i32 %Letter, i32* %Letter.addr, align 4 + %0 = load i32, i32* %Letter.addr, align 4 + switch i32 %0, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + i32 5, label %sw.bb5 + i32 6, label %sw.bb6 + ] + +sw.bb: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8** 
%retval, align 8 + br label %return + +sw.bb1: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb2: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb3: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb4: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb5: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.bb6: + store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.6, i32 0, i32 0), i8** %retval, align 8 + br label %return + +sw.epilog: + store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.7, i32 0, i32 0), i8** %retval, align 8 + br label %return + +return: + %1 = load i8*, i8** %retval, align 8 + ret i8* %1 +} diff --git a/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll b/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll new file mode 100644 index 000000000000..fffda991ae4b --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Except for the NACL version which isn't parsed by update_llc_test_checks.py + +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -force-mips-long-branch -O3 \ +; RUN: -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=O32-PIC + +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 \ +; RUN: -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard \ +; RUN: -relocation-model=pic -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=O32-R6-PIC + +; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 \ +; RUN: -force-mips-long-branch -O3 -relocation-model=pic \ +; RUN: -mattr=+use-indirect-jump-hazard -verify-machineinstrs \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS64 + +; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r6 -target-abi=n64 \ +; RUN: -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard \ +; RUN: -relocation-model=pic -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=N64-R6 + +; Test that the long branches also get changed to their hazard variants. 
+ +@x = external global i32 + +define void @test1(i32 signext %s) { +; O32-PIC-LABEL: test1: +; O32-PIC: # %bb.0: # %entry +; O32-PIC-NEXT: lui $2, %hi(_gp_disp) +; O32-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-PIC-NEXT: bnez $4, $BB0_3 +; O32-PIC-NEXT: addu $2, $2, $25 +; O32-PIC-NEXT: # %bb.1: # %entry +; O32-PIC-NEXT: addiu $sp, $sp, -8 +; O32-PIC-NEXT: sw $ra, 0($sp) +; O32-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: bal $BB0_2 +; O32-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: $BB0_2: # %entry +; O32-PIC-NEXT: addu $1, $ra, $1 +; O32-PIC-NEXT: lw $ra, 0($sp) +; O32-PIC-NEXT: jr.hb $1 +; O32-PIC-NEXT: addiu $sp, $sp, 8 +; O32-PIC-NEXT: $BB0_3: # %then +; O32-PIC-NEXT: lw $1, %got(x)($2) +; O32-PIC-NEXT: addiu $2, $zero, 1 +; O32-PIC-NEXT: sw $2, 0($1) +; O32-PIC-NEXT: $BB0_4: # %end +; O32-PIC-NEXT: jr $ra +; O32-PIC-NEXT: nop +; +; O32-R6-PIC-LABEL: test1: +; O32-R6-PIC: # %bb.0: # %entry +; O32-R6-PIC-NEXT: lui $2, %hi(_gp_disp) +; O32-R6-PIC-NEXT: addiu $2, $2, %lo(_gp_disp) +; O32-R6-PIC-NEXT: bnez $4, $BB0_3 +; O32-R6-PIC-NEXT: addu $2, $2, $25 +; O32-R6-PIC-NEXT: # %bb.1: # %entry +; O32-R6-PIC-NEXT: addiu $sp, $sp, -8 +; O32-R6-PIC-NEXT: sw $ra, 0($sp) +; O32-R6-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; O32-R6-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-R6-PIC-NEXT: balc $BB0_2 +; O32-R6-PIC-NEXT: $BB0_2: # %entry +; O32-R6-PIC-NEXT: addu $1, $ra, $1 +; O32-R6-PIC-NEXT: lw $ra, 0($sp) +; O32-R6-PIC-NEXT: jr.hb $1 +; O32-R6-PIC-NEXT: addiu $sp, $sp, 8 +; O32-R6-PIC-NEXT: $BB0_3: # %then +; O32-R6-PIC-NEXT: lw $1, %got(x)($2) +; O32-R6-PIC-NEXT: addiu $2, $zero, 1 +; O32-R6-PIC-NEXT: sw $2, 0($1) +; O32-R6-PIC-NEXT: $BB0_4: # %end +; O32-R6-PIC-NEXT: jrc $ra +; +; MIPS64-LABEL: test1: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(test1))) +; MIPS64-NEXT: bnez $4, .LBB0_3 +; MIPS64-NEXT: daddu $2, $1, $25 +; MIPS64-NEXT: # %bb.1: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: sd $ra, 0($sp) +; MIPS64-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2) +; MIPS64-NEXT: dsll $1, $1, 16 +; MIPS64-NEXT: bal .LBB0_2 +; MIPS64-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2) +; MIPS64-NEXT: .LBB0_2: # %entry +; MIPS64-NEXT: daddu $1, $ra, $1 +; MIPS64-NEXT: ld $ra, 0($sp) +; MIPS64-NEXT: jr.hb $1 +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; MIPS64-NEXT: .LBB0_3: # %then +; MIPS64-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1))) +; MIPS64-NEXT: addiu $2, $zero, 1 +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: sw $2, 0($1) +; MIPS64-NEXT: .LBB0_4: # %end +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; N64-R6-LABEL: test1: +; N64-R6: # %bb.0: # %entry +; N64-R6-NEXT: lui $1, %hi(%neg(%gp_rel(test1))) +; N64-R6-NEXT: bnez $4, .LBB0_3 +; N64-R6-NEXT: daddu $2, $1, $25 +; N64-R6-NEXT: # %bb.1: # %entry +; N64-R6-NEXT: daddiu $sp, $sp, -16 +; N64-R6-NEXT: sd $ra, 0($sp) +; N64-R6-NEXT: daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2) +; N64-R6-NEXT: dsll $1, $1, 16 +; N64-R6-NEXT: daddiu $1, $1, %lo(.LBB0_4-.LBB0_2) +; N64-R6-NEXT: balc .LBB0_2 +; N64-R6-NEXT: .LBB0_2: # %entry +; N64-R6-NEXT: daddu $1, $ra, $1 +; N64-R6-NEXT: ld $ra, 0($sp) +; N64-R6-NEXT: jr.hb $1 +; N64-R6-NEXT: daddiu $sp, $sp, 16 +; N64-R6-NEXT: .LBB0_3: # %then +; N64-R6-NEXT: daddiu $1, $2, %lo(%neg(%gp_rel(test1))) +; N64-R6-NEXT: addiu $2, $zero, 1 +; N64-R6-NEXT: ld $1, %got_disp(x)($1) +; N64-R6-NEXT: sw $2, 0($1) +; N64-R6-NEXT: .LBB0_4: # %end +; N64-R6-NEXT: jrc $ra +entry: + %cmp = icmp eq i32 %s, 0 + br i1 %cmp, label %end, label %then + +then: + 
store i32 1, i32* @x, align 4 + br label %end + +end: + ret void + +} diff --git a/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll b/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll new file mode 100644 index 000000000000..88886e13f326 --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips-unknwon-linux-gnu -mcpu=mips32r2 \ +; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=O32 %s + +; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n32 \ +; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=N32 %s + +; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n64 \ +; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=N64 %s + +declare void @callee() +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) + +@val = internal unnamed_addr global [20 x i32] zeroinitializer, align 4 + +; Test that the long call sequence uses the hazard barrier instruction variant. +define void @caller() { +; O32-LABEL: caller: +; O32: # %bb.0: +; O32-NEXT: addiu $sp, $sp, -24 +; O32-NEXT: .cfi_def_cfa_offset 24 +; O32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; O32-NEXT: .cfi_offset 31, -4 +; O32-NEXT: lui $1, %hi(callee) +; O32-NEXT: addiu $25, $1, %lo(callee) +; O32-NEXT: jalr.hb $25 +; O32-NEXT: nop +; O32-NEXT: lui $1, %hi(val) +; O32-NEXT: addiu $1, $1, %lo(val) +; O32-NEXT: lui $2, 20560 +; O32-NEXT: ori $2, $2, 20560 +; O32-NEXT: sw $2, 96($1) +; O32-NEXT: sw $2, 92($1) +; O32-NEXT: sw $2, 88($1) +; O32-NEXT: sw $2, 84($1) +; O32-NEXT: sw $2, 80($1) +; O32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; O32-NEXT: jr $ra +; O32-NEXT: addiu $sp, $sp, 24 +; +; N32-LABEL: caller: +; N32: # %bb.0: +; N32-NEXT: addiu $sp, $sp, -16 +; N32-NEXT: .cfi_def_cfa_offset 16 +; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; N32-NEXT: .cfi_offset 31, -8 +; N32-NEXT: lui $1, %hi(callee) +; N32-NEXT: addiu $25, $1, %lo(callee) +; N32-NEXT: jalr.hb $25 +; N32-NEXT: nop +; N32-NEXT: lui $1, %hi(val) +; N32-NEXT: addiu $1, $1, %lo(val) +; N32-NEXT: lui $2, 1285 +; N32-NEXT: daddiu $2, $2, 1285 +; N32-NEXT: dsll $2, $2, 16 +; N32-NEXT: daddiu $2, $2, 1285 +; N32-NEXT: dsll $2, $2, 20 +; N32-NEXT: daddiu $2, $2, 20560 +; N32-NEXT: sdl $2, 88($1) +; N32-NEXT: sdl $2, 80($1) +; N32-NEXT: lui $3, 20560 +; N32-NEXT: ori $3, $3, 20560 +; N32-NEXT: sw $3, 96($1) +; N32-NEXT: sdr $2, 95($1) +; N32-NEXT: sdr $2, 87($1) +; N32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; N32-NEXT: jr $ra +; N32-NEXT: addiu $sp, $sp, 16 +; +; N64-LABEL: caller: +; N64: # %bb.0: +; N64-NEXT: daddiu $sp, $sp, -16 +; N64-NEXT: .cfi_def_cfa_offset 16 +; N64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; N64-NEXT: .cfi_offset 31, -8 +; N64-NEXT: lui $1, %highest(callee) +; N64-NEXT: daddiu $1, $1, %higher(callee) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $1, $1, %hi(callee) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $25, $1, %lo(callee) +; N64-NEXT: jalr.hb $25 +; N64-NEXT: nop +; N64-NEXT: lui $1, %highest(val) +; N64-NEXT: daddiu $1, $1, %higher(val) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $1, $1, %hi(val) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $1, $1, %lo(val) +; N64-NEXT: lui $2, 
1285 +; N64-NEXT: daddiu $2, $2, 1285 +; N64-NEXT: dsll $2, $2, 16 +; N64-NEXT: daddiu $2, $2, 1285 +; N64-NEXT: dsll $2, $2, 20 +; N64-NEXT: daddiu $2, $2, 20560 +; N64-NEXT: lui $3, 20560 +; N64-NEXT: sdl $2, 88($1) +; N64-NEXT: sdl $2, 80($1) +; N64-NEXT: ori $3, $3, 20560 +; N64-NEXT: sw $3, 96($1) +; N64-NEXT: sdr $2, 95($1) +; N64-NEXT: sdr $2, 87($1) +; N64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; N64-NEXT: jr $ra +; N64-NEXT: daddiu $sp, $sp, 16 + call void @callee() + call void @llvm.memset.p0i8.i32(i8* bitcast (i32* getelementptr inbounds ([20 x i32], [20 x i32]* @val, i64 1, i32 0) to i8*), i8 80, i32 20, i32 4, i1 false) + ret void +} + diff --git a/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll new file mode 100644 index 000000000000..99612525ae3c --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll @@ -0,0 +1,5 @@ +; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32r2 -mattr=+micromips,+use-indirect-jump-hazard %s 2>&1 | FileCheck %s + +; Test that microMIPS and indirect jump with hazard barriers is not supported. + +; CHECK: LLVM ERROR: cannot combine indirect jumps with hazard barriers and microMIPS diff --git a/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll new file mode 100644 index 000000000000..48baedf53eaa --- /dev/null +++ b/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll @@ -0,0 +1,5 @@ +; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32 -mattr=+use-indirect-jump-hazard %s 2>&1 | FileCheck %s + +; Test that mips32 and indirect jump with hazard barriers is not supported. + +; CHECK: LLVM ERROR: indirect jumps with hazard barriers requires MIPS32R2 or later diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll new file mode 100644 index 000000000000..1cd86d617a24 --- /dev/null +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll @@ -0,0 +1,13 @@ +; Negative test. The constraint 'l' represents the register 'lo'. +; Check error message in case of invalid usage. +; +; RUN: not llc -march=mips -filetype=obj < %s 2>&1 | FileCheck %s + +define void @constraint_l() nounwind { +entry: + +; CHECK: error: invalid operand for instruction + + tail call i16 asm sideeffect "addiu $0,$1,$2", "=l,r,r,~{$1}"(i16 0, i16 0) + ret void +} diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll index 63ee42c0c7cd..b4c1587a8fbf 100644 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll +++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll @@ -41,5 +41,15 @@ entry: call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind store volatile i32 %4, i32* %bosco, align 4 +; Check the 'l' constraint for 16-bit type. 
+; CHECK: #APP +; CHECK: mtlo ${{[0-9]+}} +; CHECK-NEXT: madd ${{[0-9]+}}, ${{[0-9]+}} +; CHECK: #NO_APP +; CHECK-NEXT: mflo ${{[0-9]+}} + %bosco16 = alloca i16, align 4 + call i16 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind + store volatile i16 %5, i16* %bosco16, align 4 + ret i32 0 } diff --git a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir index 67733795ed5d..f2ca07367b99 100644 --- a/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ b/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -561,6 +561,25 @@ } ; Function Attrs: norecurse nounwind readnone + define i64 @testRLDICLo2(i64 %a, i64 %b) local_unnamed_addr #0 { + entry: + %shr = lshr i64 %a, 11 + %and = and i64 %shr, 16777215 + %tobool = icmp eq i64 %and, 0 + %cond = select i1 %tobool, i64 %b, i64 %and + ret i64 %cond + } + + define i64 @testRLDICLo3(i64 %a, i64 %b) local_unnamed_addr #0 { + entry: + %shr = lshr i64 %a, 11 + %and = and i64 %shr, 16777215 + %tobool = icmp eq i64 %and, 0 + %cond = select i1 %tobool, i64 %b, i64 %and + ret i64 %cond + } + + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINM(i32 zeroext %a) local_unnamed_addr #0 { entry: %shl = shl i32 %a, 4 @@ -602,6 +621,15 @@ } ; Function Attrs: norecurse nounwind readnone + define zeroext i32 @testRLWINMo2(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { + entry: + %and = and i32 %a, 255 + %tobool = icmp eq i32 %and, 0 + %cond = select i1 %tobool, i32 %b, i32 %a + ret i32 %cond + } + + ; Function Attrs: norecurse nounwind readnone define i64 @testRLWINM8o(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %a.tr = trunc i64 %a to i32 @@ -3904,6 +3932,113 @@ body: | ... --- +name: testRLDICLo2 +# CHECK-ALL: name: testRLDICLo2 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc, preferred-register: '' } + - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 3, class: crrc, preferred-register: '' } + - { id: 4, class: g8rc, preferred-register: '' } +liveins: + - { reg: '%x3', virtual-reg: '%0' } + - { reg: '%x4', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %x3, %x4 + + %1 = COPY %x4 + %0 = LI8 200 + %2 = RLDICLo %0, 61, 3, implicit-def %cr0 + ; CHECK-NOT: ANDI + ; CHECK-LATE-NOT: andi. + %3 = COPY killed %cr0 + %4 = ISEL8 %1, %2, %3.sub_eq + %x3 = COPY %4 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + +... 
+--- +name: testRLDICLo3 +# CHECK-ALL: name: testRLDICLo3 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc, preferred-register: '' } + - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 3, class: crrc, preferred-register: '' } + - { id: 4, class: g8rc, preferred-register: '' } +liveins: + - { reg: '%x3', virtual-reg: '%0' } + - { reg: '%x4', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %x3, %x4 + + %1 = COPY %x4 + %0 = LI8 2 + %2 = RLDICLo %0, 32, 32, implicit-def %cr0 + ; CHECK: ANDIo8 %0, 0 + ; CHECK-LATE: li 3, 2 + ; CHECK-LATE: andi. 3, 3, 0 + %3 = COPY killed %cr0 + %4 = ISEL8 %1, %2, %3.sub_eq + %x3 = COPY %4 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + +... +--- name: testRLWINM # CHECK-ALL: name: testRLWINM alignment: 4 @@ -4170,6 +4305,69 @@ body: | ... --- +name: testRLWINMo2 +# CHECK-ALL: name: testRLWINMo2 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc, preferred-register: '' } + - { id: 1, class: g8rc, preferred-register: '' } + - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } + - { id: 3, class: gprc_and_gprc_nor0, preferred-register: '' } + - { id: 4, class: gprc, preferred-register: '' } + - { id: 5, class: crrc, preferred-register: '' } + - { id: 6, class: gprc, preferred-register: '' } + - { id: 7, class: g8rc, preferred-register: '' } + - { id: 8, class: g8rc, preferred-register: '' } + - { id: 9, class: g8rc, preferred-register: '' } +liveins: + - { reg: '%x3', virtual-reg: '%0' } + - { reg: '%x4', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0.entry: + liveins: %x3, %x4 + + %1 = COPY %x4 + %0 = COPY %x3 + %2 = COPY %1.sub_32 + %3 = LI -22 + %4 = RLWINMo %3, 5, 24, 31, implicit-def %cr0 + ; CHECK-NOT: ANDI + ; CHECK-LATE-NOT: andi. + %5 = COPY killed %cr0 + %6 = ISEL %2, %3, %5.sub_eq + %8 = IMPLICIT_DEF + %7 = INSERT_SUBREG %8, killed %6, 1 + %9 = RLDICL killed %7, 0, 32 + %x3 = COPY %9 + BLR8 implicit %lr8, implicit %rm, implicit %x3 + +... 
+--- name: testRLWINM8o # CHECK-ALL: name: testRLWINM8o alignment: 4 diff --git a/test/CodeGen/PowerPC/no-dup-of-bdnz.ll b/test/CodeGen/PowerPC/no-dup-of-bdnz.ll new file mode 100644 index 000000000000..7d72242aa457 --- /dev/null +++ b/test/CodeGen/PowerPC/no-dup-of-bdnz.ll @@ -0,0 +1,75 @@ +; RUN: opt -early-cse-memssa -loop-rotate -licm -loop-rotate -S %s -o - | FileCheck %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-8903f29.bc" +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define void @test(i64 %arg.ssa, i64 %arg.nb) local_unnamed_addr { +; Ensure that loop rotation doesn't duplicate the call to +; llvm.ppc.is.decremented.ctr.nonzero +; CHECK-LABEL: test +; CHECK: call i1 @llvm.ppc.is.decremented.ctr.nonzero +; CHECK-NOT: call i1 @llvm.ppc.is.decremented.ctr.nonzero +; CHECK: declare i1 @llvm.ppc.is.decremented.ctr.nonzero +entry: + switch i32 undef, label %BB_8 [ + i32 -2, label %BB_9 + i32 0, label %BB_9 + ] + +BB_1: ; preds = %BB_12, %BB_4 + %bcount.1.us = phi i64 [ %.810.us, %BB_4 ], [ 0, %BB_12 ] + %0 = add i64 %arg.ssa, %bcount.1.us + %.568.us = load i32, i32* undef, align 4 + %.15.i.us = icmp slt i32 0, %.568.us + br i1 %.15.i.us, label %BB_3, label %BB_2 + +BB_2: ; preds = %BB_1 + %.982.us = add nsw i64 %0, 1 + unreachable + +BB_3: ; preds = %BB_1 + %1 = add i64 %arg.ssa, %bcount.1.us + %2 = add i64 %1, 1 + %3 = call i1 @llvm.ppc.is.decremented.ctr.nonzero() + br i1 %3, label %BB_4, label %BB_7 + +BB_4: ; preds = %BB_3 + %.810.us = add nuw nsw i64 %bcount.1.us, 1 + br label %BB_1 + +BB_5: ; preds = %BB_7, %BB_5 + %lsr.iv20.i116 = phi i64 [ %2, %BB_7 ], [ %lsr.iv.next21.i126, %BB_5 ] + %lsr.iv.next21.i126 = add i64 %lsr.iv20.i116, 1 + br i1 undef, label %BB_5, label %BB_6 + +BB_6: ; preds = %BB_5 + ret void + +BB_7: ; preds = %BB_3 + br label %BB_5 + +BB_8: ; preds = %entry + ret void + +BB_9: ; preds = %entry, %entry + br label %BB_10 + +BB_10: ; preds = %BB_9 + br label %BB_11 + +BB_11: ; preds = %BB_11, %BB_10 + br i1 undef, label %BB_11, label %BB_12 + +BB_12: ; preds = %BB_11 + call void @llvm.ppc.mtctr.i64(i64 %arg.nb) + br label %BB_1 +} + +; Function Attrs: nounwind +declare void @llvm.ppc.mtctr.i64(i64) #0 + +; Function Attrs: nounwind +declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/PowerPC/pr35402.ll b/test/CodeGen/PowerPC/pr35402.ll new file mode 100644 index 000000000000..06e6d963b13f --- /dev/null +++ b/test/CodeGen/PowerPC/pr35402.ll @@ -0,0 +1,18 @@ +; RUN: llc -O2 < %s | FileCheck %s +target triple = "powerpc64le-linux-gnu" + +define void @test(i8* %p, i64 %data) { +entry: + %0 = tail call i64 @llvm.bswap.i64(i64 %data) + %ptr = bitcast i8* %p to i48* + %val = trunc i64 %0 to i48 + store i48 %val, i48* %ptr, align 1 + ret void + +; CHECK: sth +; CHECK: stw +; CHECK-NOT: stdbrx + +} + +declare i64 @llvm.bswap.i64(i64) diff --git a/test/CodeGen/Thumb/PR36658.mir b/test/CodeGen/Thumb/PR36658.mir new file mode 100644 index 000000000000..15a3c7f407b1 --- /dev/null +++ b/test/CodeGen/Thumb/PR36658.mir @@ -0,0 +1,359 @@ +# REQUIRES: asserts +# RUN: llc -run-pass arm-cp-islands %s -o - | FileCheck %s +# +# This is a reduced test made to expose a bug in +# ARMConstantIslandPass in Thumb1 mode, see PR36658. + +# Verify optimized JT code uses TBB instructions. 
+# CHECK-LABEL: bb.7.entry: +# CHECK: tTBB_JT %pc, killed %r2, %jump-table.1, 0 +# CHECK-LABEL: bb.8: +# CHECK: JUMPTABLE_TBB 0, %jump-table.1, 44 + +# CHECK-LABEL: bb.11.entry: +# CHECK: %r1 = tMOVSr %r0, implicit-def dead %cpsr +# CHECK: tTBB_JT %pc, killed %r2, %jump-table.0, 1 +# CHECK-LABEL: bb.12: +# CHECK: JUMPTABLE_TBB 1, %jump-table.0, 44 + +--- | + ; ModuleID = 'PR36658.ll' + source_filename = "PR36658.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv5e-none-linux-gnueabi" + + declare i32 @foo1(...) + + declare i32 @foo2(i32) + + declare i32 @foo3(i32*) + + ; Function Attrs: nounwind optsize + define internal fastcc i32 @foo4(i32* nocapture %ignore_ptr) #0 { + entry: + %call = tail call i32 @foo3(i32* undef) + switch i32 %call, label %sw.epilog [ + i32 120, label %sw.bb + i32 48, label %sw.bb73 + i32 49, label %sw.bb73 + i32 50, label %sw.bb73 + i32 51, label %sw.bb73 + i32 52, label %sw.bb73 + i32 53, label %sw.bb73 + i32 54, label %sw.bb73 + i32 55, label %sw.bb73 + i32 92, label %cleanup + i32 39, label %cleanup + i32 34, label %cleanup + i32 10, label %sw.bb91 + i32 110, label %sw.bb93 + i32 116, label %sw.bb94 + i32 114, label %sw.bb95 + i32 102, label %sw.bb96 + i32 98, label %sw.bb97 + i32 97, label %sw.bb98 + i32 118, label %sw.bb106 + i32 101, label %sw.bb107 + i32 69, label %sw.bb107 + i32 63, label %cleanup + ] + + sw.bb: ; preds = %entry + br label %while.cond + + while.cond: ; preds = %while.cond, %sw.bb + %call5 = tail call i32 @foo3(i32* null) + br label %while.cond + + sw.bb73: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + %0 = and i32 %call, -8 + %1 = icmp eq i32 %0, 48 + br i1 %1, label %while.body83.preheader, label %while.end88 + + while.body83.preheader: ; preds = %sw.bb73 + br label %while.body83 + + while.body83: ; preds = %while.body83.preheader, %while.body83 + %call87 = tail call i32 @foo3(i32* null) + br label %while.body83 + + while.end88: ; preds = %sw.bb73 + %call89 = tail call i32 @foo2(i32 %call) + unreachable + + sw.bb91: ; preds = %entry + store i32 1, i32* %ignore_ptr, align 4 + br label %cleanup + + sw.bb93: ; preds = %entry + br label %cleanup + + sw.bb94: ; preds = %entry + br label %cleanup + + sw.bb95: ; preds = %entry + br label %cleanup + + sw.bb96: ; preds = %entry + br label %cleanup + + sw.bb97: ; preds = %entry + br label %cleanup + + sw.bb98: ; preds = %entry + br label %cleanup + + sw.bb106: ; preds = %entry + br label %cleanup + + sw.bb107: ; preds = %entry, %entry + br i1 undef, label %cleanup, label %if.then109 + + if.then109: ; preds = %sw.bb107 + %call110 = tail call i32 bitcast (i32 (...)* @foo1 to i32 (i8*, i32)*)(i8* undef, i32 %call) + unreachable + + sw.epilog: ; preds = %entry + %call.off = add i32 %call, -32 + unreachable + + cleanup: ; preds = %sw.bb107, %sw.bb106, %sw.bb98, %sw.bb97, %sw.bb96, %sw.bb95, %sw.bb94, %sw.bb93, %sw.bb91, %entry, %entry, %entry, %entry + %retval.0 = phi i32 [ 11, %sw.bb106 ], [ 7, %sw.bb98 ], [ 8, %sw.bb97 ], [ 12, %sw.bb96 ], [ 13, %sw.bb95 ], [ 9, %sw.bb94 ], [ 10, %sw.bb93 ], [ 0, %sw.bb91 ], [ %call, %entry ], [ %call, %entry ], [ %call, %entry ], [ 27, %sw.bb107 ], [ %call, %entry ] + ret i32 %retval.0 + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { nounwind optsize } + attributes #1 = { nounwind } + +... 
+--- +name: foo4 +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '%r0' } +frameInfo: + stackSize: 8 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: 0, + callee-saved-register: '%lr', callee-saved-restored: false } + - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: 0, + callee-saved-register: '%r4' } +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.28', '%bb.26', '%bb.26', '%bb.26', '%bb.26', + '%bb.24', '%bb.23', '%bb.26', '%bb.26', '%bb.12', + '%bb.22' ] + - id: 1 + blocks: [ '%bb.19', '%bb.26', '%bb.26', '%bb.26', '%bb.21', + '%bb.26', '%bb.20', '%bb.26', '%bb.25', '%bb.26', + '%bb.15' ] +body: | + bb.0.entry: + successors: %bb.1(0x42c8590b), %bb.9(0x3d37a6f5) + liveins: %r0, %r4, %lr + + frame-setup tPUSH 14, %noreg, killed %r4, killed %lr, implicit-def %sp, implicit %sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset %lr, -4 + frame-setup CFI_INSTRUCTION offset %r4, -8 + %r4 = tMOVSr %r0, implicit-def dead %cpsr + tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit undef %r0, implicit-def %sp, implicit-def %r0 + %r1 = tMOVSr %r0, implicit-def dead %cpsr + tCMPi8 %r0, 68, 14, %noreg, implicit-def %cpsr + tBcc %bb.9, 12, killed %cpsr + + bb.1.entry: + successors: %bb.2(0x20000000), %bb.7(0x60000000) + liveins: %r0, %r1, %r4 + + tCMPi8 renamable %r1, 47, 14, %noreg, implicit-def %cpsr + tBcc %bb.2, 13, killed %cpsr + + bb.7.entry: + successors: %bb.16(0x71c71c72), %bb.8(0x0e38e38e) + liveins: %r0, %r1 + + %r2 = tMOVSr %r1, implicit-def dead %cpsr + renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 48, 14, %noreg + tCMPi8 killed renamable %r2, 8, 14, %noreg, implicit-def %cpsr + tBcc %bb.8, 2, killed %cpsr + + bb.16.sw.bb73: + successors: %bb.17(0x7fffffff), %bb.18(0x00000001) + liveins: %r0, %r1 + + renamable %r2, dead %cpsr = tMOVi8 7, 14, %noreg + renamable %r1, dead %cpsr = tBIC killed renamable %r1, killed renamable %r2, 14, %noreg + tCMPi8 killed renamable %r1, 48, 14, %noreg, implicit-def %cpsr + tBcc %bb.18, 1, killed %cpsr + + bb.17.while.body83: + renamable %r0, dead %cpsr = tMOVi8 0, 14, %noreg + tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, implicit-def dead %r0 + tB %bb.17, 14, %noreg + + bb.9.entry: + successors: %bb.10(0x45d1745d), %bb.29(0x3a2e8ba3) + liveins: %r0, %r1 + + %r2 = tMOVSr %r1, implicit-def dead %cpsr + renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 92, 14, %noreg + tCMPi8 renamable %r2, 10, 14, %noreg, implicit-def %cpsr + tBcc %bb.29, 9, killed %cpsr + + bb.10.entry: + successors: %bb.11(0x15555555), %bb.14(0x6aaaaaab) + liveins: %r0, %r1 + + %r2 = tMOVSr %r1, implicit-def dead %cpsr + renamable %r2, dead %cpsr = tSUBi8 killed renamable %r2, 110, 14, %noreg + tCMPi8 renamable %r2, 10, 14, %noreg, implicit-def %cpsr + tBcc %bb.11, 8, killed %cpsr + + bb.14.entry: + successors: %bb.19(0x1999999a), %bb.26(0x00000000), %bb.21(0x1999999a), %bb.20(0x1999999a), %bb.25(0x1999999a), %bb.15(0x1999999a) + liveins: %r2 + + renamable %r0, dead %cpsr = tLSLri killed renamable %r2, 2, 14, %noreg + renamable %r1 = tLEApcrelJT %jump-table.1, 14, %noreg + renamable %r0 = tLDRr killed renamable %r1, killed renamable %r0, 14, %noreg :: (load 4 from jump-table) + tBR_JTr killed renamable %r0, %jump-table.1 + + bb.19.sw.bb93: + renamable %r1, dead %cpsr = tMOVi8 10, 14, %noreg + tB 
%bb.28, 14, %noreg + + bb.15.while.cond: + renamable %r0, dead %cpsr = tMOVi8 0, 14, %noreg + tBL 14, %noreg, @foo3, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, implicit-def dead %r0 + tB %bb.15, 14, %noreg + + bb.29.entry: + successors: %bb.28(0x1999999a), %bb.26(0x00000000), %bb.24(0x1999999a), %bb.23(0x1999999a), %bb.12(0x1999999a), %bb.22(0x1999999a) + liveins: %r0, %r2 + + renamable %r1, dead %cpsr = tLSLri killed renamable %r2, 2, 14, %noreg + renamable %r2 = tLEApcrelJT %jump-table.0, 14, %noreg + renamable %r2 = tLDRr killed renamable %r2, killed renamable %r1, 14, %noreg :: (load 4 from jump-table) + %r1 = tMOVSr %r0, implicit-def dead %cpsr + tBR_JTr killed renamable %r2, %jump-table.0 + + bb.24.sw.bb98: + renamable %r1, dead %cpsr = tMOVi8 7, 14, %noreg + tB %bb.28, 14, %noreg + + bb.2.entry: + successors: %bb.27(0x2aaaaaab), %bb.3(0x55555555) + liveins: %r0, %r1, %r4 + + tCMPi8 renamable %r1, 10, 14, %noreg, implicit-def %cpsr + tBcc %bb.27, 0, killed %cpsr + + bb.3.entry: + successors: %bb.4, %bb.5 + liveins: %r0, %r1 + + tCMPi8 renamable %r1, 34, 14, %noreg, implicit-def %cpsr + tBcc %bb.5, 1, killed %cpsr + + bb.4: + liveins: %r0 + + %r1 = tMOVSr killed %r0, implicit-def dead %cpsr + tB %bb.28, 14, %noreg + + bb.25.sw.bb106: + renamable %r1, dead %cpsr = tMOVi8 11, 14, %noreg + tB %bb.28, 14, %noreg + + bb.23.sw.bb97: + renamable %r1, dead %cpsr = tMOVi8 8, 14, %noreg + tB %bb.28, 14, %noreg + + bb.27.sw.bb91: + liveins: %r4 + + renamable %r0, dead %cpsr = tMOVi8 1, 14, %noreg + tSTRi killed renamable %r0, killed renamable %r4, 0, 14, %noreg :: (store 4 into %ir.ignore_ptr) + renamable %r1, dead %cpsr = tMOVi8 0, 14, %noreg + tB %bb.28, 14, %noreg + + bb.21.sw.bb95: + renamable %r1, dead %cpsr = tMOVi8 13, 14, %noreg + tB %bb.28, 14, %noreg + + bb.20.sw.bb94: + renamable %r1, dead %cpsr = tMOVi8 9, 14, %noreg + tB %bb.28, 14, %noreg + + bb.5.entry: + liveins: %r0, %r1 + + tCMPi8 killed renamable %r1, 39, 14, %noreg, implicit-def %cpsr + tB %bb.6, 14, %noreg + + bb.11.entry: + successors: %bb.12(0x80000000), %bb.26(0x00000000) + liveins: %r0, %r1 + + tCMPi8 killed renamable %r1, 69, 14, %noreg, implicit-def %cpsr + tBcc %bb.26, 1, killed %cpsr + + bb.12.sw.bb107: + successors: %bb.28(0x7fffffff), %bb.13(0x00000001) + liveins: %r0 + + renamable %r1, dead %cpsr = tMOVi8 27, 14, %noreg + renamable %r2, dead %cpsr = tMOVi8 0, 14, %noreg + tCMPi8 killed renamable %r2, 0, 14, %noreg, implicit-def %cpsr + tBcc %bb.28, 1, killed %cpsr + + bb.13.if.then109: + successors: + liveins: %r0 + + %r1 = tMOVSr killed %r0, implicit-def dead %cpsr + tBL 14, %noreg, @foo1, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit undef %r0, implicit %r1, implicit-def %sp, implicit-def dead %r0 + + bb.8.entry: + liveins: %r0, %r1 + + tCMPi8 killed renamable %r1, 63, 14, %noreg, implicit-def %cpsr + + bb.6.entry: + successors: %bb.28(0x80000000), %bb.26(0x00000000) + liveins: %cpsr, %r0 + + tPUSH 14, %noreg, killed %r0, implicit-def %sp, implicit %sp + tPOP 14, %noreg, def %r1, implicit-def %sp, implicit %sp + tBcc %bb.28, 0, killed %cpsr + + bb.26.sw.epilog: + successors: + + + bb.22.sw.bb96: + renamable %r1, dead %cpsr = tMOVi8 12, 14, %noreg + + bb.28.cleanup: + liveins: %r1 + + %r0 = tMOVSr killed %r1, implicit-def dead %cpsr + tPOP_RET 14, %noreg, def %r4, def %pc, implicit-def %sp, implicit %sp, implicit %r0 + + bb.18.while.end88: + liveins: %r0 + + tBL 14, %noreg, @foo2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit-def %sp, 
implicit-def dead %r0 + +... diff --git a/test/CodeGen/X86/GlobalISel/add-scalar.ll b/test/CodeGen/X86/GlobalISel/add-scalar.ll index 0ef7c956d493..3d41d759409d 100644 --- a/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; ; X32-LABEL: test_add_i64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: movl 16(%ebp), %eax -; X32-NEXT: movl 20(%ebp), %edx -; X32-NEXT: addl 8(%ebp), %eax -; X32-NEXT: adcl 12(%ebp), %edx -; X32-NEXT: popl %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %eax +; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X32-NEXT: retl %ret = add i64 %arg1, %arg2 ret i64 %ret diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll index 3a720a5288a2..53707cb31380 100644 --- a/test/CodeGen/X86/O0-pipeline.ll +++ b/test/CodeGen/X86/O0-pipeline.ll @@ -37,6 +37,8 @@ ; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: Expand ISel Pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: X86 EFLAGS copy lowering ; CHECK-NEXT: X86 WinAlloca Expander ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll deleted file mode 100644 index b69b18531601..000000000000 --- a/test/CodeGen/X86/clobber-fi0.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.7.0" - -; In the code below we need to copy the EFLAGS because of scheduling constraints. -; When copying the EFLAGS we need to write to the stack with push/pop. This forces -; us to emit the prolog. 
- -; CHECK: main -; CHECK: subq{{.*}}rsp -; CHECK: ret -define i32 @main(i32 %arg, i8** %arg1) nounwind { -bb: - %tmp = alloca i32, align 4 ; [#uses=3 type=i32*] - %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*] - %tmp3 = alloca i32 ; [#uses=1 type=i32*] - store volatile i32 1, i32* %tmp, align 4 - store volatile i32 1, i32* %tmp2, align 4 - br label %bb4 - -bb4: ; preds = %bb4, %bb - %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32] - %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32] - store volatile i32 %tmp7, i32* %tmp2, align 4 - %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1] - %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32] - %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32] - store volatile i32 %tmp10, i32* %tmp3 - br i1 %tmp8, label %bb11, label %bb4 - -bb11: ; preds = %bb4 - %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32] - ret i32 %tmp12 -} - - diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index 8d289fa9fb03..827aba78699c 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,100 +1,110 @@ -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 -; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf -; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf - -; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME -; in X86InstrInfo::copyPhysReg() is resolved. 
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF declare i32 @foo() declare i32 @bar(i64) -define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { -; i386-LABEL: test_intervening_call: -; i386: cmpxchg8b -; i386-NEXT: pushl %eax -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: popl %eax -; i386-NEXT: subl $8, %esp -; i386-NEXT: pushl %edx -; i386-NEXT: pushl %eax -; i386-NEXT: calll bar -; i386-NEXT: addl $16, %esp -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: jne - -; In the following case we get a long chain of EFLAGS save/restore due to -; a sequence of: +; In the following case when using fast scheduling we get a long chain of +; EFLAGS save/restore due to a sequence of: ; cmpxchg8b (implicit-def eflags) ; eax = copy eflags ; adjcallstackdown32 ; ... ; use of eax ; During PEI the adjcallstackdown32 is replaced with the subl which -; clobbers eflags, effectively interfering in the liveness interval. -; Is this a case we care about? Maybe no, considering this issue -; happens with the fast pre-regalloc scheduler enforced. A more -; performant scheduler would move the adjcallstackdown32 out of the -; eflags liveness interval. - -; i386f-LABEL: test_intervening_call: -; i386f: cmpxchg8b -; i386f-NEXT: pushl %eax -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: popl %eax -; i386f-NEXT: subl $8, %esp -; i386f-NEXT: pushl %eax -; i386f-NEXT: movl %ecx, %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: popl %eax -; i386f-NEXT: pushl %eax -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, %esi -; i386f-NEXT: popl %eax -; i386f-NEXT: pushl %edx -; i386f-NEXT: pushl %eax -; i386f-NEXT: calll bar -; i386f-NEXT: addl $16, %esp -; i386f-NEXT: movl %esi, %eax -; i386f-NEXT: addb $127, %al - -; x8664-LABEL: test_intervening_call: -; x8664: cmpxchgq -; x8664: pushfq -; x8664-NEXT: popq [[FLAGS:%.*]] -; x8664-NEXT: movq %rax, %rdi -; x8664-NEXT: callq bar -; x8664-NEXT: pushq [[FLAGS]] -; x8664-NEXT: popfq -; x8664-NEXT: jne - -; x8664-sahf-LABEL: test_intervening_call: -; x8664-sahf: cmpxchgq -; x8664-sahf: pushq %rax -; x8664-sahf-NEXT: seto %al -; x8664-sahf-NEXT: lahf -; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-sahf-NEXT: popq %rax -; x8664-sahf-NEXT: movq %rax, %rdi -; x8664-sahf-NEXT: callq bar -; RAX is dead, no need to push and pop it. -; x8664-sahf-NEXT: movq [[FLAGS]], %rax -; x8664-sahf-NEXT: addb $127, %al -; x8664-sahf-NEXT: sahf -; x8664-sahf-NEXT: jne - +; clobbers eflags, effectively interfering in the liveness interval. However, +; we then promote these copies into independent conditions in GPRs that avoids +; repeated saving and restoring logic and can be trivially managed by the +; register allocator. 
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind { +; 32-GOOD-RA-LABEL: test_intervening_call: +; 32-GOOD-RA: # %bb.0: # %entry +; 32-GOOD-RA-NEXT: pushl %ebx +; 32-GOOD-RA-NEXT: pushl %esi +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi) +; 32-GOOD-RA-NEXT: setne %bl +; 32-GOOD-RA-NEXT: subl $8, %esp +; 32-GOOD-RA-NEXT: pushl %edx +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: calll bar +; 32-GOOD-RA-NEXT: addl $16, %esp +; 32-GOOD-RA-NEXT: testb %bl, %bl +; 32-GOOD-RA-NEXT: jne .LBB0_3 +; 32-GOOD-RA-NEXT: # %bb.1: # %t +; 32-GOOD-RA-NEXT: movl $42, %eax +; 32-GOOD-RA-NEXT: jmp .LBB0_2 +; 32-GOOD-RA-NEXT: .LBB0_3: # %f +; 32-GOOD-RA-NEXT: xorl %eax, %eax +; 32-GOOD-RA-NEXT: .LBB0_2: # %t +; 32-GOOD-RA-NEXT: xorl %edx, %edx +; 32-GOOD-RA-NEXT: addl $4, %esp +; 32-GOOD-RA-NEXT: popl %esi +; 32-GOOD-RA-NEXT: popl %ebx +; 32-GOOD-RA-NEXT: retl +; +; 32-FAST-RA-LABEL: test_intervening_call: +; 32-FAST-RA: # %bb.0: # %entry +; 32-FAST-RA-NEXT: pushl %ebx +; 32-FAST-RA-NEXT: pushl %esi +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx +; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi) +; 32-FAST-RA-NEXT: setne %bl +; 32-FAST-RA-NEXT: subl $8, %esp +; 32-FAST-RA-NEXT: pushl %edx +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: calll bar +; 32-FAST-RA-NEXT: addl $16, %esp +; 32-FAST-RA-NEXT: testb %bl, %bl +; 32-FAST-RA-NEXT: jne .LBB0_3 +; 32-FAST-RA-NEXT: # %bb.1: # %t +; 32-FAST-RA-NEXT: movl $42, %eax +; 32-FAST-RA-NEXT: jmp .LBB0_2 +; 32-FAST-RA-NEXT: .LBB0_3: # %f +; 32-FAST-RA-NEXT: xorl %eax, %eax +; 32-FAST-RA-NEXT: .LBB0_2: # %t +; 32-FAST-RA-NEXT: xorl %edx, %edx +; 32-FAST-RA-NEXT: addl $4, %esp +; 32-FAST-RA-NEXT: popl %esi +; 32-FAST-RA-NEXT: popl %ebx +; 32-FAST-RA-NEXT: retl +; +; 64-ALL-LABEL: test_intervening_call: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: pushq %rbx +; 64-ALL-NEXT: movq %rsi, %rax +; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi) +; 64-ALL-NEXT: setne %bl +; 64-ALL-NEXT: movq %rax, %rdi +; 64-ALL-NEXT: callq bar +; 64-ALL-NEXT: testb %bl, %bl +; 64-ALL-NEXT: jne .LBB0_2 +; 64-ALL-NEXT: # %bb.1: # %t +; 64-ALL-NEXT: movl $42, %eax +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: retq +; 64-ALL-NEXT: .LBB0_2: # %f +; 64-ALL-NEXT: xorl %eax, %eax +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: retq +entry: %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst %v = extractvalue { i64, i1 } %cx, 0 %p = extractvalue { i64, i1 } %cx, 1 @@ -109,23 +119,62 @@ f: } ; Interesting in producing a clobber without any function calls. 
-define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) { -; i386-LABEL: test_control_flow: -; i386: cmpxchg -; i386-NEXT: jne - -; i386f-LABEL: test_control_flow: -; i386f: cmpxchg -; i386f-NEXT: jne - -; x8664-LABEL: test_control_flow: -; x8664: cmpxchg -; x8664-NEXT: jne - -; x8664-sahf-LABEL: test_control_flow: -; x8664-sahf: cmpxchg -; x8664-sahf-NEXT: jne - +define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind { +; 32-ALL-LABEL: test_control_flow: +; 32-ALL: # %bb.0: # %entry +; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; 32-ALL-NEXT: jle .LBB1_6 +; 32-ALL-NEXT: # %bb.1: # %loop_start +; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-ALL-NEXT: .p2align 4, 0x90 +; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i +; 32-ALL-NEXT: # =>This Loop Header: Depth=1 +; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2 +; 32-ALL-NEXT: movl (%ecx), %edx +; 32-ALL-NEXT: .p2align 4, 0x90 +; 32-ALL-NEXT: .LBB1_3: # %while.cond.i +; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1 +; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2 +; 32-ALL-NEXT: movl %edx, %eax +; 32-ALL-NEXT: xorl %edx, %edx +; 32-ALL-NEXT: testl %eax, %eax +; 32-ALL-NEXT: je .LBB1_3 +; 32-ALL-NEXT: # %bb.4: # %while.body.i +; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1 +; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx) +; 32-ALL-NEXT: jne .LBB1_2 +; 32-ALL-NEXT: # %bb.5: +; 32-ALL-NEXT: xorl %eax, %eax +; 32-ALL-NEXT: .LBB1_6: # %cond.end +; 32-ALL-NEXT: retl +; +; 64-ALL-LABEL: test_control_flow: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: cmpl %edx, %esi +; 64-ALL-NEXT: jle .LBB1_5 +; 64-ALL-NEXT: .p2align 4, 0x90 +; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i +; 64-ALL-NEXT: # =>This Loop Header: Depth=1 +; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2 +; 64-ALL-NEXT: movl (%rdi), %ecx +; 64-ALL-NEXT: .p2align 4, 0x90 +; 64-ALL-NEXT: .LBB1_2: # %while.cond.i +; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1 +; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2 +; 64-ALL-NEXT: movl %ecx, %eax +; 64-ALL-NEXT: xorl %ecx, %ecx +; 64-ALL-NEXT: testl %eax, %eax +; 64-ALL-NEXT: je .LBB1_2 +; 64-ALL-NEXT: # %bb.3: # %while.body.i +; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1 +; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi) +; 64-ALL-NEXT: jne .LBB1_1 +; 64-ALL-NEXT: # %bb.4: +; 64-ALL-NEXT: xorl %esi, %esi +; 64-ALL-NEXT: .LBB1_5: # %cond.end +; 64-ALL-NEXT: movl %esi, %eax +; 64-ALL-NEXT: retq entry: %cmp = icmp sgt i32 %i, %j br i1 %cmp, label %loop_start, label %cond.end @@ -158,52 +207,68 @@ cond.end: ; This one is an interesting case because CMOV doesn't have a chain ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. 
-define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { -; i386-LABEL: test_feed_cmov: -; i386: cmpxchgl -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: calll foo -; i386-NEXT: pushl %eax -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: popl %eax - -; i386f-LABEL: test_feed_cmov: -; i386f: cmpxchgl -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: calll foo -; i386f-NEXT: pushl %eax -; i386f-NEXT: movl [[FLAGS]], %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: popl %eax - -; x8664-LABEL: test_feed_cmov: -; x8664: cmpxchg -; x8664: pushfq -; x8664-NEXT: popq [[FLAGS:%.*]] -; x8664-NEXT: callq foo -; x8664-NEXT: pushq [[FLAGS]] -; x8664-NEXT: popfq - -; x8664-sahf-LABEL: test_feed_cmov: -; x8664-sahf: cmpxchgl -; RAX is dead, do not push or pop it. -; x8664-sahf-NEXT: seto %al -; x8664-sahf-NEXT: lahf -; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-sahf-NEXT: callq foo -; x8664-sahf-NEXT: pushq %rax -; x8664-sahf-NEXT: movq [[FLAGS]], %rax -; x8664-sahf-NEXT: addb $127, %al -; x8664-sahf-NEXT: sahf -; x8664-sahf-NEXT: popq %rax - +define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind { +; 32-GOOD-RA-LABEL: test_feed_cmov: +; 32-GOOD-RA: # %bb.0: # %entry +; 32-GOOD-RA-NEXT: pushl %ebx +; 32-GOOD-RA-NEXT: pushl %esi +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-GOOD-RA-NEXT: sete %bl +; 32-GOOD-RA-NEXT: calll foo +; 32-GOOD-RA-NEXT: testb %bl, %bl +; 32-GOOD-RA-NEXT: jne .LBB2_2 +; 32-GOOD-RA-NEXT: # %bb.1: # %entry +; 32-GOOD-RA-NEXT: movl %eax, %esi +; 32-GOOD-RA-NEXT: .LBB2_2: # %entry +; 32-GOOD-RA-NEXT: movl %esi, %eax +; 32-GOOD-RA-NEXT: addl $4, %esp +; 32-GOOD-RA-NEXT: popl %esi +; 32-GOOD-RA-NEXT: popl %ebx +; 32-GOOD-RA-NEXT: retl +; +; 32-FAST-RA-LABEL: test_feed_cmov: +; 32-FAST-RA: # %bb.0: # %entry +; 32-FAST-RA-NEXT: pushl %ebx +; 32-FAST-RA-NEXT: pushl %esi +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-FAST-RA-NEXT: sete %bl +; 32-FAST-RA-NEXT: calll foo +; 32-FAST-RA-NEXT: testb %bl, %bl +; 32-FAST-RA-NEXT: jne .LBB2_2 +; 32-FAST-RA-NEXT: # %bb.1: # %entry +; 32-FAST-RA-NEXT: movl %eax, %esi +; 32-FAST-RA-NEXT: .LBB2_2: # %entry +; 32-FAST-RA-NEXT: movl %esi, %eax +; 32-FAST-RA-NEXT: addl $4, %esp +; 32-FAST-RA-NEXT: popl %esi +; 32-FAST-RA-NEXT: popl %ebx +; 32-FAST-RA-NEXT: retl +; +; 64-ALL-LABEL: test_feed_cmov: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: pushq %rbp +; 64-ALL-NEXT: pushq %rbx +; 64-ALL-NEXT: pushq %rax +; 64-ALL-NEXT: movl %edx, %ebx +; 64-ALL-NEXT: movl %esi, %eax +; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi) +; 64-ALL-NEXT: sete %bpl +; 64-ALL-NEXT: callq foo +; 64-ALL-NEXT: testb %bpl, %bpl +; 64-ALL-NEXT: cmovnel %ebx, %eax +; 64-ALL-NEXT: addq $8, %rsp +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: popq %rbp +; 64-ALL-NEXT: retq +entry: %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1 diff --git a/test/CodeGen/X86/copy-eflags.ll b/test/CodeGen/X86/copy-eflags.ll index d98d8a7839b1..1f44559368a7 100644 --- a/test/CodeGen/X86/copy-eflags.ll +++ 
b/test/CodeGen/X86/copy-eflags.ll @@ -1,6 +1,8 @@ -; RUN: llc -o - %s | FileCheck %s -; This tests for the problem originally reported in http://llvm.org/PR25951 -target triple = "i686-unknown-linux-gnu" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32 +; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64 +; +; Test patterns that require preserving and restoring flags. @b = common global i8 0, align 1 @c = common global i32 0, align 4 @@ -8,13 +10,61 @@ target triple = "i686-unknown-linux-gnu" @d = common global i8 0, align 1 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 -; CHECK-LABEL: func: -; This tests whether eax is properly saved/restored around the -; lahf/sahf instruction sequences. We make mem op volatile to prevent -; their reordering to avoid spills. +declare void @external(i32) - -define i32 @func() { +; A test that re-uses flags in interesting ways due to volatile accesses. +; Specifically, the first increment's flags are reused for the branch despite +; being clobbered by the second increment. +define i32 @test1() nounwind { +; X32-LABEL: test1: +; X32: # %bb.0: # %entry +; X32-NEXT: movb b, %cl +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: incb %al +; X32-NEXT: movb %al, b +; X32-NEXT: incl c +; X32-NEXT: sete %dl +; X32-NEXT: movb a, %ah +; X32-NEXT: movb %ah, %ch +; X32-NEXT: incb %ch +; X32-NEXT: cmpb %cl, %ah +; X32-NEXT: sete d +; X32-NEXT: movb %ch, a +; X32-NEXT: testb %dl, %dl +; X32-NEXT: jne .LBB0_2 +; X32-NEXT: # %bb.1: # %if.then +; X32-NEXT: movsbl %al, %eax +; X32-NEXT: pushl %eax +; X32-NEXT: calll external +; X32-NEXT: addl $4, %esp +; X32-NEXT: .LBB0_2: # %if.end +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: retl +; +; X64-LABEL: test1: +; X64: # %bb.0: # %entry +; X64-NEXT: movb {{.*}}(%rip), %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: incb %al +; X64-NEXT: movb %al, {{.*}}(%rip) +; X64-NEXT: incl {{.*}}(%rip) +; X64-NEXT: sete %sil +; X64-NEXT: movb {{.*}}(%rip), %cl +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: incb %dl +; X64-NEXT: cmpb %dil, %cl +; X64-NEXT: sete {{.*}}(%rip) +; X64-NEXT: movb %dl, {{.*}}(%rip) +; X64-NEXT: testb %sil, %sil +; X64-NEXT: jne .LBB0_2 +; X64-NEXT: # %bb.1: # %if.then +; X64-NEXT: pushq %rax +; X64-NEXT: movsbl %al, %edi +; X64-NEXT: callq external +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .LBB0_2: # %if.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq entry: %bval = load i8, i8* @b %inc = add i8 %bval, 1 @@ -25,33 +75,290 @@ entry: %aval = load volatile i8, i8* @a %inc2 = add i8 %aval, 1 store volatile i8 %inc2, i8* @a -; Copy flags produced by the incb of %inc1 to a register, need to save+restore -; eax around it. The flags will be reused by %tobool. -; CHECK: pushl %eax -; CHECK: seto %al -; CHECK: lahf -; CHECK: movl %eax, [[REG:%[a-z]+]] -; CHECK: popl %eax %cmp = icmp eq i8 %aval, %bval %conv5 = zext i1 %cmp to i8 store i8 %conv5, i8* @d %tobool = icmp eq i32 %inc1, 0 -; We restore flags with an 'addb, sahf' sequence, need to save+restore eax -; around it. -; CHECK: pushl %eax -; CHECK: movl [[REG]], %eax -; CHECK: addb $127, %al -; CHECK: sahf -; CHECK: popl %eax br i1 %tobool, label %if.end, label %if.then if.then: %conv6 = sext i8 %inc to i32 - %call = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %conv6) + call void @external(i32 %conv6) br label %if.end if.end: ret i32 0 } -declare i32 @printf(i8* nocapture readonly, ...) +; Preserve increment flags across a call. +define i32 @test2(i32* %ptr) nounwind { +; X32-LABEL: test2: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: incl (%eax) +; X32-NEXT: setne %bl +; X32-NEXT: pushl $42 +; X32-NEXT: calll external +; X32-NEXT: addl $4, %esp +; X32-NEXT: testb %bl, %bl +; X32-NEXT: je .LBB1_1 +; X32-NEXT: # %bb.2: # %else +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: popl %ebx +; X32-NEXT: retl +; X32-NEXT: .LBB1_1: # %then +; X32-NEXT: movl $64, %eax +; X32-NEXT: popl %ebx +; X32-NEXT: retl +; +; X64-LABEL: test2: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: incl (%rdi) +; X64-NEXT: setne %bl +; X64-NEXT: movl $42, %edi +; X64-NEXT: callq external +; X64-NEXT: testb %bl, %bl +; X64-NEXT: je .LBB1_1 +; X64-NEXT: # %bb.2: # %else +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq +; X64-NEXT: .LBB1_1: # %then +; X64-NEXT: movl $64, %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq +entry: + %val = load i32, i32* %ptr + %inc = add i32 %val, 1 + store i32 %inc, i32* %ptr + %cmp = icmp eq i32 %inc, 0 + call void @external(i32 42) + br i1 %cmp, label %then, label %else + +then: + ret i32 64 + +else: + ret i32 0 +} + +declare void @external_a() +declare void @external_b() + +; This lowers to a conditional tail call instead of a conditional branch. This +; is tricky because we can only do this from a leaf function, and so we have to +; use volatile stores similar to test1 to force the save and restore of +; a condition without calling another function. We then set up subsequent calls +; in tail position. +define void @test_tail_call(i32* %ptr) nounwind optsize { +; X32-LABEL: test_tail_call: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: incl (%eax) +; X32-NEXT: setne %al +; X32-NEXT: incb a +; X32-NEXT: sete d +; X32-NEXT: testb %al, %al +; X32-NEXT: jne external_b # TAILCALL +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: jmp external_a # TAILCALL +; +; X64-LABEL: test_tail_call: +; X64: # %bb.0: # %entry +; X64-NEXT: incl (%rdi) +; X64-NEXT: setne %al +; X64-NEXT: incb {{.*}}(%rip) +; X64-NEXT: sete {{.*}}(%rip) +; X64-NEXT: testb %al, %al +; X64-NEXT: jne external_b # TAILCALL +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: jmp external_a # TAILCALL +entry: + %val = load i32, i32* %ptr + %inc = add i32 %val, 1 + store i32 %inc, i32* %ptr + %cmp = icmp eq i32 %inc, 0 + %aval = load volatile i8, i8* @a + %inc2 = add i8 %aval, 1 + store volatile i8 %inc2, i8* @a + %cmp2 = icmp eq i8 %inc2, 0 + %conv5 = zext i1 %cmp2 to i8 + store i8 %conv5, i8* @d + br i1 %cmp, label %then, label %else + +then: + tail call void @external_a() + ret void + +else: + tail call void @external_b() + ret void +} + +; Test a function that gets special select lowering into CFG with copied EFLAGS +; threaded across the CFG. This requires our EFLAGS copy rewriting to handle +; cross-block rewrites in at least some narrow cases. 
+define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) { +; X32-LABEL: PR37100: +; X32: # %bb.0: # %bb +; X32-NEXT: pushl %ebp +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %ebx +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: pushl %edi +; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 20 +; X32-NEXT: .cfi_offset %esi, -20 +; X32-NEXT: .cfi_offset %edi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movb {{[0-9]+}}(%esp), %ch +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: jmp .LBB3_1 +; X32-NEXT: .p2align 4, 0x90 +; X32-NEXT: .LBB3_5: # %bb1 +; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: idivl %ebp +; X32-NEXT: .LBB3_1: # %bb1 +; X32-NEXT: # =>This Inner Loop Header: Depth=1 +; X32-NEXT: movsbl %cl, %eax +; X32-NEXT: movl %eax, %edx +; X32-NEXT: sarl $31, %edx +; X32-NEXT: cmpl %eax, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: sbbl %edx, %eax +; X32-NEXT: setl %al +; X32-NEXT: setl %dl +; X32-NEXT: movzbl %dl, %ebp +; X32-NEXT: negl %ebp +; X32-NEXT: testb %al, %al +; X32-NEXT: jne .LBB3_3 +; X32-NEXT: # %bb.2: # %bb1 +; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X32-NEXT: movb %ch, %cl +; X32-NEXT: .LBB3_3: # %bb1 +; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X32-NEXT: movb %cl, (%ebx) +; X32-NEXT: movl (%edi), %edx +; X32-NEXT: testb %al, %al +; X32-NEXT: jne .LBB3_5 +; X32-NEXT: # %bb.4: # %bb1 +; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: jmp .LBB3_5 +; +; X64-LABEL: PR37100: +; X64: # %bb.0: # %bb +; X64-NEXT: movq %rdx, %r10 +; X64-NEXT: jmp .LBB3_1 +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB3_5: # %bb1 +; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: idivl %esi +; X64-NEXT: .LBB3_1: # %bb1 +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movsbq %dil, %rax +; X64-NEXT: xorl %esi, %esi +; X64-NEXT: cmpq %rax, %r10 +; X64-NEXT: setl %sil +; X64-NEXT: negl %esi +; X64-NEXT: cmpq %rax, %r10 +; X64-NEXT: jl .LBB3_3 +; X64-NEXT: # %bb.2: # %bb1 +; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X64-NEXT: movl %ecx, %edi +; X64-NEXT: .LBB3_3: # %bb1 +; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X64-NEXT: movb %dil, (%r8) +; X64-NEXT: jl .LBB3_5 +; X64-NEXT: # %bb.4: # %bb1 +; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; X64-NEXT: movl (%r9), %esi +; X64-NEXT: jmp .LBB3_5 +bb: + br label %bb1 + +bb1: + %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ] + %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ] + %tmp3 = icmp sgt i16 %tmp2, 7 + %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7 + %tmp5 = sext i8 %tmp to i64 + %tmp6 = icmp slt i64 %arg3, %tmp5 + %tmp7 = sext i1 %tmp6 to i32 + %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4 + store volatile i8 %tmp8, i8* %ptr1 + %tmp9 = load volatile i32, i32* %ptr2 + %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9 + %tmp11 = srem i32 0, %tmp10 + %tmp12 = trunc i32 %tmp11 to i16 + br label %bb1 +} + +; Use a particular instruction pattern in order to lower to the post-RA pseudo +; used to lower SETB into an SBB pattern in order to make sure that kind of +; usage of a copied EFLAGS continues to work. 
+define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) { +; X32-LABEL: PR37431: +; X32: # %bb.0: # %entry +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %esi, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: sarl $31, %ecx +; X32-NEXT: cmpl %eax, %eax +; X32-NEXT: sbbl %ecx, %eax +; X32-NEXT: setb %al +; X32-NEXT: sbbb %cl, %cl +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movb %cl, (%edx) +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: subl %eax, %ecx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: idivl %ecx +; X32-NEXT: movb %dl, (%esi) +; X32-NEXT: popl %esi +; X32-NEXT: retl +; +; X64-LABEL: PR37431: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movslq (%rdi), %rax +; X64-NEXT: cmpq %rax, %rax +; X64-NEXT: sbbb %dl, %dl +; X64-NEXT: cmpq %rax, %rax +; X64-NEXT: movb %dl, (%rsi) +; X64-NEXT: sbbl %esi, %esi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: idivl %esi +; X64-NEXT: movb %dl, (%rcx) +; X64-NEXT: retq +entry: + %tmp = load i32, i32* %arg1 + %tmp1 = sext i32 %tmp to i64 + %tmp2 = icmp ugt i64 %tmp1, undef + %tmp3 = zext i1 %tmp2 to i8 + %tmp4 = sub i8 0, %tmp3 + store i8 %tmp4, i8* %arg2 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = srem i32 0, %tmp5 + %tmp7 = trunc i32 %tmp6 to i8 + store i8 %tmp7, i8* %arg3 + ret void +} diff --git a/test/CodeGen/X86/domain-reassignment-implicit-def.ll b/test/CodeGen/X86/domain-reassignment-implicit-def.ll new file mode 100644 index 000000000000..1716b042d8ee --- /dev/null +++ b/test/CodeGen/X86/domain-reassignment-implicit-def.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s + +; Check that the X86 Domain Reassignment pass doesn't drop IMPLICIT_DEF nodes, +; which would later cause crashes (e.g. in LiveVariables) - see PR37430 +define void @domain_reassignment_implicit_def(i1 %cond, i8 *%mem, float %arg) { +; CHECK: vxorps %xmm1, %xmm1, %xmm1 +; CHECK: vcmpneqss %xmm1, %xmm0, %k0 +; CHECK: kmovb %k0, (%rsi) +top: + br i1 %cond, label %L19, label %L15 + +L15: ; preds = %top + %tmp47 = fcmp une float 0.000000e+00, %arg + %tmp48 = zext i1 %tmp47 to i8 + br label %L21 + +L19: ; preds = %top + br label %L21 + +L21: ; preds = %L19, %L15 + %.sroa.0.0 = phi i8 [ undef, %L19 ], [ %tmp48, %L15 ] + store i8 %.sroa.0.0, i8* %mem, align 1 + ret void +} diff --git a/test/CodeGen/X86/domain-reassignment-test.ll b/test/CodeGen/X86/domain-reassignment-test.ll new file mode 100644 index 000000000000..2ff5aea9606d --- /dev/null +++ b/test/CodeGen/X86/domain-reassignment-test.ll @@ -0,0 +1,37 @@ +; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mcpu=skylake-avx512 -mtriple=x86_64-unknown-linux-gnu %s -o - | llvm-mc -triple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 + +; Check that the X86 domain reassignment pass doesn't introduce an illegal +; test instruction. 
See PR37396 +define void @japi1_foo2_34617() { +pass2: + br label %if5 + +L174: + %tmp = icmp sgt <2 x i64> undef, zeroinitializer + %tmp1 = icmp sle <2 x i64> undef, undef + %tmp2 = and <2 x i1> %tmp, %tmp1 + %tmp3 = extractelement <2 x i1> %tmp2, i32 0 + %tmp4 = extractelement <2 x i1> %tmp2, i32 1 + %tmp106 = and i1 %tmp4, %tmp3 + %tmp107 = zext i1 %tmp106 to i8 + %tmp108 = and i8 %tmp122, %tmp107 + %tmp109 = icmp eq i8 %tmp108, 0 +; CHECK-NOT: testb {{%k[0-7]}} + br i1 %tmp109, label %L188, label %L190 + +if5: + %b.055 = phi i8 [ 1, %pass2 ], [ %tmp122, %if5 ] + %tmp118 = icmp sgt i64 undef, 0 + %tmp119 = icmp sle i64 undef, undef + %tmp120 = and i1 %tmp118, %tmp119 + %tmp121 = zext i1 %tmp120 to i8 + %tmp122 = and i8 %b.055, %tmp121 + br i1 undef, label %L174, label %if5 + +L188: + unreachable + +L190: + ret void +} diff --git a/test/CodeGen/X86/eflags-copy-expansion.mir b/test/CodeGen/X86/eflags-copy-expansion.mir deleted file mode 100644 index 11d4c81b9253..000000000000 --- a/test/CodeGen/X86/eflags-copy-expansion.mir +++ /dev/null @@ -1,64 +0,0 @@ -# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s - -# Verify that we correctly save and restore eax when copying eflags, -# even when only a smaller alias of eax is used. We used to check only -# eax and not its aliases. -# PR27624. - ---- | - target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - - define void @foo() { - entry: - br label %false - false: - ret void - } - -... - ---- -name: foo -tracksRegLiveness: true -liveins: - - { reg: '%edi' } -body: | - bb.0.entry: - liveins: %edi - NOOP implicit-def %al - - ; The bug was triggered only when LivePhysReg is used, which - ; happens only when the heuristic for the liveness computation - ; failed. The liveness computation heuristic looks at 10 instructions - ; before and after the copy. Make sure we do not reach the definition of - ; AL in 10 instructions, otherwise the heuristic will see that it is live. - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - NOOP - ; Save AL. - ; CHECK: PUSH32r killed %eax - - ; Copy edi into EFLAGS - ; CHECK-NEXT: %eax = MOV32rr %edi - ; CHECK-NEXT: %al = ADD8ri %al, 127, implicit-def %eflags - ; CHECK-NEXT: SAHF implicit-def %eflags, implicit %ah - %eflags = COPY %edi - - ; Restore AL. - ; CHECK-NEXT: %eax = POP32r - bb.1.false: - liveins: %al - NOOP implicit %al - RETQ - -... diff --git a/test/CodeGen/X86/fast-isel-shift.ll b/test/CodeGen/X86/fast-isel-shift.ll index 5d416e18260c..e9f01035b53a 100644 --- a/test/CodeGen/X86/fast-isel-shift.ll +++ b/test/CodeGen/X86/fast-isel-shift.ll @@ -381,3 +381,15 @@ define i64 @ashr_imm4_i64(i64 %a) { %c = ashr i64 %a, 4 ret i64 %c } + +; Make sure we don't crash on out of bounds i8 shifts. +define i8 @PR36731(i8 %a) { +; CHECK-LABEL: PR36731: +; CHECK: ## %bb.0: +; CHECK-NEXT: movb $255, %cl +; CHECK-NEXT: shlb %cl, %dil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq + %b = shl i8 %a, -1 + ret i8 %b +} diff --git a/test/CodeGen/X86/flags-copy-lowering.mir b/test/CodeGen/X86/flags-copy-lowering.mir new file mode 100644 index 000000000000..3d8a4ed3c734 --- /dev/null +++ b/test/CodeGen/X86/flags-copy-lowering.mir @@ -0,0 +1,555 @@ +# RUN: llc -run-pass x86-flags-copy-lowering -verify-machineinstrs -o - %s | FileCheck %s +# +# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF. 
+ +--- | + target triple = "x86_64-unknown-unknown" + + declare void @foo() + + define i32 @test_branch(i64 %a, i64 %b) { + entry: + call void @foo() + ret i32 0 + } + + define i32 @test_branch_fallthrough(i64 %a, i64 %b) { + entry: + call void @foo() + ret i32 0 + } + + define void @test_setcc(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_cmov(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_adc(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_sbb(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_adcx(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_adox(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_rcl(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_rcr(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } + + define void @test_setb_c(i64 %a, i64 %b) { + entry: + call void @foo() + ret void + } +... +--- +name: test_branch +# CHECK-LABEL: name: test_branch +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + successors: %bb.1, %bb.2, %bb.3 + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + CMP64rr %0, %1, implicit-def %eflags + %2:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags + ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %2 + JA_1 %bb.1, implicit %eflags + JB_1 %bb.2, implicit %eflags + JMP_1 %bb.3 + ; CHECK-NOT: %eflags = + ; + ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags + ; CHECK-NEXT: JNE_1 %bb.1, implicit killed %eflags + ; CHECK-SAME: {{$[[:space:]]}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: {{.*$}} + ; CHECK-SAME: {{$[[:space:]]}} + ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags + ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags + ; CHECK-NEXT: JMP_1 %bb.3 + + bb.1: + %3:gr32 = MOV32ri64 42 + %eax = COPY %3 + RET 0, %eax + + bb.2: + %4:gr32 = MOV32ri64 43 + %eax = COPY %4 + RET 0, %eax + + bb.3: + %5:gr32 = MOV32r0 implicit-def dead %eflags + %eax = COPY %5 + RET 0, %eax + +... 
+--- +name: test_branch_fallthrough +# CHECK-LABEL: name: test_branch_fallthrough +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + successors: %bb.1, %bb.2, %bb.3 + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + CMP64rr %0, %1, implicit-def %eflags + %2:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags + ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %2 + JA_1 %bb.2, implicit %eflags + JB_1 %bb.3, implicit %eflags + ; CHECK-NOT: %eflags = + ; + ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags + ; CHECK-NEXT: JNE_1 %bb.2, implicit killed %eflags + ; CHECK-SAME: {{$[[:space:]]}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: {{.*$}} + ; CHECK-SAME: {{$[[:space:]]}} + ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags + ; CHECK-NEXT: JNE_1 %bb.3, implicit killed %eflags + ; CHECK-SAME: {{$[[:space:]]}} + ; CHECK-NEXT: bb.1: + + bb.1: + %5:gr32 = MOV32r0 implicit-def dead %eflags + %eax = COPY %5 + RET 0, %eax + + bb.2: + %3:gr32 = MOV32ri64 42 + %eax = COPY %3 + RET 0, %eax + + bb.3: + %4:gr32 = MOV32ri64 43 + %eax = COPY %4 + RET 0, %eax + +... +--- +name: test_setcc +# CHECK-LABEL: name: test_setcc +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + CMP64rr %0, %1, implicit-def %eflags + %2:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags + ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags + ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %2 + %3:gr8 = SETAr implicit %eflags + %4:gr8 = SETBr implicit %eflags + %5:gr8 = SETEr implicit %eflags + SETNEm %rsp, 1, %noreg, -16, %noreg, implicit killed %eflags + MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %3 + MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4 + MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %5 + ; CHECK-NOT: %eflags = + ; CHECK-NOT: = SET{{.*}} + ; CHECK: MOV8mr {{.*}}, killed %[[A_REG]] + ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[B_REG]] + ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[E_REG]] + ; CHECK-CHECK: MOV8mr {{.*}}, killed %[[NE_REG]] + + RET 0 + +... 
+--- +name: test_cmov +# CHECK-LABEL: name: test_cmov +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + CMP64rr %0, %1, implicit-def %eflags + %2:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit %eflags + ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %2 + %3:gr64 = CMOVA64rr %0, %1, implicit %eflags + %4:gr64 = CMOVB64rr %0, %1, implicit %eflags + %5:gr64 = CMOVE64rr %0, %1, implicit %eflags + %6:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NOT: %eflags = + ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def %eflags + ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags + ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags + ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %3 + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4 + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5 + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6 + + RET 0 + +... +--- +name: test_adc +# CHECK-LABEL: name: test_adc +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def %eflags, implicit %eflags + %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def %eflags, implicit %eflags + ; CHECK-NOT: %eflags = + ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5 + + RET 0 + +... 
+--- +name: test_sbb +# CHECK-LABEL: name: test_sbb +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = SUB64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY killed %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def %eflags, implicit killed %eflags + %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead %eflags, implicit killed %eflags + ; CHECK-NOT: %eflags = + ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def %eflags, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5 + + RET 0 + +... +--- +name: test_adcx +# CHECK-LABEL: name: test_adcx +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags + ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = CMOVE64rr %0, %1, implicit %eflags + %5:gr64 = MOV64ri32 42 + %6:gr64 = ADCX64rr %2, %5, implicit-def %eflags, implicit %eflags + ; CHECK-NOT: %eflags = + ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 + ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags + ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4 + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6 + + RET 0 + +... 
+--- +name: test_adox +# CHECK-LABEL: name: test_adox +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit %eflags + ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = CMOVE64rr %0, %1, implicit %eflags + %5:gr64 = MOV64ri32 42 + %6:gr64 = ADOX64rr %2, %5, implicit-def %eflags, implicit %eflags + ; CHECK-NOT: %eflags = + ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = MOV64ri32 42 + ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def %eflags + ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} %eflags, implicit killed %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %4 + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %6 + + RET 0 + +... +--- +name: test_rcl +# CHECK-LABEL: name: test_rcl +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = RCL64r1 %2:gr64, implicit-def %eflags, implicit %eflags + %5:gr64 = RCL64r1 %4:gr64, implicit-def %eflags, implicit %eflags + ; CHECK-NOT: %eflags = + ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def %eflags, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5 + + RET 0 + +... 
+--- +name: test_rcr +# CHECK-LABEL: name: test_rcr +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr64 = RCR64r1 %2:gr64, implicit-def %eflags, implicit %eflags + %5:gr64 = RCR64r1 %4:gr64, implicit-def %eflags, implicit %eflags + ; CHECK-NOT: %eflags = + ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def %eflags + ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def %eflags, implicit killed %eflags + ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} %eflags, implicit{{( killed)?}} %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %5 + + RET 0 + +... +--- +name: test_setb_c +# CHECK-LABEL: name: test_setb_c +liveins: + - { reg: '%rdi', virtual-reg: '%0' } + - { reg: '%rsi', virtual-reg: '%1' } +body: | + bb.0: + liveins: %rdi, %rsi + + %0:gr64 = COPY %rdi + %1:gr64 = COPY %rsi + %2:gr64 = ADD64rr %0, %1, implicit-def %eflags + %3:gr64 = COPY %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags + ; CHECK-NOT: COPY{{( killed)?}} %eflags + + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp + + %eflags = COPY %3 + %4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags + MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4 + ; CHECK-NOT: %eflags = + ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags + ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit + ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]] + ; CHECK-NEXT: MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]] + + %eflags = COPY %3 + %5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags + MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5 + ; CHECK-NOT: %eflags = + ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]] + ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit + ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags + ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit + ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]] + ; CHECK-NEXT: MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]] + + %eflags = COPY %3 + %6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags + MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6 + ; CHECK-NOT: %eflags = + ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]] + ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 
implicit-def %eflags + ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]] + ; CHECK-NEXT: MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]] + + %eflags = COPY %3 + %7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags + MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7 + ; CHECK-NOT: %eflags = + ; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]] + ; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit + ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags + ; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit + ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]] + ; CHECK-NEXT: MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]] + + RET 0 + +... diff --git a/test/CodeGen/X86/ipra-reg-usage.ll b/test/CodeGen/X86/ipra-reg-usage.ll index 50c066de9656..e6cf4c023348 100644 --- a/test/CodeGen/X86/ipra-reg-usage.ll +++ b/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: %cs %ds %eflags %eip %eiz %es %fpsw %fs %gs %ip %rip %riz %ss %ssp %bnd0 %bnd1 %bnd2 %bnd3 %cr0 %cr1 %cr2 %cr3 %cr4 %cr5 %cr6 %cr7 %cr8 %cr9 %cr10 %cr11 %cr12 %cr13 %cr14 %cr15 %dr0 %dr1 %dr2 %dr3 %dr4 %dr5 %dr6 %dr7 %dr8 %dr9 %dr10 %dr11 %dr12 %dr13 %dr14 %dr15 %fp0 %fp1 %fp2 %fp3 %fp4 %fp5 %fp6 %fp7 %k0 %k1 %k2 %k3 %k4 %k5 %k6 %k7 %mm0 %mm1 %mm2 %mm3 %mm4 %mm5 %mm6 %mm7 %r11 %st0 %st1 %st2 %st3 %st4 %st5 %st6 %st7 %xmm16 %xmm17 %xmm18 %xmm19 %xmm20 %xmm21 %xmm22 %xmm23 %xmm24 %xmm25 %xmm26 %xmm27 %xmm28 %xmm29 %xmm30 %xmm31 %ymm0 %ymm1 %ymm2 %ymm3 %ymm4 %ymm5 %ymm6 %ymm7 %ymm8 %ymm9 %ymm10 %ymm11 %ymm12 %ymm13 %ymm14 %ymm15 %ymm16 %ymm17 %ymm18 %ymm19 %ymm20 %ymm21 %ymm22 %ymm23 %ymm24 %ymm25 %ymm26 %ymm27 %ymm28 %ymm29 %ymm30 %ymm31 %zmm0 %zmm1 %zmm2 %zmm3 %zmm4 %zmm5 %zmm6 %zmm7 %zmm8 %zmm9 %zmm10 %zmm11 %zmm12 %zmm13 %zmm14 %zmm15 %zmm16 %zmm17 %zmm18 %zmm19 %zmm20 %zmm21 %zmm22 %zmm23 %zmm24 %zmm25 %zmm26 %zmm27 %zmm28 %zmm29 %zmm30 %zmm31 %r11b %r11d %r11w +; CHECK: foo Clobbered Registers: %cs %df %ds %eflags %eip %eiz %es %fpsw %fs %gs %ip %rip %riz %ss %ssp %bnd0 %bnd1 %bnd2 %bnd3 %cr0 %cr1 %cr2 %cr3 %cr4 %cr5 %cr6 %cr7 %cr8 %cr9 %cr10 %cr11 %cr12 %cr13 %cr14 %cr15 %dr0 %dr1 %dr2 %dr3 %dr4 %dr5 %dr6 %dr7 %dr8 %dr9 %dr10 %dr11 %dr12 %dr13 %dr14 %dr15 %fp0 %fp1 %fp2 %fp3 %fp4 %fp5 %fp6 %fp7 %k0 %k1 %k2 %k3 %k4 %k5 %k6 %k7 %mm0 %mm1 %mm2 %mm3 %mm4 %mm5 %mm6 %mm7 %r11 %st0 %st1 %st2 %st3 %st4 %st5 %st6 %st7 %xmm16 %xmm17 %xmm18 %xmm19 %xmm20 %xmm21 %xmm22 %xmm23 %xmm24 %xmm25 %xmm26 %xmm27 %xmm28 %xmm29 %xmm30 %xmm31 %ymm0 %ymm1 %ymm2 %ymm3 %ymm4 %ymm5 %ymm6 %ymm7 %ymm8 %ymm9 %ymm10 %ymm11 %ymm12 %ymm13 %ymm14 %ymm15 %ymm16 %ymm17 %ymm18 %ymm19 %ymm20 %ymm21 %ymm22 %ymm23 %ymm24 %ymm25 %ymm26 %ymm27 %ymm28 %ymm29 %ymm30 %ymm31 %zmm0 %zmm1 %zmm2 %zmm3 %zmm4 %zmm5 %zmm6 %zmm7 %zmm8 %zmm9 %zmm10 %zmm11 %zmm12 %zmm13 %zmm14 %zmm15 %zmm16 %zmm17 %zmm18 %zmm19 %zmm20 %zmm21 %zmm22 %zmm23 %zmm24 %zmm25 %zmm26 %zmm27 %zmm28 %zmm29 %zmm30 %zmm31 %r11b %r11d %r11w call void @bar1() call void @bar2() ret void diff --git a/test/CodeGen/X86/mul-i1024.ll b/test/CodeGen/X86/mul-i1024.ll index 9980042a4ccc..16fb112efadb 100644 --- a/test/CodeGen/X86/mul-i1024.ll +++ b/test/CodeGen/X86/mul-i1024.ll @@ -6,4687 +6,4637 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-LABEL: test_1024: ; X32: # %bb.0: ; X32-NEXT: pushl 
%ebp -; X32-NEXT: movl %esp, %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $996, %esp # imm = 0x3E4 -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 32(%eax), %eax -; X32-NEXT: movl %eax, -188(%ebp) # 4-byte Spill -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: mull %ecx +; X32-NEXT: subl $1000, %esp # imm = 0x3E8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 48(%eax), %ecx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 32(%edx), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %edi, %edi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: movl 48(%esi), %eax -; X32-NEXT: movl %eax, -440(%ebp) # 4-byte Spill -; X32-NEXT: mull %ecx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl %edx, -884(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 32(%esi), %eax -; X32-NEXT: movl %eax, -416(%ebp) # 4-byte Spill -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -400(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx ; X32-NEXT: movl %edx, %eax -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 36(%eax), %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 36(%esi), %eax -; X32-NEXT: movl %eax, -316(%ebp) # 4-byte Spill -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, %edx -; X32-NEXT: 
movl -400(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %edx -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill -; X32-NEXT: leal (%ebx,%edi), %eax -; X32-NEXT: movl %edx, %edi -; X32-NEXT: leal (%ecx,%edi), %edx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: leal (%ebx,%eax), %eax +; X32-NEXT: leal (%ecx,%ebp), %edx ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -700(%ebp) # 4-byte Spill -; X32-NEXT: seto %al -; X32-NEXT: lahf -; X32-NEXT: movl %eax, %eax -; X32-NEXT: movl %eax, -640(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -112(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl %esi, %ebx -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movl 16(%ecx), %eax -; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: addl %esi, %eax -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl %edx, -428(%ebp) # 4-byte Spill -; X32-NEXT: movl (%ecx), %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl 16(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl %ebx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl (%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -264(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill -; X32-NEXT: addl %esi, %eax +; X32-NEXT: 
movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movl %edx, %eax -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -764(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -220(%ebp) # 4-byte Folded Spill -; X32-NEXT: setb -388(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -16(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %bh -; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: movzbl %bh, %eax -; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edx, %eax -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 52(%eax), %eax -; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -192(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %ecx, %edi +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl 4(%esi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %cl -; X32-NEXT: addl %eax, %esi +; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 56(%eax), %eax -; X32-NEXT: movl %eax, -408(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 8(%eax), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -392(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -412(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %eax, %esi +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl 52(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: adcl -216(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: movzbl %bl, %ebx +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl 56(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: mull %edx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: addl %ecx, %ebp +; 
X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -184(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -124(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %eax -; X32-NEXT: movl %eax, -352(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: addl %esi, %edi -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill -; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: seto %al -; X32-NEXT: lahf -; X32-NEXT: movl %eax, %eax -; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 16(%ecx), %eax -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl 20(%ecx), %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movl %ebx, -164(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 24(%eax), %eax -; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -40(%ebp) # 4-byte Spill ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %edi, -116(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -768(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -776(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; 
X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -772(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload -; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -332(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 20(%eax), %eax -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 20(%edi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -180(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 24(%eax), %eax -; 
X32-NEXT: movl %eax, -288(%ebp) # 4-byte Spill +; X32-NEXT: movl 24(%edi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %edx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %edi ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -320(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl %edx, %ebx ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: movl %ebx, -20(%ebp) # 4-byte Spill -; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -16(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -464(%ebp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl 4(%eax), %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, -276(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %edi ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl %edi, -584(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 8(%eax), %eax -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %esi +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %esi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %ecx ; X32-NEXT: adcl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl -432(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movl %esi, %edx -; X32-NEXT: addl -28(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill -; X32-NEXT: pushl %eax -; X32-NEXT: seto %al -; X32-NEXT: lahf -; X32-NEXT: movl %eax, %edx -; X32-NEXT: popl %eax -; X32-NEXT: movl %edx, -736(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %edx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %eax, %edx -; X32-NEXT: adcl -120(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %edx -; X32-NEXT: movl %edx, %eax -; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %eax -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload -; 
X32-NEXT: movl %eax, -620(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -40(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -788(%ebp) # 4-byte Spill -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -784(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -804(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -820(%ebp) # 4-byte Spill -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -116(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %eax -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: movl %esi, -576(%ebp) # 4-byte Spill -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -540(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -800(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -796(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -792(%ebp) # 4-byte Spill -; X32-NEXT: movl -220(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -220(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -388(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl 
%eax, -376(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 40(%eax), %eax -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %edi -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %esi, %edi -; X32-NEXT: adcl -376(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %edx +; X32-NEXT: addl %eax, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: addl %edi, %edx +; X32-NEXT: adcl %ebx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -816(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %edi, -372(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -812(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -808(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill -; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -676(%ebp) # 4-byte 
Spill -; X32-NEXT: seto %al -; X32-NEXT: lahf -; X32-NEXT: movl %eax, %eax -; X32-NEXT: movl %eax, -740(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -624(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -628(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl 48(%esi), %eax -; X32-NEXT: movl %eax, -300(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl 52(%esi), %eax -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %edi, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, -200(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 56(%eax), %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -360(%ebp) # 4-byte Spill -; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %ebx ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill -; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl %edx, %edi ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -472(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edi -; X32-NEXT: movl %edi, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -824(%ebp) # 4-byte Spill -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -588(%ebp) # 4-byte Spill -; X32-NEXT: movl -276(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -632(%ebp) # 4-byte Spill -; X32-NEXT: movl -240(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl %edx, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %eax +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -828(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -636(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 64(%eax), %eax -; X32-NEXT: movl %eax, -476(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 64(%edi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, -384(%ebp) # 4-byte Spill -; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -920(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: adcl -384(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -932(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 80(%eax), %eax -; X32-NEXT: movl %eax, -548(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: adcl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 80(%edi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -380(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -380(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %edx, -356(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -948(%ebp) # 4-byte Spill -; X32-NEXT: addl %esi, %edi +; X32-NEXT: movl %ebp, %edi +; X32-NEXT: movl 
%eax, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl %ebx, -960(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %ecx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl 80(%ecx), %eax -; X32-NEXT: movl %eax, -552(%ebp) # 4-byte Spill -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, -528(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -524(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %edi, %edi +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %edx, %eax -; X32-NEXT: movl -264(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -976(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl 64(%ecx), %eax -; X32-NEXT: movl %eax, -520(%ebp) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, -500(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, -496(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %ecx -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -1008(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %eax -; X32-NEXT: movl -336(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %eax -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -832(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %eax -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -672(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -836(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -840(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl 
-436(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -844(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -680(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -856(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -852(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -848(%ebp) # 4-byte Spill -; X32-NEXT: movl -44(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload -; X32-NEXT: pushl %eax -; X32-NEXT: movl %esi, %eax -; X32-NEXT: addb $127, %al -; X32-NEXT: sahf -; X32-NEXT: popl %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -860(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload +; X32-NEXT: addb $255, %al +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl %eax, -864(%ebp) # 4-byte Spill -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: adcl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -868(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -684(%ebp) # 4-byte Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -876(%ebp) # 4-byte Spill +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -472(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -872(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -880(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -140(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %eax, -888(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -688(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -900(%ebp) # 4-byte Spill -; X32-NEXT: movl -272(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -896(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -904(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 68(%eax), %eax -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -384(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -480(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl %ebx, %esi -; X32-NEXT: movl %esi, 
-652(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -96(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %ebx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movzbl -96(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 72(%eax), %eax -; X32-NEXT: movl %eax, -516(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -488(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl -384(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: movl -116(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl -480(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %edx +; X32-NEXT: addl %eax, %ebx +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %edx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -692(%ebp) # 4-byte Spill -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl -652(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, -908(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -916(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: movl %esi, -912(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -696(%ebp) # 4-byte Spill -; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -652(%ebp) # 4-byte Spill -; X32-NEXT: adcl -120(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -924(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl 84(%eax), %eax -; X32-NEXT: movl %eax, -544(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -356(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -380(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: addl %edi, %esi -; X32-NEXT: movl %esi, -660(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb %bl +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movzbl %bl, %esi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 88(%eax), %eax -; X32-NEXT: movl %eax, -580(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -600(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -604(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -356(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edx, %eax -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %esi +; X32-NEXT: addl %eax, %esi +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -704(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload -; X32-NEXT: movl -660(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -940(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl %edx, -944(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: movl %edi, -936(%ebp) # 4-byte Spill -; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -708(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -660(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -952(%ebp) # 
4-byte Spill -; X32-NEXT: adcl -56(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -956(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl %ebx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 84(%eax), %eax -; X32-NEXT: movl %eax, -460(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -524(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -668(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 88(%eax), %eax -; X32-NEXT: movl %eax, -492(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -560(%ebp) # 4-byte Spill -; X32-NEXT: movl -524(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %ebx +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %esi ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -732(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: movl %esi, %edx -; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill -; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -276(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -968(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: adcl -240(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -964(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -972(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: addl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl 68(%eax), %eax -; X32-NEXT: movl %eax, -444(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -496(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %ebx, %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -500(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -664(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -96(%ebp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movzbl -96(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 72(%eax), %eax -; X32-NEXT: movl %eax, -388(%ebp) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, -564(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -568(%ebp) # 4-byte Spill -; X32-NEXT: movl -500(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: mull %edx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -496(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %edx +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: movl %edx, %eax -; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -716(%ebp) # 4-byte Spill -; X32-NEXT: movl -664(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: adcl -276(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -988(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %esi -; X32-NEXT: adcl -240(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -984(%ebp) # 4-byte Spill -; X32-NEXT: movl %ecx, %esi -; X32-NEXT: adcl -172(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -980(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl -180(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -720(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; 
X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -664(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %edi, -996(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: movl %ecx, -1000(%ebp) # 4-byte Spill -; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl -528(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -1004(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: addl %edx, %eax -; X32-NEXT: movl %eax, -724(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, -668(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %ebx, -732(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -728(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %edi ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movl %edi, -232(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb 
-88(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -76(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl -72(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %edx -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl %edi, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, -76(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl (%esp), %edi # 4-byte Reload +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl %edx, %ecx -; X32-NEXT: adcl %esi, %eax +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl (%esp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl $0, %edx +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: setb %dl -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movzbl %dl, %edx ; X32-NEXT: adcl %ebx, %edx -; X32-NEXT: movl %edx, -608(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -28(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -164(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -56(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -60(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -232(%ebp) # 4-byte Spill -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -164(%ebp) # 4-byte Spill -; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte 
Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: adcl $0, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %ebx, %ecx -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -364(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movzbl -16(%ebp), %ebx # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %esi, %edx -; X32-NEXT: adcl %ecx, %ebx -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill +; X32-NEXT: setb %bl +; X32-NEXT: addl %eax, %edx +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %ecx, %eax +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: adcl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill -; 
X32-NEXT: movl %ebx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %edi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %esi, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %eax -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -88(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill -; X32-NEXT: addl -364(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -112(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -272(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill -; X32-NEXT: adcl -60(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %edi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -456(%ebp), %ecx # 4-byte Reload -; X32-NEXT: pushl %eax -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: addb $127, %al -; X32-NEXT: sahf -; X32-NEXT: popl %eax -; X32-NEXT: adcl -72(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -608(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -760(%ebp) # 4-byte Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -756(%ebp) # 4-byte Spill +; X32-NEXT: addl %esi, %eax +; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %eax -; X32-NEXT: adcl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -752(%ebp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -748(%ebp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: adcl -56(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -744(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 12(%eax), %eax -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %ebx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edx -; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %edx -; X32-NEXT: movzbl %cl, %eax -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: movl %edi, %esi -; X32-NEXT: addl %esi, %edx -; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -584(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movl -432(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl %ecx, %esi -; X32-NEXT: movl %esi, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -456(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %esi -; 
X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl %edi, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -276(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, -456(%ebp) # 4-byte Folded Spill +; X32-NEXT: movzbl %bl, %ebp +; X32-NEXT: adcl %ecx, %ebp +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: addl %ecx, %edx +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edi +; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: adcl $0, %ecx +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %ebx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl %ebx, %eax -; X32-NEXT: setb %bl -; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -24(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movzbl %bl, %esi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %edi, %edx -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -240(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -172(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -584(%ebp) # 4-byte Spill -; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -276(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl -112(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -736(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %eax -; X32-NEXT: addb $127, %al -; X32-NEXT: sahf -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -432(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -456(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edx, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %cl +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 44(%eax), %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl -128(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -304(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movl %ebx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %bl -; X32-NEXT: addl %eax, %esi -; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp +; X32-NEXT: setb %cl +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: addl %edx, %esi -; X32-NEXT: adcl %edi, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl -376(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edx -; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl %edi, %edx +; X32-NEXT: addl %edx, %ebp +; X32-NEXT: adcl %esi, %eax +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %edx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl %edi, -376(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, %edi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %eax ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: addl %edx, %eax +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %dl -; X32-NEXT: addl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movzbl %dl, %edx -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: addl -336(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -200(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -472(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -372(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -436(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movzbl %dl, %eax +; X32-NEXT: adcl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -740(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %eax -; X32-NEXT: 
addb $127, %al -; X32-NEXT: sahf -; X32-NEXT: movl -376(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -456(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %ebx, -584(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -200(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %edi, -240(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl %esi, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -640(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %eax -; X32-NEXT: addb $127, %al -; X32-NEXT: sahf -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -640(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -472(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 60(%eax), %eax -; X32-NEXT: movl %eax, -192(%ebp) # 4-byte Spill -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 60(%eax), 
%esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -16(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -68(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -764(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -80(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -16(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -36(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -20(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -80(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -36(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -68(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: adcl -420(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -616(%ebp), %edi # 
4-byte Folded Reload -; X32-NEXT: movl %edi, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -612(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx 
# 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -616(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %edi, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -612(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -152(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -424(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull 
%ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -424(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -420(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -20(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -44(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -364(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -68(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -196(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -504(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebp +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -68(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; 
X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -296(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -768(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -48(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 28(%eax), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; 
X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -372(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), 
%ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -296(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -776(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -772(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -780(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -508(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -504(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, (%esp) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte 
Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -16(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -372(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl 
%esi, %ecx -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -88(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -332(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -448(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill +; X32-NEXT: mull %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -16(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -80(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -80(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -80(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -80(%ebp) # 4-byte Spill -; 
X32-NEXT: movl -272(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -16(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -80(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -332(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: adcl -648(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -644(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill -; X32-NEXT: adcl -572(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -292(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -372(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -88(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -296(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl -332(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: addl -32(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -372(%ebp) # 4-byte Spill -; X32-NEXT: adcl -608(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -760(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -756(%ebp), %ecx # 
4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -752(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -748(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -332(%ebp) # 4-byte Spill -; X32-NEXT: movl -744(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 28(%eax), %eax -; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -572(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -228(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -428(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -52(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -428(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -44(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -52(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -44(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: 
adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -24(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -44(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -52(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -596(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill -; X32-NEXT: adcl -536(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; 
X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -648(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -644(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -536(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -536(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -32(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -536(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -596(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -452(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -536(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -596(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %edi -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -24(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -228(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -24(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl %cl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -24(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -228(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -344(%ebp), %edx # 4-byte Folded Reload 
-; X32-NEXT: adcl -404(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -532(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -592(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -572(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -448(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -196(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -572(%ebp) # 4-byte Spill -; X32-NEXT: adcl -428(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -464(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull 
%ebp ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -228(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -252(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -196(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -532(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -532(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -532(%ebp) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -368(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -428(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -448(%ebp) # 4-byte Spill -; X32-NEXT: adcl -452(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload -; 
X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -428(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl %ebp, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -452(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -448(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -428(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -328(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -452(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -196(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -228(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -448(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded 
Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -196(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -328(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -328(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl -228(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -448(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -328(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -368(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -788(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -784(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -592(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %ecx # 4-byte Reload 
-; X32-NEXT: adcl %ecx, -532(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -572(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -428(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -452(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -20(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl -464(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -620(%ebp) # 4-byte Spill -; X32-NEXT: adcl -68(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill -; X32-NEXT: setb -464(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -44(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax 
# 4-byte Reload +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -44(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -84(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -368(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %esi ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -368(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -540(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -576(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -20(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -576(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -368(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -368(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -368(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl -576(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -368(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl -52(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: setb -576(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: 
mull %esi -; X32-NEXT: movl %edx, -52(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -52(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -48(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -24(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -280(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -52(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: movzbl -576(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -540(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -800(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -796(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -792(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl -32(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -228(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -196(%ebp), 
%edx # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -620(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -368(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -328(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movzbl -464(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %edi -; X32-NEXT: addl -344(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -44(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -52(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -164(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -616(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -68(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -612(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -20(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -424(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -420(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill -; X32-NEXT: adcl -508(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: adcl -504(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl -64(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -464(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -372(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -332(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl $0, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -72(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; 
X32-NEXT: adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -468(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -804(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -164(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded 
Spill +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -40(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -72(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload ; 
X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -468(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl -816(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -812(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -344(%ebp) # 4-byte Spill -; X32-NEXT: adcl -808(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl (%esp), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -64(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), 
%esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -468(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -508(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -468(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %cl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -504(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -124(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -512(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -820(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -196(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill -; X32-NEXT: adcl -328(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -328(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: addl %edi, %eax +; 
X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -468(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -404(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -328(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -196(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -468(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -64(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -404(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb 
-404(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -404(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -72(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -64(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -196(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -512(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -676(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -624(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -628(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -152(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -628(%ebp) # 4-byte Spill -; X32-NEXT: adcl -232(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -624(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -344(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -232(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %ecx -; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -144(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -300(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -404(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -540(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -196(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -196(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -588(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -824(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -228(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -152(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -232(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -228(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, %edi +; X32-NEXT: mull %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 60(%eax), %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %esi, -164(%ebp) # 4-byte Spill +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 60(%eax), %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -196(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -264(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -420(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl 
%eax, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -424(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -152(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -232(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -424(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -152(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -152(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -152(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -152(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -424(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -420(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -232(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -588(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -632(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -828(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -636(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -540(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -628(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -624(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -196(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -56(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -424(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -420(%ebp) # 4-byte Spill -; X32-NEXT: adcl -344(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -636(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -632(%ebp) # 4-byte Spill -; X32-NEXT: setb -588(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -152(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -64(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; 
X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -64(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -672(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -832(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl -216(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -64(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: addl 
%edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -40(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -76(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -76(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -76(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -280(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -312(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -344(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -40(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -672(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: adcl -836(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -840(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl -844(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -232(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -424(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -152(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -420(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -72(%ebp), %esi # 4-byte 
Reload -; X32-NEXT: adcl -636(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -632(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -588(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %edx, %esi +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl %ebp, %edx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl -56(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, -76(%ebp) # 4-byte Folded Spill -; X32-NEXT: addl -512(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -232(%ebp) # 4-byte Spill -; X32-NEXT: adcl -676(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -432(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -456(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -344(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -584(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl 
-172(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -32(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %edx, -508(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -68(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -504(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -328(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -368(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -468(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -44(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -404(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -52(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -540(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -24(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -228(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -464(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl %edx, -196(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -232(%ebp), %edx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl -152(%ebp), %esi # 4-byte Reload ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -72(%ebp), %edi # 4-byte Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -64(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -292(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -232(%ebp) # 4-byte Spill -; X32-NEXT: adcl -372(%ebp), %esi # 
4-byte Folded Reload -; X32-NEXT: movl %esi, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -88(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl -296(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -40(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl -56(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -332(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl -80(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: setb -372(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -240(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -240(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; 
X32-NEXT: setb %bl -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -392(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -80(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -36(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -340(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -680(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -884(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), 
%edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -20(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -36(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -80(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -172(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -20(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill -; X32-NEXT: 
movl %eax, -80(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -172(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -172(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -392(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -20(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -172(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -680(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -856(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill -; X32-NEXT: adcl -852(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -848(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
+; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -20(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -148(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -24(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edx, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -432(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -456(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -656(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -892(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -44(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -52(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte 
Spill -; X32-NEXT: adcl $0, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -20(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -416(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -236(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -44(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -112(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -52(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl 
-304(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -32(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -24(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -24(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -236(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -88(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -32(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; 
X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -20(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -24(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -656(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -700(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -860(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -864(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: addl -272(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -276(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -332(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -368(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -36(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -292(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: 
movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: addl %edi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -24(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -272(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -68(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -684(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -868(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -276(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -512(%ebp) # 4-byte Spill -; X32-NEXT: adcl -240(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -416(%ebp), %edi # 
4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -276(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -240(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: addl %edi, %eax +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -512(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -32(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -512(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -32(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -68(%ebp), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, %edi -; X32-NEXT: setb -68(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl -68(%ebp), %esi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -364(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -68(%ebp) # 4-byte Spill -; X32-NEXT: movl -296(%ebp), %edx # 4-byte Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ebx, %edx -; X32-NEXT: movl -32(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -512(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -684(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -876(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -872(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -880(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -88(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -332(%ebp), %ecx # 4-byte 
Reload -; X32-NEXT: adcl %ecx, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -368(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -240(%ebp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -80(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl -36(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl -292(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -172(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -68(%ebp) # 4-byte Spill -; X32-NEXT: setb -88(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -172(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte 
Folded Spill +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -172(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -336(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -392(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -176(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -412(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -336(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -300(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -172(%ebp) # 4-byte Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -80(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl -332(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl -688(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -888(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -36(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -20(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -336(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
+; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -20(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -36(%ebp) # 4-byte Spill -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -20(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -164(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -20(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -340(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -140(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -36(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -332(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -20(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -336(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -140(%ebp) # 4-byte Spill -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -408(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -244(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -332(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -336(%ebp) # 4-byte Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -332(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -332(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -192(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -332(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -392(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -224(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -412(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -360(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -132(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -688(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -900(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -360(%ebp) # 4-byte Spill -; X32-NEXT: adcl -896(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -392(%ebp) # 4-byte Spill -; X32-NEXT: adcl -904(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -412(%ebp) # 4-byte Spill -; X32-NEXT: movl -172(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -80(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -36(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -292(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -20(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -360(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -392(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl $0, %eax -; X32-NEXT: movl -412(%ebp), %ebx # 4-byte Reload +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -656(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -172(%ebp) # 4-byte Spill -; X32-NEXT: adcl -700(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -80(%ebp) # 4-byte Spill -; X32-NEXT: adcl -376(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -220(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -336(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -640(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -360(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -472(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -392(%ebp) # 4-byte Spill -; X32-NEXT: adcl -436(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -232(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -432(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -456(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -44(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -52(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -344(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -24(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -40(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -272(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -276(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -240(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -372(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, -172(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -80(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), 
%ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -36(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %edi, -20(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -336(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -360(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -392(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %ebx, -412(%ebp) # 4-byte Spill -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -140(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; 
X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -476(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -40(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: movzbl -40(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl -692(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -920(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -132(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded 
Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -132(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 76(%eax), %edx -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 76(%eax), %ecx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %ecx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -132(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -116(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -484(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebx, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -56(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -284(%ebp), 
%eax # 4-byte Reload -; X32-NEXT: movl -516(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -200(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl -284(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -224(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -224(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -224(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -308(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -488(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movzbl (%esp), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -176(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -200(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -692(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill -; X32-NEXT: adcl -908(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl -916(%ebp), %edi # 4-byte Folded 
Reload -; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -912(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -32(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -248(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -480(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -384(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -224(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -344(%ebp) # 4-byte Spill -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -436(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, 
%eax -; X32-NEXT: movl -248(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -232(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -92(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -696(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -932(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -224(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -56(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -436(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -72(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -472(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; 
X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -484(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -488(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -88(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -436(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -472(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %edi +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -224(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: addl (%esp), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -76(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -132(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -72(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -72(%ebp), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl -104(%ebp), %ebx # 4-byte Reload -; X32-NEXT: addl -484(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -488(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ebx -; X32-NEXT: adcl %edx, %esi -; 
X32-NEXT: movl %esi, -72(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -76(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -56(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -696(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -652(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -924(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -928(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: addl -64(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: adcl -220(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -76(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -68(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -32(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -40(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -544(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -544(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl %edx, %ebx +; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edi +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Folded Reload +; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %cl +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: mull %esi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -380(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -356(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edx, %edi +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -56(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %edi ; X32-NEXT: 
movl %ecx, %eax -; X32-NEXT: mull %edi +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -296(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -56(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -56(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -704(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -948(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -140(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -292(%ebp) # 4-byte Spill -; X32-NEXT: adcl -40(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -220(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -64(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -40(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -140(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 92(%eax), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 92(%eax), %ebp ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %ebx +; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi ; X32-NEXT: setb %cl -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -28(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl 
-600(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -256(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -604(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -292(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -64(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -376(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -580(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -292(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl -292(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -292(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -292(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -104(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -600(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %ebx # 
4-byte Reload -; X32-NEXT: adcl -604(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -64(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -220(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -376(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -704(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -940(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -944(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -936(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -88(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -76(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -296(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -40(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -72(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -56(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -64(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl -68(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -68(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -152(%ebp) # 4-byte Spill -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -548(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -544(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ecx, %esi +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb (%esp) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -380(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -308(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -356(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -380(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -548(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -76(%ebp) # 4-byte Spill -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -544(%ebp), %edi # 4-byte Reload -; 
X32-NEXT: mull %edi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -708(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -960(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -176(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -376(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -380(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -580(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl -212(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -140(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -200(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edi, %ebx -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl 
-604(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -376(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -380(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill -; X32-NEXT: adcl -356(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: setb -356(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -580(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -380(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -224(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -380(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx @@ -4694,2031 +4644,2038 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -380(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -380(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -600(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -604(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -224(%ebp), 
%edx # 4-byte Reload -; X32-NEXT: addl -116(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -356(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -708(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -660(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -952(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -956(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -76(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -220(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -72(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -68(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -176(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -152(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -200(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -32(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, (%esp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -224(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %edi, -380(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill -; X32-NEXT: movl -516(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -116(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; 
X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -116(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -32(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -132(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -484(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -488(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -204(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -116(%ebp) # 4-byte Spill -; X32-NEXT: movl -476(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -188(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -220(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -220(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -220(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi ; X32-NEXT: setb %bl -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -480(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %ecx # 4-byte Reload 
-; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -356(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl -32(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -84(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -476(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -356(%ebp) # 4-byte Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -32(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -248(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -32(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -480(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -384(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -356(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -204(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -32(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -84(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -480(%ebp) # 4-byte Spill -; X32-NEXT: adcl -116(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -384(%ebp) # 4-byte Spill -; X32-NEXT: setb -204(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -516(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill -; X32-NEXT: movl -132(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -100(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -84(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -484(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -304(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -488(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -480(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -384(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -100(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -204(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: 
addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -488(%ebp) # 4-byte Spill -; X32-NEXT: movl -548(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -236(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -544(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -236(%ebp) # 4-byte Spill -; X32-NEXT: movl -580(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: imull %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -148(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -188(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -140(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: imull %edi, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax ; X32-NEXT: movl %edi, %esi -; X32-NEXT: movl -548(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %eax, %ebx ; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %edi +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -544(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebp, %esi ; X32-NEXT: setb %bl -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill -; X32-NEXT: movl -476(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -164(%ebp), %esi # 4-byte Reload -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %esi, %edx -; X32-NEXT: imull -248(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -244(%ebp) # 4-byte Spill -; X32-NEXT: movl -516(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -144(%ebp), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -300(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: imull %edi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebx, %edx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -148(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl -476(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb 
-244(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -248(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -244(%ebp), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl -128(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -132(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -236(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -204(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -84(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -116(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -164(%ebp) # 4-byte Spill -; X32-NEXT: adcl -484(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -384(%ebp) # 4-byte Spill -; X32-NEXT: adcl -488(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -300(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: movl 104(%esi), %ebx -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl %ebx, -244(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %edi # 4-byte Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl 104(%esi), %ebp +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl 108(%esi), %eax -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: 
movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ebx -; X32-NEXT: setb -116(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movzbl -116(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill -; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -256(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: movl %edx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movl 96(%ecx), %edi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -132(%ebp) # 4-byte Spill -; X32-NEXT: movl 100(%ecx), %eax -; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl 96(%edi), %ebx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -132(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl 100(%edi), %edi ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -116(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: movzbl 
-144(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %edx, %eax +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi -; X32-NEXT: adcl %esi, %eax -; X32-NEXT: addl -236(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -28(%ebp) # 4-byte Spill -; X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -112(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -140(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -236(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %edx, %eax +; X32-NEXT: addl %edi, %ecx +; X32-NEXT: adcl %ebx, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -204(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -204(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb %cl -; X32-NEXT: movl -116(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl 
%ebx, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -144(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -188(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -236(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -204(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -112(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -140(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -188(%ebp) # 4-byte Spill -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -244(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -108(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -256(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: adcl $0, %edi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -96(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -256(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -96(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, 
%eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -248(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -104(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -156(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -256(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload -; X32-NEXT: adcl %eax, %edi -; X32-NEXT: movl %edi, -248(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movl 112(%ecx), %eax -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill -; X32-NEXT: imull %eax, %esi -; X32-NEXT: movl -108(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl 116(%ecx), %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: imull %eax, %edi -; X32-NEXT: addl %edx, %edi -; X32-NEXT: movl %edi, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl 120(%ecx), %eax -; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl -92(%ebp), %esi # 4-byte Reload -; X32-NEXT: imull %esi, %edi -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill -; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl 124(%ebx), %ebx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: imull %eax, %ebx -; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -96(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -108(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -156(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: setb %cl -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -104(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %esi, %eax -; X32-NEXT: movzbl %cl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -96(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: 
adcl %ebx, %edx -; X32-NEXT: movl %edx, -96(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: imull %eax, %edi -; X32-NEXT: movl -284(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl 112(%esi), %edi +; X32-NEXT: imull %edi, %ebp +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull -116(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl 116(%esi), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: imull %eax, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -284(%ebp) # 4-byte Spill -; X32-NEXT: movl -244(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, %ebx +; X32-NEXT: movl 120(%esi), %eax ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -252(%ebp), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx -; X32-NEXT: movl -212(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -100(%ebp), %ecx # 4-byte Reload -; X32-NEXT: imull %edi, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 124(%ecx), %ecx +; X32-NEXT: imull %ebp, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -104(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -104(%ebp) # 4-byte Spill -; X32-NEXT: adcl -284(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -284(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -116(%ebp) # 4-byte Folded Reload ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx -; X32-NEXT: adcl %edi, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ebp, %ebx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -116(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl 
-104(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -100(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -284(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -108(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -168(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -92(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -96(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -28(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ecx, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: imull %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl %esi, %edx +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %edx, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %eax, %ecx +; X32-NEXT: addl %edx, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %ebx, %esi +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %edi +; X32-NEXT: adcl %ebp, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %edi, %ecx +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: movl %esi, %edi -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, %ebx -; X32-NEXT: adcl -248(%ebp), %eax # 4-byte Folded 
Reload -; X32-NEXT: movl %eax, -116(%ebp) # 4-byte Spill -; X32-NEXT: adcl -128(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl -304(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -64(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -132(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -220(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -236(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -356(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -204(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -32(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -284(%ebp) # 4-byte Spill -; X32-NEXT: adcl -164(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -384(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl %edi, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -256(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -300(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: addl -76(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill -; X32-NEXT: adcl -72(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -132(%ebp) # 4-byte Spill -; X32-NEXT: adcl -176(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill -; X32-NEXT: adcl -200(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -204(%ebp) # 4-byte Spill -; X32-NEXT: movl -224(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -284(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -380(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill -; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -116(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -256(%ebp) # 4-byte Spill -; X32-NEXT: movl -492(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded 
Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 92(%eax), %esi ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 92(%eax), %eax -; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -28(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -556(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -560(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -552(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload 
-; X32-NEXT: mull %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -460(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -712(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -976(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -92(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -28(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %ecx +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -104(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %edi, %ebp +; X32-NEXT: movzbl %bl, %eax +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -524(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -528(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -48(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -108(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -28(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -28(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -92(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl -492(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill -; X32-NEXT: adcl -92(%ebp), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -92(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -92(%ebp), %edi # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -556(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -560(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -212(%ebp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -208(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -28(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -712(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -212(%ebp) # 4-byte Spill -; X32-NEXT: adcl -968(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -208(%ebp) # 4-byte Spill -; X32-NEXT: adcl -964(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -244(%ebp) # 4-byte Spill -; X32-NEXT: adcl -972(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -248(%ebp) # 4-byte Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl 76(%eax), %eax -; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -168(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl 
%ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -156(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -260(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -308(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -308(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; 
X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -308(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -444(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -716(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -992(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -92(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -252(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -92(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -92(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebx ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -252(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -92(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebx ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -156(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -48(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: setb -48(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload -; 
X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -156(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -60(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -156(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -156(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -156(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -160(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -268(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi -; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -100(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -84(%ebp), %esi # 4-byte Reload +; X32-NEXT: adcl %edx, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebx, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movzbl -48(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi -; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -716(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -988(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -984(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -980(%ebp), %ebx # 
4-byte Folded Reload -; X32-NEXT: addl -148(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: adcl -128(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -108(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -144(%ebp) # 4-byte Spill -; X32-NEXT: adcl -104(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -244(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: adcl $0, %ebp +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %esi +; X32-NEXT: setb %cl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: addl %esi, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -564(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -180(%ebp), %ecx # 
4-byte Folded Reload -; X32-NEXT: movl -568(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -348(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -104(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -444(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -112(%ebp), %ecx # 1-byte Folded Reload -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -720(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -1008(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -108(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -300(%ebp) # 4-byte Spill -; X32-NEXT: adcl -48(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -128(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -148(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT: adcl %eax, %edx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %edi, %ecx -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebx ; X32-NEXT: setb %cl -; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -500(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -280(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -496(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl -300(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -112(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ebp +; X32-NEXT: adcl %edx, %ecx +; X32-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %ebp ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: setb -112(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -288(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
+; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -128(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: adcl $0, %edx -; X32-NEXT: movl %edx, -300(%ebp) # 4-byte Spill -; X32-NEXT: movl -388(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -16(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -300(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: setb -300(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -300(%ebp), %edi # 1-byte Folded Reload -; X32-NEXT: adcl %edi, %edx -; X32-NEXT: movl -564(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -280(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -568(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %esi +; X32-NEXT: adcl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ebx -; X32-NEXT: movl -148(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -128(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movzbl -112(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl %ebp, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl %ecx, %esi +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -720(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -664(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -996(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -1000(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -156(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -104(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -48(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -108(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %ebx -; X32-NEXT: addl -212(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -248(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -144(%ebp) # 4-byte Spill -; X32-NEXT: setb -100(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -492(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %esi ; X32-NEXT: addl %ecx, %esi -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %esi, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx -; X32-NEXT: setb -248(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -96(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %ecx +; X32-NEXT: setb %bl +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -248(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -556(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -320(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -560(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl %ecx, -320(%ebp) # 4-byte Spill -; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -248(%ebp) # 4-byte Spill -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -216(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -244(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebx, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %ecx, %edi -; X32-NEXT: movzbl -188(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %edi, %esi +; X32-NEXT: mull %ebp +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: addl -724(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -1004(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -212(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -208(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -180(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -320(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -552(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -288(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -208(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -212(%ebp) # 4-byte Spill -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload 
+; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -208(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %ebp, %ecx +; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -208(%ebp) # 4-byte Spill -; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebp ; X32-NEXT: setb %cl -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %esi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebp, %eax ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -524(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -528(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl %edi, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -188(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill +; X32-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -180(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -524(%ebp) # 4-byte Spill -; X32-NEXT: adcl -320(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -528(%ebp) # 4-byte Spill -; X32-NEXT: setb -180(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -492(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -288(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -188(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -320(%ebp) # 4-byte Spill -; X32-NEXT: movl -96(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: 
addl -188(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -188(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull -16(%ebp) # 4-byte Folded Reload +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -188(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -556(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -560(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -312(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -320(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -524(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -528(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movzbl -180(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -724(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -668(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -732(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -728(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -148(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -248(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -128(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -244(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -84(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -212(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -144(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -208(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -100(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded 
Reload ; X32-NEXT: adcl %eax, %edx -; X32-NEXT: movl %edx, -320(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %edi, -300(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -560(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %ebx -; X32-NEXT: movl 96(%ebx), %ecx -; X32-NEXT: movl %ecx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -100(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -100(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl 100(%ebx), %ebx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 96(%ecx), %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: movl %ebx, -100(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb -280(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 100(%eax), %esi +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: setb %bl +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: movzbl -280(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %esi, %edi +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -312(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %eax, -84(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl %eax, %edi -; X32-NEXT: movl -268(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: addl %eax, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: adcl %edx, %esi -; X32-NEXT: addl %ebx, %edi -; X32-NEXT: movl %edi, -188(%ebp) # 4-byte Spill +; X32-NEXT: addl %edi, 
%ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: movl %esi, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %edi # 4-byte Reload -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -312(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -164(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: mull %ebp +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -384(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb %cl -; X32-NEXT: movl -124(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: addl %ecx, %eax +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -84(%ebp), %edi # 4-byte Reload -; X32-NEXT: addl -136(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %edi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -180(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -148(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -280(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -188(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -144(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 104(%eax), %ecx -; X32-NEXT: movl %ecx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -260(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT: movl 104(%ebp), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -128(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -148(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl -128(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 108(%eax), %edx -; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill -; X32-NEXT: mull %ebx -; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl 108(%ebp), %esi +; X32-NEXT: movl %edi, %eax +; X32-NEXT: mull %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %esi -; X32-NEXT: setb -176(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl %esi, %edi -; X32-NEXT: movzbl -176(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl %eax, %esi +; X32-NEXT: addl %edi, %esi +; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %eax, %ecx -; X32-NEXT: movl -180(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx -; X32-NEXT: movl %edx, -200(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -176(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: movl -264(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax -; X32-NEXT: addl %edi, %esi +; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl %ecx, %eax -; X32-NEXT: movl -84(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl %ecx, -148(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -280(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl %ecx, -128(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, %edi ; X32-NEXT: adcl $0, %eax -; X32-NEXT: addl -188(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -136(%ebp) # 4-byte Spill -; X32-NEXT: adcl -144(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill -; X32-NEXT: setb -84(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -144(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -112(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, %ebp ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -144(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -112(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebx, %edi +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -144(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -160(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -176(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -268(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -200(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -280(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -264(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill -; X32-NEXT: movzbl -84(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -268(%ebp) # 4-byte Spill -; X32-NEXT: movl -348(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %edi, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -180(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi 
# 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -216(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl -288(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %esi, %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -100(%ebp), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -312(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx -; X32-NEXT: movl -16(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: imull %edi, %esi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %esi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %ecx, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -84(%ebp) # 4-byte Spill -; X32-NEXT: adcl -180(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl %ebx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -348(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -288(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: addl %ebx, %edi ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -216(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %esi, %ebx +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -264(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb %bl -; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -84(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -348(%ebp) # 4-byte Spill -; X32-NEXT: adcl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -180(%ebp) # 4-byte Spill -; X32-NEXT: movl 12(%ebp), %edx +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte 
Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl 124(%edx), %ecx -; X32-NEXT: movl -260(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: imull %eax, %ecx ; X32-NEXT: movl 120(%edx), %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -124(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl 112(%edi), %ebx -; X32-NEXT: movl 116(%edi), %ecx -; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl 112(%edi), %ebp +; X32-NEXT: movl 116(%edi), %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %edi -; X32-NEXT: imull %ecx, %edi -; X32-NEXT: mull %ebx +; X32-NEXT: imull %ebx, %edi +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: mull %ebp ; X32-NEXT: addl %edi, %edx -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %ebp, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -216(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -260(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -312(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -216(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl -312(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl %edx, %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: adcl %edi, %ebx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull -124(%ebp) # 4-byte Folded Reload -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movzbl -260(%ebp), %esi # 1-byte Folded Reload -; X32-NEXT: adcl %esi, %edx -; X32-NEXT: addl -184(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -60(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -216(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -288(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: adcl -264(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -348(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: addl -280(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -216(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -264(%ebp) # 4-byte Spill -; X32-NEXT: adcl -160(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: 
movl %eax, -180(%ebp) # 4-byte Spill -; X32-NEXT: adcl -268(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -288(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %esi # 4-byte Reload -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -520(%ebp), %ecx # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -16(%ebp), %ebx # 4-byte Folded Reload +; X32-NEXT: addl %esi, %ebx ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, %eax -; X32-NEXT: movl -444(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ebx, %ebp ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb %bl -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movzbl %bl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -500(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -496(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl %eax, %ecx -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edx, %esi -; X32-NEXT: movl %esi, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %ecx # 4-byte Reload -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -520(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl -124(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -444(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi 
# 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %edi, %ebx +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: movl %ebp, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill -; X32-NEXT: adcl %edi, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ecx, %ebp ; X32-NEXT: setb %bl -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl %bl, %ecx -; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -500(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -496(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -400(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: addl %eax, %esi -; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: addl -60(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -500(%ebp) # 4-byte Spill -; X32-NEXT: adcl -136(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -496(%ebp) # 4-byte Spill -; X32-NEXT: adcl $0, -160(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl $0, -16(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -416(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ebx +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp +; X32-NEXT: adcl $0, %ebx +; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %ebx, %edi +; X32-NEXT: setb %bl +; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %esi, %ebp +; X32-NEXT: mull %ecx +; X32-NEXT: addl %edi, %eax +; X32-NEXT: movzbl %bl, %edi +; X32-NEXT: adcl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl %eax, %edi +; X32-NEXT: adcl %edx, %esi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl $0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -28(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax ; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -136(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -316(%ebp), %eax # 4-byte Reload +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -136(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -324(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -400(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -500(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -60(%ebp) # 4-byte Folded Spill -; X32-NEXT: adcl -496(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %esi ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: addl -160(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -324(%ebp) # 4-byte Spill -; X32-NEXT: adcl -16(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -400(%ebp) # 4-byte Spill -; X32-NEXT: setb -160(%ebp) # 1-byte Folded Spill -; X32-NEXT: movl -352(%ebp), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %edx, %ebp +; X32-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %edi -; X32-NEXT: addl -268(%ebp), %edi # 4-byte Folded Reload +; X32-NEXT: addl %ebp, %edi ; X32-NEXT: adcl $0, %esi ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: mull -28(%ebp) # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %edi, %eax -; X32-NEXT: movl %eax, -268(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %edi ; X32-NEXT: adcl %esi, %ecx -; X32-NEXT: setb -260(%ebp) # 1-byte Folded Spill +; X32-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X32-NEXT: movl %ebx, %eax -; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi +; X32-NEXT: mull %ebp ; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movzbl -260(%ebp), %ecx # 1-byte Folded Reload +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: movl -364(%ebp), %esi # 4-byte Reload -; X32-NEXT: addl -564(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -396(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -568(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NEXT: addl %eax, %esi ; X32-NEXT: adcl %edx, %ecx -; X32-NEXT: movl -324(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl %eax, -16(%ebp) # 4-byte Folded Spill -; X32-NEXT: movl -400(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl %eax, -268(%ebp) # 4-byte Folded Spill -; X32-NEXT: movzbl -160(%ebp), %eax # 1-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi -; X32-NEXT: movl %esi, -364(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %ecx -; X32-NEXT: movl %ecx, -396(%ebp) # 4-byte Spill -; X32-NEXT: movl -440(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %ebp, %ecx ; X32-NEXT: imull %eax, %ecx -; X32-NEXT: movl -388(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi -; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: imull -340(%ebp), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: imull %ebp, %esi ; X32-NEXT: addl %edx, %esi -; X32-NEXT: movl %esi, -388(%ebp) # 4-byte Spill -; X32-NEXT: movl -408(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -444(%ebp), %ebx # 4-byte Reload -; X32-NEXT: imull %ebx, %esi -; X32-NEXT: movl -520(%ebp), %edi # 4-byte Reload -; X32-NEXT: mull %edi -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %esi, %edx -; 
X32-NEXT: movl -192(%ebp), %esi # 4-byte Reload -; X32-NEXT: imull %edi, %esi -; X32-NEXT: addl %edx, %esi -; X32-NEXT: addl -28(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill -; X32-NEXT: adcl -388(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -192(%ebp) # 4-byte Spill -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -440(%ebp), %esi # 4-byte Reload -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, -324(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl %eax, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: mull %esi +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: addl %edi, %edx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %esi, %eax +; X32-NEXT: imull %eax, %edi +; X32-NEXT: addl %edx, %edi +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl %ecx, %ebx -; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl %edi, %eax -; X32-NEXT: movl -340(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %esi, %ebx +; X32-NEXT: adcl $0, %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %ebp +; X32-NEXT: movl %ebp, %esi +; X32-NEXT: movl %edx, %ebp ; X32-NEXT: addl %ebx, %eax -; X32-NEXT: movl %eax, -260(%ebp) # 4-byte Spill -; X32-NEXT: adcl %esi, %edi -; X32-NEXT: setb %bl -; X32-NEXT: movl -444(%ebp), %eax # 4-byte Reload -; X32-NEXT: mull %ecx -; X32-NEXT: addl %edi, %eax -; X32-NEXT: movzbl %bl, %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl %edi, %ebp +; X32-NEXT: setb %cl +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: mull %esi +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %edx -; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -340(%ebp) # 4-byte Spill -; X32-NEXT: adcl -192(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -192(%ebp) # 4-byte Spill -; X32-NEXT: movl -416(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -96(%ebp), %edi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NEXT: imull %eax, %edi -; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl -492(%ebp), %ecx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: mull %ecx -; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp 
; X32-NEXT: addl %edi, %edx -; X32-NEXT: imull -316(%ebp), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: imull %ebx, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: movl %ecx, -492(%ebp) # 4-byte Spill -; X32-NEXT: movl -352(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl -460(%ebp), %edi # 4-byte Reload -; X32-NEXT: imull %edi, %ecx -; X32-NEXT: movl -552(%ebp), %ebx # 4-byte Reload -; X32-NEXT: mull %ebx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: imull %esi, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: mull %edi ; X32-NEXT: addl %ecx, %edx -; X32-NEXT: movl -120(%ebp), %ecx # 4-byte Reload -; X32-NEXT: imull %ebx, %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: imull %edi, %ecx ; X32-NEXT: addl %edx, %ecx -; X32-NEXT: addl -28(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -96(%ebp) # 4-byte Spill -; X32-NEXT: adcl -492(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -120(%ebp) # 4-byte Spill -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: mull %esi -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %eax, -28(%ebp) # 4-byte Spill +; X32-NEXT: addl %ebp, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edi, %eax -; X32-NEXT: mull %esi +; X32-NEXT: movl %edi, %ebp +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: mull %ecx +; X32-NEXT: movl %edx, %edi +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %esi, %eax +; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl %ebx, %ecx +; X32-NEXT: addl %edi, %ecx ; X32-NEXT: adcl $0, %esi -; X32-NEXT: movl -552(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl -316(%ebp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %edi -; X32-NEXT: addl %ecx, %eax -; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebp +; X32-NEXT: addl %ecx, %ebp ; X32-NEXT: adcl %esi, %edi ; X32-NEXT: setb %cl -; X32-NEXT: movl -460(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: mull %ebx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: addl %edi, %eax +; X32-NEXT: movl %eax, %edx +; X32-NEXT: addl %edi, %edx ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: adcl %ecx, %esi -; X32-NEXT: addl -96(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -120(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: addl -324(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -160(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -260(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: adcl -340(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: adcl -192(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: addl -16(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, %ebx -; X32-NEXT: adcl -268(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl %eax, %edx -; X32-NEXT: adcl -364(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -396(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, 
-16(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -164(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -384(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl -60(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -148(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -128(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: adcl -216(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -264(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: adcl -180(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -120(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -288(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -16(%ebp) # 4-byte Spill -; X32-NEXT: addl -248(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: adcl -244(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: adcl -212(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -60(%ebp) # 4-byte Spill -; X32-NEXT: adcl -208(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -136(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -320(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -300(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -160(%ebp) # 4-byte Spill -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -556(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -560(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: addl -344(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -168(%ebp) # 4-byte Spill -; X32-NEXT: movl -308(%ebp), %esi # 4-byte Reload -; X32-NEXT: adcl -232(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl -252(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -436(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -92(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -472(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -92(%ebp) # 4-byte Spill -; X32-NEXT: movl -156(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -88(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -296(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -40(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -48(%ebp) # 4-byte Spill -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -56(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -304(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -132(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -236(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %eax # 4-byte Reload -; 
X32-NEXT: adcl -204(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -136(%ebp) # 4-byte Spill -; X32-NEXT: adcl -284(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl %edx, -28(%ebp) # 4-byte Spill -; X32-NEXT: movl -160(%ebp), %edx # 4-byte Reload -; X32-NEXT: adcl -140(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -116(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl -16(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -256(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl -168(%ebp), %ecx # 4-byte Reload -; X32-NEXT: addl -432(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -168(%ebp) # 4-byte Spill -; X32-NEXT: adcl -456(%ebp), %esi # 4-byte Folded Reload -; X32-NEXT: movl %esi, -308(%ebp) # 4-byte Spill -; X32-NEXT: adcl -44(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl %edi, -252(%ebp) # 4-byte Spill -; X32-NEXT: movl -92(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -52(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -92(%ebp) # 4-byte Spill -; X32-NEXT: adcl -24(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl %ebx, -156(%ebp) # 4-byte Spill -; X32-NEXT: movl -104(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -272(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -104(%ebp) # 4-byte Spill -; X32-NEXT: movl -48(%ebp), %ebx # 4-byte Reload -; X32-NEXT: adcl -276(%ebp), %ebx # 4-byte Folded Reload -; X32-NEXT: movl -108(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -240(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -108(%ebp) # 4-byte Spill -; X32-NEXT: movl -184(%ebp), %edi # 4-byte Reload -; X32-NEXT: adcl -172(%ebp), %edi # 4-byte Folded Reload -; X32-NEXT: movl -124(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -80(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill -; X32-NEXT: movl -60(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -36(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -60(%ebp) # 4-byte Spill -; X32-NEXT: movl -136(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -20(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -136(%ebp) # 4-byte Spill -; X32-NEXT: movl -28(%ebp), %ecx # 4-byte Reload -; X32-NEXT: adcl -336(%ebp), %ecx # 4-byte Folded Reload -; X32-NEXT: movl %ecx, -28(%ebp) # 4-byte Spill -; X32-NEXT: adcl -360(%ebp), %edx # 4-byte Folded Reload -; X32-NEXT: adcl -392(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -120(%ebp) # 4-byte Spill -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload -; X32-NEXT: adcl -412(%ebp), %eax # 4-byte Folded Reload -; X32-NEXT: movl %eax, -16(%ebp) # 4-byte Spill -; X32-NEXT: movl 16(%ebp), %ecx -; X32-NEXT: movl -648(%ebp), %esi # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, %ebp +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, (%ecx) -; X32-NEXT: movl -644(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 4(%ecx) -; X32-NEXT: movl -536(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 8(%ecx) -; X32-NEXT: movl -596(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 12(%ecx) -; X32-NEXT: movl -592(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 16(%ecx) -; X32-NEXT: movl -532(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 20(%ecx) -; X32-NEXT: movl -428(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 24(%ecx) -; X32-NEXT: movl -452(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 28(%ecx) -; X32-NEXT: movl -508(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 32(%ecx) -; X32-NEXT: movl -504(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 36(%ecx) -; X32-NEXT: movl -328(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 40(%ecx) -; X32-NEXT: movl -468(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 44(%ecx) -; X32-NEXT: movl -404(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 48(%ecx) -; X32-NEXT: movl -540(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 52(%ecx) -; X32-NEXT: movl -228(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 56(%ecx) -; X32-NEXT: movl -196(%ebp), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NEXT: movl %esi, 60(%ecx) -; X32-NEXT: movl -168(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 64(%ecx) -; X32-NEXT: movl -308(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 68(%ecx) -; X32-NEXT: movl -252(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 72(%ecx) -; X32-NEXT: movl -92(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 76(%ecx) -; X32-NEXT: movl -156(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 80(%ecx) -; X32-NEXT: movl -104(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 84(%ecx) -; X32-NEXT: movl %ebx, 88(%ecx) -; X32-NEXT: movl -108(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 92(%ecx) -; X32-NEXT: movl %edi, 96(%ecx) -; X32-NEXT: movl -124(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 100(%ecx) -; X32-NEXT: movl -60(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 104(%ecx) -; X32-NEXT: movl 
-136(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 108(%ecx) -; X32-NEXT: movl -28(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 112(%ecx) -; X32-NEXT: movl %edx, 116(%ecx) -; X32-NEXT: movl -120(%ebp), %eax # 4-byte Reload -; X32-NEXT: movl %eax, 120(%ecx) -; X32-NEXT: movl -16(%ebp), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 64(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 68(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 72(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 76(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 80(%ecx) +; X32-NEXT: movl %ebp, 84(%ecx) +; X32-NEXT: movl %edi, 88(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 92(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 96(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 100(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 104(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 108(%ecx) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %esi, 112(%ecx) +; X32-NEXT: movl %ebx, 116(%ecx) +; X32-NEXT: movl %edx, 120(%ecx) ; X32-NEXT: movl %eax, 124(%ecx) -; X32-NEXT: addl $996, %esp # imm = 0x3E4 +; X32-NEXT: addl $1000, %esp # imm = 0x3E8 ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -6734,13 +6691,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx ; X64-NEXT: subq $352, %rsp # imm = 0x160 -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 48(%rdi), %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 40(%rdi), %rbp -; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 32(%rdi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdi, %r10 ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: mulq %r8 @@ -6753,7 +6710,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rcx, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: adcq %rdi, %rbp ; X64-NEXT: setb %bl @@ -6762,8 +6719,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %r12 ; X64-NEXT: movq %r11, %r8 ; X64-NEXT: addq %rax, %r12 @@ -6772,17 +6729,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill ; 
X64-NEXT: adcq %rdx, %rax ; X64-NEXT: addq %rbp, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %ebp, %ebp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq 8(%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rbp ; X64-NEXT: xorl %r11d, %r11d ; X64-NEXT: movq %rax, %r15 @@ -6791,18 +6748,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rdi, %r15 ; X64-NEXT: adcq %rcx, %rbp -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax ; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdi, %r14 ; X64-NEXT: addq %rax, %r14 ; X64-NEXT: movq %rcx, %r11 @@ -6811,13 +6768,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rbx, %r11 ; X64-NEXT: movq %r8, %rax ; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %r9, %rax ; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq (%r10), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rsi @@ -6826,35 +6783,35 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdi, %r9 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rdx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: addq %r9, %rax -; 
X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %r15, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r14, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %r11, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %rdi ; X64-NEXT: movq 8(%r10), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %rsi, %r11 @@ -6862,16 +6819,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rcx, %r11 ; X64-NEXT: adcq %rsi, %rbp -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: setb %bl ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: movzbl %bl, %ebx ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%r10), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r8 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: movq %rsi, %r10 @@ -6881,24 +6838,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rbx, %r10 ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %r9, %rdx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %r8 ; X64-NEXT: adcq %r8, %r15 -; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: adcq %r10, %rdi -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq 40(%rsi), %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: xorl %r14d, %r14d ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rax, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload 
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: addq %r9, %rdi ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp @@ -6909,50 +6866,50 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movzbl %bl, %r11d ; X64-NEXT: adcq %rdx, %r11 ; X64-NEXT: movq 48(%rsi), %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r14 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rbx ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: movq %r9, %rsi ; X64-NEXT: adcq %rdx, %rsi ; X64-NEXT: addq %rbp, %rbx ; X64-NEXT: adcq %r11, %rsi -; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: addq %r13, %r12 -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rdi, %r8 -; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbx, %rcx -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %r10 -; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %r13, %rax ; X64-NEXT: movq (%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %r9, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rax ; X64-NEXT: addq %r13, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %r11 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte 
Spill ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdi, %r10 ; X64-NEXT: movq %rdx, %rbp @@ -6960,7 +6917,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rsi, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 @@ -6973,19 +6930,19 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r12 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r10, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp @@ -6997,7 +6954,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rdi, %rax @@ -7007,15 +6964,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rsi ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r13 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: addq %r9, %rsi ; X64-NEXT: adcq %r8, %r13 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %r10, %rbx ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r10 @@ -7026,12 +6983,12 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 @@ 
-7042,30 +6999,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: addq %r14, %rbp ; X64-NEXT: movq (%rsp), %rbx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: adcq %r9, %rbx ; X64-NEXT: addq %rax, %rbp ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: addq %rsi, %r10 -; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r13, %r8 -; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: addq %r15, %rbp ; X64-NEXT: adcq %r12, %rbx ; X64-NEXT: setb %r15b -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r11, %rsi ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: movq %r12, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi @@ -7073,7 +7030,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %r11, %rdi ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r11 @@ -7086,9 +7043,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: addq %r14, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: adcq %r9, %r14 ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %r14 @@ -7097,24 +7054,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movzbl %r15b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: adcq $0, %r14 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload -; X64-NEXT: movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; 
X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 24(%rax), %rcx ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %r11 ; X64-NEXT: movq %rdx, %rsi @@ -7122,7 +7079,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rbx, %rbp ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %r15 @@ -7134,19 +7091,19 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r11, %rbp ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp @@ -7158,7 +7115,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rdi ; X64-NEXT: setb %cl ; X64-NEXT: movq %rsi, %rax @@ -7169,14 +7126,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rdi, %rbx ; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx ; X64-NEXT: adcq %r15, %rsi ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq %r11, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; 
X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 @@ -7200,29 +7157,29 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rdi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: adcq %r14, %rbp ; X64-NEXT: addq %rax, %rdi ; X64-NEXT: adcq %rdx, %rbp ; X64-NEXT: addq %rbx, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %r11 -; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %r8, %rdi ; X64-NEXT: adcq %r10, %rbp ; X64-NEXT: setb %r9b -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rsi @@ -7241,10 +7198,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rcx ; X64-NEXT: addq %r13, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: movq %rbx, %r12 ; X64-NEXT: adcq %r14, %rsi @@ -7255,25 +7212,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movzbl %r9b, %eax ; X64-NEXT: adcq %rax, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq $0, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; 
X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq $0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 @@ -7286,7 +7243,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rsi, %rcx ; X64-NEXT: adcq $0, %rbx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 @@ -7300,17 +7257,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: movq %r10, %r9 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload ; X64-NEXT: movq %r12, %r10 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r9 ; X64-NEXT: adcq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi @@ -7321,7 +7278,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %rdi, %rax @@ -7331,15 +7288,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: adcq %rax, %r15 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte 
Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload ; X64-NEXT: addq %r14, %rbx ; X64-NEXT: adcq %r8, %r15 ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq %rbp, %rsi ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r12 @@ -7350,7 +7307,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r14, %rcx ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq 56(%rax), %rdi ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %rdi @@ -7365,11 +7322,11 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rsi, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: addq %r11, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: adcq %r13, %rsi ; X64-NEXT: addq %rax, %rcx ; X64-NEXT: adcq %rdx, %rsi @@ -7379,14 +7336,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: addq %r9, %rcx ; X64-NEXT: adcq %r10, %rsi -; X64-NEXT: setb {{[0-9]+}}(%rsp) # 1-byte Folded Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: movq %r10, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r15 @@ -7395,7 +7352,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %r8, %rdi -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 @@ -7407,48 +7364,48 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %r9, %rax ; X64-NEXT: movzbl %bl, %edi ; X64-NEXT: adcq %rdi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: addq %r11, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: adcq %r13, %rbp ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %rbp -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 
8-byte Reload ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: adcq %rsi, %r8 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %r15 ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: adcq $0, %r8 ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rsi, %r10 @@ -7457,7 +7414,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* 
%out) nounwind { ; X64-NEXT: addq %r11, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 @@ -7470,20 +7427,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %r8 ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: movq %rcx, %r14 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r10, %rdi ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rdi @@ -7495,7 +7452,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r9 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rdi, %rcx ; X64-NEXT: setb %bl ; X64-NEXT: movq %rsi, %rax @@ -7505,17 +7462,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rdi ; X64-NEXT: movzbl %bl, %eax ; X64-NEXT: adcq %rax, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: adcq %r12, %r11 ; X64-NEXT: adcq $0, %r8 -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rbx ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq %rax, %r12 @@ -7528,7 +7485,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: addq %rcx, %rax @@ -7541,11 +7498,11 @@ define 
void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rbx, %rax ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: addq %r13, %rsi ; X64-NEXT: movq (%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: adcq %r14, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx @@ -7554,18 +7511,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %r8, %r11 ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, (%rsp) # 8-byte Spill -; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: setb {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: movq %r10, %rsi ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi @@ -7584,47 +7541,47 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %bl, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: addq %r13, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: adcq %r14, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq (%rsp), %r10 # 8-byte Folded Reload -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %rsi ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload -; X64-NEXT: adcq %rax, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte 
Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: addq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; X64-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: adcq %r15, %r12 -; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %r11 ; X64-NEXT: movq %r11, (%rsp) # 8-byte Spill -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # 1-byte Folded Reload +; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; X64-NEXT: adcq %rax, %r14 -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq 64(%rcx), %r11 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp @@ -7643,7 +7600,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -7654,20 +7611,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: addq %rbx, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: adcq %r14, %r15 ; X64-NEXT: addq %rdi, %r12 ; X64-NEXT: adcq %rcx, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r11, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload ; X64-NEXT: movq %r9, %rax ; 
X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi @@ -7679,7 +7636,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rdi, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rsi, %rcx ; X64-NEXT: setb %sil ; X64-NEXT: movq %r9, %rax @@ -7687,15 +7644,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rbx ; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: addq %r13, %rbx ; X64-NEXT: adcq %r8, %r14 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: adcq $0, %r15 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq 80(%rbp), %rdi ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rdi @@ -7725,18 +7682,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: mulq %rdx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: addq %r9, %rbp -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: addq %rsi, %rbp ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: addq %rbx, %r13 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 -; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: adcq $0, %rax ; X64-NEXT: addq %r12, %rbp @@ -7744,12 +7701,12 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: setb %r14b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r15 ; X64-NEXT: movq %rax, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rsi @@ -7768,39 +7725,39 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %sil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: addq %r9, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %r8, %r12 
-; X64-NEXT: movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %r14b, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rax, %r10 ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %r10, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rdi ; X64-NEXT: addq %rdx, %rdi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: imulq %r11, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r8, %r9 @@ -7810,7 +7767,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rcx, %rdi ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rdx, %rsi @@ -7833,9 +7790,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rax, %r12 ; X64-NEXT: addq %r9, %r13 ; X64-NEXT: adcq %r8, %r12 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq 120(%rdx), %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; X64-NEXT: imulq %r10, %rcx ; X64-NEXT: movq 112(%rdx), %rsi ; X64-NEXT: movq %rdx, %rbp @@ -7843,18 +7800,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r11 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: imulq %r8, %rsi ; X64-NEXT: addq %rdx, %rsi ; X64-NEXT: movq 96(%rbp), %rdi ; X64-NEXT: movq 104(%rbp), %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: imulq %rbx, %rcx ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rdi, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r11, %r9 @@ -7884,29 +7841,29 @@ 
define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r9, %rax ; X64-NEXT: adcq %r11, %rdx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: adcq %r15, %rdi ; X64-NEXT: adcq %r13, %rax ; X64-NEXT: adcq %r12, %rdx -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload -; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload +; X64-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq 80(%rsi), %rdi ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movq 88(%rsi), %rax ; X64-NEXT: movq %rsi, %r9 ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp @@ -7914,8 +7871,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %r8, %rbx ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r14 @@ -7932,13 +7889,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: mulq %rcx -; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; X64-NEXT: addq %r12, %rsi ; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; X64-NEXT: adcq %r8, %r10 ; X64-NEXT: addq %rbx, %rsi ; X64-NEXT: adcq %rbp, %r10 @@ -7946,7 +7903,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* 
%out) nounwind { ; X64-NEXT: movq 64(%rdi), %r13 ; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %r11 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq 72(%rdi), %r9 ; X64-NEXT: movq %r9, %rax @@ -7959,11 +7916,11 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %rbp, %rcx ; X64-NEXT: setb %r11b ; X64-NEXT: movq %r9, %rax -; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: mulq %r15 ; X64-NEXT: movq %rdx, %rbx ; X64-NEXT: movq %rax, %rbp @@ -7980,15 +7937,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %r11, %r8 ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 -; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r14, %r8 -; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rsi ; X64-NEXT: adcq $0, %r10 -; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r12 @@ -8000,7 +7957,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %r13, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbp, %rax @@ -8012,28 +7969,28 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %dil, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: addq %r14, %r15 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; X64-NEXT: adcq %r13, %r11 ; X64-NEXT: addq %rax, %r15 ; X64-NEXT: adcq %rdx, %r11 -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload -; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload +; X64-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r11 ; X64-NEXT: addq %rsi, %r15 ; X64-NEXT: adcq %r10, %r11 ; X64-NEXT: setb %r10b -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax ; 
X64-NEXT: movq %r8, %rdi ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdi, %r12 @@ -8042,7 +7999,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: adcq $0, %rdi ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: addq %rbx, %rax @@ -8055,22 +8012,22 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: movzbl %r8b, %ecx ; X64-NEXT: adcq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; X64-NEXT: addq %r14, %rsi -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: adcq %r13, %rcx ; X64-NEXT: addq %rax, %rsi ; X64-NEXT: adcq %rdx, %rcx ; X64-NEXT: addq %r15, %r9 -; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq %r11, %rbx -; X64-NEXT: movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movzbl %r10b, %eax ; X64-NEXT: adcq %rax, %rsi -; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: adcq $0, %rcx -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: movq 96(%rbp), %rcx ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax @@ -8085,9 +8042,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq 112(%rbp), %rax ; X64-NEXT: movq %rbp, %rdi ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload ; X64-NEXT: imulq %rbp, %rsi -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; X64-NEXT: mulq %rbx ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rsi, %rdx @@ -8100,7 +8057,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %rbx -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %rbp, %r9 ; X64-NEXT: mulq %rcx @@ -8124,32 +8081,32 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rax, %rbx ; X64-NEXT: addq %r10, %rbp ; X64-NEXT: adcq %rdi, %rbx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %rax, %rsi ; X64-NEXT: movq %rax, %r13 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; X64-NEXT: 
mulq %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rsi, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload ; X64-NEXT: imulq %r11, %rcx ; X64-NEXT: addq %rdx, %rcx ; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload ; X64-NEXT: imulq %r15, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload ; X64-NEXT: mulq %r14 ; X64-NEXT: movq %rax, %r10 ; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: imulq %r14, %rax ; X64-NEXT: addq %rdx, %rax ; X64-NEXT: addq %r8, %r10 ; X64-NEXT: adcq %r9, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %r14, %rax ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rdi @@ -8173,53 +8130,53 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: adcq %rcx, %rdx ; X64-NEXT: addq %r10, %rax -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; X64-NEXT: adcq %r12, %rsi ; X64-NEXT: adcq %rbp, %rax ; X64-NEXT: adcq %rbx, %rdx -; X64-NEXT: addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 
8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload ; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload ; X64-NEXT: movq %rdi, %r10 -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload ; X64-NEXT: adcq (%rsp), %rbx # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload -; X64-NEXT: adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload -; X64-NEXT: adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload +; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, (%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 8(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 16(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 24(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 32(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 40(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 48(%rcx) -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, 56(%rcx) ; X64-NEXT: movq %r9, 64(%rcx) ; X64-NEXT: movq %r10, 72(%rcx) diff --git a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index 66047e3677f6..023de041dce9 100644 --- a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -1,13 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32 -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64 - -; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in -; X86InstrInfo::copyPhysReg() is resolved. 
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32 +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64 ; The peephole optimizer can elide some physical register copies such as ; EFLAGS. Make sure the flags are used directly, instead of needlessly using -; lahf, when possible. +; saving and restoring specific conditions. @L = external global i32 @M = external global i8 @@ -209,29 +206,22 @@ exit2: define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind { ; CHECK32-LABEL: test_intervening_call: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp ; CHECK32-NEXT: pushl %ebx ; CHECK32-NEXT: pushl %esi -; CHECK32-NEXT: movl 12(%ebp), %eax -; CHECK32-NEXT: movl 16(%ebp), %edx -; CHECK32-NEXT: movl 20(%ebp), %ebx -; CHECK32-NEXT: movl 24(%ebp), %ecx -; CHECK32-NEXT: movl 8(%ebp), %esi -; CHECK32-NEXT: lock cmpxchg8b (%esi) ; CHECK32-NEXT: pushl %eax -; CHECK32-NEXT: seto %al -; CHECK32-NEXT: lahf -; CHECK32-NEXT: movl %eax, %esi -; CHECK32-NEXT: popl %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: lock cmpxchg8b (%esi) +; CHECK32-NEXT: setne %bl ; CHECK32-NEXT: subl $8, %esp ; CHECK32-NEXT: pushl %edx ; CHECK32-NEXT: pushl %eax ; CHECK32-NEXT: calll bar ; CHECK32-NEXT: addl $16, %esp -; CHECK32-NEXT: movl %esi, %eax -; CHECK32-NEXT: addb $127, %al -; CHECK32-NEXT: sahf +; CHECK32-NEXT: testb %bl, %bl ; CHECK32-NEXT: jne .LBB4_3 ; CHECK32-NEXT: # %bb.1: # %t ; CHECK32-NEXT: movl $42, %eax @@ -240,39 +230,28 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind { ; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: .LBB4_2: # %t ; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: addl $4, %esp ; CHECK32-NEXT: popl %esi ; CHECK32-NEXT: popl %ebx -; CHECK32-NEXT: popl %ebp ; CHECK32-NEXT: retl ; ; CHECK64-LABEL: test_intervening_call: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: pushq %rbp -; CHECK64-NEXT: movq %rsp, %rbp ; CHECK64-NEXT: pushq %rbx -; CHECK64-NEXT: pushq %rax ; CHECK64-NEXT: movq %rsi, %rax ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi) -; CHECK64-NEXT: pushq %rax -; CHECK64-NEXT: seto %al -; CHECK64-NEXT: lahf -; CHECK64-NEXT: movq %rax, %rbx -; CHECK64-NEXT: popq %rax +; CHECK64-NEXT: setne %bl ; CHECK64-NEXT: movq %rax, %rdi ; CHECK64-NEXT: callq bar -; CHECK64-NEXT: movq %rbx, %rax -; CHECK64-NEXT: addb $127, %al -; CHECK64-NEXT: sahf -; CHECK64-NEXT: jne .LBB4_3 +; CHECK64-NEXT: testb %bl, %bl +; CHECK64-NEXT: jne .LBB4_2 ; CHECK64-NEXT: # %bb.1: # %t ; CHECK64-NEXT: movl $42, %eax -; CHECK64-NEXT: jmp .LBB4_2 -; CHECK64-NEXT: .LBB4_3: # %f +; CHECK64-NEXT: popq %rbx +; CHECK64-NEXT: retq +; CHECK64-NEXT: .LBB4_2: # %f ; CHECK64-NEXT: xorl %eax, %eax -; CHECK64-NEXT: .LBB4_2: # %t -; CHECK64-NEXT: addq $8, %rsp ; CHECK64-NEXT: popq %rbx -; CHECK64-NEXT: popq %rbp ; CHECK64-NEXT: retq entry: ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS. 
@@ -293,32 +272,27 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6 ; CHECK32-LABEL: test_two_live_flags: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp ; CHECK32-NEXT: pushl %ebx ; CHECK32-NEXT: pushl %edi ; CHECK32-NEXT: pushl %esi -; CHECK32-NEXT: movl 44(%ebp), %edi -; CHECK32-NEXT: movl 12(%ebp), %eax -; CHECK32-NEXT: movl 16(%ebp), %edx -; CHECK32-NEXT: movl 20(%ebp), %ebx -; CHECK32-NEXT: movl 24(%ebp), %ecx -; CHECK32-NEXT: movl 8(%ebp), %esi +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: lock cmpxchg8b (%esi) +; CHECK32-NEXT: setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl %edi, %edx +; CHECK32-NEXT: movl %ebp, %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: lock cmpxchg8b (%esi) -; CHECK32-NEXT: seto %al -; CHECK32-NEXT: lahf -; CHECK32-NEXT: movl %eax, %esi -; CHECK32-NEXT: movl 32(%ebp), %eax -; CHECK32-NEXT: movl 36(%ebp), %edx -; CHECK32-NEXT: movl %edi, %ecx -; CHECK32-NEXT: movl 40(%ebp), %ebx -; CHECK32-NEXT: movl 28(%ebp), %edi -; CHECK32-NEXT: lock cmpxchg8b (%edi) ; CHECK32-NEXT: sete %al -; CHECK32-NEXT: pushl %eax -; CHECK32-NEXT: movl %esi, %eax -; CHECK32-NEXT: addb $127, %al -; CHECK32-NEXT: sahf -; CHECK32-NEXT: popl %eax +; CHECK32-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload ; CHECK32-NEXT: jne .LBB5_4 ; CHECK32-NEXT: # %bb.1: # %entry ; CHECK32-NEXT: testb %al, %al @@ -330,6 +304,7 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6 ; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: .LBB5_3: # %t ; CHECK32-NEXT: xorl %edx, %edx +; CHECK32-NEXT: addl $4, %esp ; CHECK32-NEXT: popl %esi ; CHECK32-NEXT: popl %edi ; CHECK32-NEXT: popl %ebx @@ -338,32 +313,22 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6 ; ; CHECK64-LABEL: test_two_live_flags: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: pushq %rbp -; CHECK64-NEXT: movq %rsp, %rbp ; CHECK64-NEXT: movq %rsi, %rax ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi) -; CHECK64-NEXT: seto %al -; CHECK64-NEXT: lahf -; CHECK64-NEXT: movq %rax, %rdx +; CHECK64-NEXT: setne %dl ; CHECK64-NEXT: movq %r8, %rax ; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx) ; CHECK64-NEXT: sete %al -; CHECK64-NEXT: pushq %rax -; CHECK64-NEXT: movq %rdx, %rax -; CHECK64-NEXT: addb $127, %al -; CHECK64-NEXT: sahf -; CHECK64-NEXT: popq %rax +; CHECK64-NEXT: testb %dl, %dl ; CHECK64-NEXT: jne .LBB5_3 ; CHECK64-NEXT: # %bb.1: # %entry ; CHECK64-NEXT: testb %al, %al ; CHECK64-NEXT: je .LBB5_3 ; CHECK64-NEXT: # %bb.2: # %t ; CHECK64-NEXT: movl $42, %eax -; CHECK64-NEXT: popq %rbp ; CHECK64-NEXT: retq ; CHECK64-NEXT: .LBB5_3: # %f ; CHECK64-NEXT: xorl %eax, %eax -; CHECK64-NEXT: popq %rbp ; CHECK64-NEXT: retq entry: %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst diff --git a/test/CodeGen/X86/pr37264.ll b/test/CodeGen/X86/pr37264.ll new file mode 100644 index 000000000000..8821960d4b74 --- /dev/null +++ b/test/CodeGen/X86/pr37264.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -mtriple=x86_64-- + +define void @a() local_unnamed_addr #0 { + ret void +} + +define 
void @b() local_unnamed_addr #1 { + ret void +} + +attributes #0 = { "target-features"="+avx,+avx2,+avx512bw,+avx512f,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } +attributes #1 = { "target-features"="+avx,+avx2,+avx512f,+avx512vl,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll index 34f78ad0ac20..c011b4dc6024 100644 --- a/test/CodeGen/X86/win64_frame.ll +++ b/test/CodeGen/X86/win64_frame.ll @@ -1,43 +1,85 @@ -; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=CHECK --check-prefix=PUSHF -; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+sahf | FileCheck %s --check-prefix=SAHF +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=ALL --check-prefix=PUSHF +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+sahf | FileCheck %s --check-prefix=ALL --check-prefix=SAHF define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f1: - ; CHECK: movl 48(%rbp), %eax +; ALL-LABEL: f1: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .seh_setframe 5, 0 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: movl 48(%rbp), %eax +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc ret i32 %p5 } define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f2: - ; CHECK: .seh_stackalloc 8 - ; CHECK: movq %rsp, %rbp - ; CHECK: .seh_setframe 5, 0 - ; CHECK: movq %rdx, 32(%rbp) - ; CHECK: leaq 32(%rbp), %rax +; ALL-LABEL: f2: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: pushq %rax +; ALL-NEXT: .seh_stackalloc 8 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .seh_setframe 5, 0 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: movq %r9, 48(%rbp) +; ALL-NEXT: movq %r8, 40(%rbp) +; ALL-NEXT: movq %rdx, 32(%rbp) +; ALL-NEXT: leaq 32(%rbp), %rax +; ALL-NEXT: movq %rax, (%rbp) +; ALL-NEXT: addq $8, %rsp +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %ap = alloca i8, align 8 call void @llvm.va_start(i8* %ap) ret void } define i8* @f3() "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f3: - ; CHECK: movq %rsp, %rbp - ; CHECK: .seh_setframe 5, 0 - ; CHECK: movq 8(%rbp), %rax +; ALL-LABEL: f3: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .seh_setframe 5, 0 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: movq 8(%rbp), %rax +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %ra = call i8* @llvm.returnaddress(i32 0) ret i8* %ra } define i8* @f4() "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f4: - ; CHECK: pushq %rbp - ; CHECK: .seh_pushreg 5 - ; CHECK: subq $304, %rsp - ; CHECK: .seh_stackalloc 304 - ; CHECK: leaq 128(%rsp), %rbp - ; CHECK: .seh_setframe 5, 128 - ; CHECK: .seh_endprologue - ; CHECK: movq 184(%rbp), %rax +; ALL-LABEL: f4: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: subq $304, %rsp # imm = 0x130 +; ALL-NEXT: .seh_stackalloc 304 +; ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; ALL-NEXT: .seh_setframe 5, 128 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: movq 184(%rbp), %rax +; ALL-NEXT: addq $304, %rsp # imm = 0x130 +; ALL-NEXT: 
popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc alloca [300 x i8] %ra = call i8* @llvm.returnaddress(i32 0) ret i8* %ra @@ -46,13 +88,24 @@ define i8* @f4() "no-frame-pointer-elim"="true" { declare void @external(i8*) define void @f5() "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f5: - ; CHECK: subq $336, %rsp - ; CHECK: .seh_stackalloc 336 - ; CHECK: leaq 128(%rsp), %rbp - ; CHECK: .seh_setframe 5, 128 - ; CHECK: leaq -92(%rbp), %rcx - ; CHECK: callq external +; ALL-LABEL: f5: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: subq $336, %rsp # imm = 0x150 +; ALL-NEXT: .seh_stackalloc 336 +; ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; ALL-NEXT: .seh_setframe 5, 128 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: leaq -92(%rbp), %rcx +; ALL-NEXT: callq external +; ALL-NEXT: nop +; ALL-NEXT: addq $336, %rsp # imm = 0x150 +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %a = alloca [300 x i8] %gep = getelementptr [300 x i8], [300 x i8]* %a, i32 0, i32 0 call void @external(i8* %gep) @@ -60,13 +113,24 @@ define void @f5() "no-frame-pointer-elim"="true" { } define void @f6(i32 %p, ...) "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f6: - ; CHECK: subq $336, %rsp - ; CHECK: .seh_stackalloc 336 - ; CHECK: leaq 128(%rsp), %rbp - ; CHECK: .seh_setframe 5, 128 - ; CHECK: leaq -92(%rbp), %rcx - ; CHECK: callq external +; ALL-LABEL: f6: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: subq $336, %rsp # imm = 0x150 +; ALL-NEXT: .seh_stackalloc 336 +; ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; ALL-NEXT: .seh_setframe 5, 128 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: leaq -92(%rbp), %rcx +; ALL-NEXT: callq external +; ALL-NEXT: nop +; ALL-NEXT: addq $336, %rsp # imm = 0x150 +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %a = alloca [300 x i8] %gep = getelementptr [300 x i8], [300 x i8]* %a, i32 0, i32 0 call void @external(i8* %gep) @@ -74,130 +138,147 @@ define void @f6(i32 %p, ...) 
"no-frame-pointer-elim"="true" { } define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f7: - ; CHECK: pushq %rbp - ; CHECK: .seh_pushreg 5 - ; CHECK: subq $304, %rsp - ; CHECK: .seh_stackalloc 304 - ; CHECK: leaq 128(%rsp), %rbp - ; CHECK: .seh_setframe 5, 128 - ; CHECK: andq $-64, %rsp - ; CHECK: movl 224(%rbp), %eax - ; CHECK: leaq 176(%rbp), %rsp +; ALL-LABEL: f7: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: subq $304, %rsp # imm = 0x130 +; ALL-NEXT: .seh_stackalloc 304 +; ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; ALL-NEXT: .seh_setframe 5, 128 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: andq $-64, %rsp +; ALL-NEXT: movl 224(%rbp), %eax +; ALL-NEXT: leaq 176(%rbp), %rsp +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc alloca [300 x i8], align 64 ret i32 %e } define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f8: - ; CHECK: subq $352, %rsp - ; CHECK: .seh_stackalloc 352 - ; CHECK: leaq 128(%rsp), %rbp - ; CHECK: .seh_setframe 5, 128 - +; ALL-LABEL: f8: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: pushq %rsi +; ALL-NEXT: .seh_pushreg 6 +; ALL-NEXT: pushq %rbx +; ALL-NEXT: .seh_pushreg 3 +; ALL-NEXT: subq $352, %rsp # imm = 0x160 +; ALL-NEXT: .seh_stackalloc 352 +; ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; ALL-NEXT: .seh_setframe 5, 128 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: andq $-64, %rsp +; ALL-NEXT: movq %rsp, %rbx +; ALL-NEXT: movl 288(%rbp), %esi +; ALL-NEXT: movl %ecx, %eax +; ALL-NEXT: leaq 15(,%rax,4), %rcx +; ALL-NEXT: movabsq $34359738352, %rax # imm = 0x7FFFFFFF0 +; ALL-NEXT: andq %rcx, %rax +; ALL-NEXT: callq __chkstk +; ALL-NEXT: subq %rax, %rsp +; ALL-NEXT: subq $32, %rsp +; ALL-NEXT: movq %rbx, %rcx +; ALL-NEXT: callq external +; ALL-NEXT: addq $32, %rsp +; ALL-NEXT: movl %esi, %eax +; ALL-NEXT: leaq 224(%rbp), %rsp +; ALL-NEXT: popq %rbx +; ALL-NEXT: popq %rsi +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %alloca = alloca [300 x i8], align 64 - ; CHECK: andq $-64, %rsp - ; CHECK: movq %rsp, %rbx - alloca i32, i32 %a - ; CHECK: movl %ecx, %eax - ; CHECK: leaq 15(,%rax,4), %rcx - ; CHECK: movabsq $34359738352, %rax - ; CHECK: andq %rcx, %rax - ; CHECK: callq __chkstk - ; CHECK: subq %rax, %rsp - %gep = getelementptr [300 x i8], [300 x i8]* %alloca, i32 0, i32 0 call void @external(i8* %gep) - ; CHECK: subq $32, %rsp - ; CHECK: movq %rbx, %rcx - ; CHECK: callq external - ; CHECK: addq $32, %rsp - ret i32 %e - ; CHECK: movl %esi, %eax - ; CHECK: leaq 224(%rbp), %rsp } define i64 @f9() { +; ALL-LABEL: f9: +; ALL: # %bb.0: # %entry +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .seh_setframe 5, 0 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: pushfq +; ALL-NEXT: popq %rax +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc entry: - ; CHECK-LABEL: f9: - ; CHECK: pushq %rbp - ; CHECK: .seh_pushreg 5 - ; CHECK-NEXT: movq %rsp, %rbp - ; CHECK: .seh_setframe 5, 0 - ; CHECK: .seh_endprologue - %call = call i64 @llvm.x86.flags.read.u64() - ; CHECK-NEXT: pushfq - ; CHECK-NEXT: popq %rax - ret i64 %call - ; CHECK-NEXT: popq %rbp - ; CHECK-NEXT: retq } declare i64 @dummy() define i64 @f10(i64* %foo, i64 %bar, i64 %baz) { - ; CHECK-LABEL: f10: - ; CHECK: pushq %rbp - ; 
CHECK: .seh_pushreg 5 - ; CHECK: pushq %rsi - ; CHECK: .seh_pushreg 6 - ; CHECK: pushq %rdi - ; CHECK: .seh_pushreg 7 - ; CHECK: subq $32, %rsp - ; CHECK: .seh_stackalloc 32 - ; CHECK: leaq 32(%rsp), %rbp - ; CHECK: .seh_setframe 5, 32 - ; CHECK: .seh_endprologue - +; ALL-LABEL: f10: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rsi +; ALL-NEXT: .seh_pushreg 6 +; ALL-NEXT: pushq %rbx +; ALL-NEXT: .seh_pushreg 3 +; ALL-NEXT: subq $40, %rsp +; ALL-NEXT: .seh_stackalloc 40 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: movq %rdx, %rsi +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: lock cmpxchgq %r8, (%rcx) +; ALL-NEXT: sete %bl +; ALL-NEXT: callq dummy +; ALL-NEXT: testb %bl, %bl +; ALL-NEXT: cmoveq %rsi, %rax +; ALL-NEXT: addq $40, %rsp +; ALL-NEXT: popq %rbx +; ALL-NEXT: popq %rsi +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst - ; PUSHF: lock cmpxchgq - ; PUSHF-NEXT: pushfq - ; PUSHF-NEXT: popq %[[REG:.*]] - ; SAHF: lock cmpxchgq - ; SAHF-NEXT: seto %al - ; SAHF-NEXT: lahf - %v = extractvalue { i64, i1 } %cx, 0 %p = extractvalue { i64, i1 } %cx, 1 - %call = call i64 @dummy() - ; PUSHF: callq dummy - ; PUSHF-NEXT: pushq %[[REG]] - ; PUSHF-NEXT: popfq - ; SAHF: callq dummy - ; SAHF-NEXT: pushq - ; SAHF: addb $127, %al - ; SAHF-NEXT: sahf - ; SAHF-NEXT: popq - %sel = select i1 %p, i64 %call, i64 %bar - ; CHECK-NEXT: cmovneq - ret i64 %sel - ; CHECK-NEXT: addq $32, %rsp - ; CHECK-NEXT: popq %rdi - ; CHECK-NEXT: popq %rsi - ; CHECK-NEXT: popq %rbp } define i8* @f11() "no-frame-pointer-elim"="true" { - ; CHECK-LABEL: f11: - ; CHECK: pushq %rbp - ; CHECK: movq %rsp, %rbp - ; CHECK: .seh_setframe 5, 0 - ; CHECK: leaq 8(%rbp), %rax +; ALL-LABEL: f11: +; ALL: # %bb.0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: .seh_pushreg 5 +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: .seh_setframe 5, 0 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: leaq 8(%rbp), %rax +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc %aora = call i8* @llvm.addressofreturnaddress() ret i8* %aora } define i8* @f12() { - ; CHECK-LABEL: f12: - ; CHECK-NOT: push - ; CHECK: movq %rsp, %rax +; ALL-LABEL: f12: +; ALL: # %bb.0: +; ALL-NEXT: movq %rsp, %rax +; ALL-NEXT: retq %aora = call i8* @llvm.addressofreturnaddress() ret i8* %aora } @@ -205,5 +286,4 @@ define i8* @f12() { declare i8* @llvm.returnaddress(i32) nounwind readnone declare i8* @llvm.addressofreturnaddress() nounwind readnone declare i64 @llvm.x86.flags.read.u64() - declare void @llvm.va_start(i8*) nounwind diff --git a/test/CodeGen/X86/x86-repmov-copy-eflags.ll b/test/CodeGen/X86/x86-repmov-copy-eflags.ll index ad3988857284..1c168e8ee3da 100644 --- a/test/CodeGen/X86/x86-repmov-copy-eflags.ll +++ b/test/CodeGen/X86/x86-repmov-copy-eflags.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" @@ -39,15 +39,12 @@ declare void @g(%struct.T*) ; CHECK: leal 8(%esp), %esi ; CHECK: decl (%esp) -; CHECK: seto %al -; CHECK: lahf -; CHECK: movl %eax, %edi +; CHECK: setne %[[NE_REG:.*]] ; CHECK: pushl %esi ; CHECK: calll _g ; CHECK: addl $4, %esp -; CHECK: movl %edi, %eax -; CHECK: addb $127, %al -; CHECK: sahf +; CHECK: testb %[[NE_REG]], %[[NE_REG]] +; CHECK: jne attributes #0 = { nounwind optsize } attributes #1 = { argmemonly nounwind } diff --git 
a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index 9954039654bb..e83cf0aa7ffd 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -32,10 +32,10 @@ ;CHECK-NEXT: DW_AT_call_line ;CHECK: DW_TAG_formal_parameter -;FIXME: Linux shouldn't drop this parameter either... ;CHECK-NOT: DW_TAG -;DARWIN: DW_AT_abstract_origin {{.*}} "sp" -;DARWIN: DW_TAG_formal_parameter +;FIXME: Shouldn't drop this parameter... +;XCHECK: DW_AT_abstract_origin {{.*}} "sp" +;XCHECK: DW_TAG_formal_parameter ;CHECK: DW_AT_abstract_origin {{.*}} "nums" ;CHECK-NOT: DW_TAG_formal_parameter diff --git a/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir b/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir new file mode 100644 index 000000000000..51045f45b217 --- /dev/null +++ b/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir @@ -0,0 +1,141 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -start-before greedy -stop-after virtregrewriter -o - %s | FileCheck %s + +--- | + ; ModuleID = '<stdin>' + source_filename = "test/DebugInfo/X86/dbg-value-inlined-parameter.ll" + target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-apple-darwin" + + %struct.S1 = type { float*, i32 } + + @p = common global %struct.S1 zeroinitializer, align 8, !dbg !0 + + ; Function Attrs: nounwind optsize ssp + define void @foobar() !dbg !15 { + entry: + tail call void @llvm.dbg.value(metadata %struct.S1* @p, metadata !18, metadata !DIExpression()) , !dbg !25 + ret void, !dbg !32 + } + + ; Function Attrs: nounwind readnone speculatable + declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + + !llvm.dbg.cu = !{!2} + !llvm.module.flags = !{!14} + + !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) + !1 = !DIGlobalVariable(name: "p", scope: !2, file: !3, line: 14, type: !6, isLocal: false, isDefinition: true) + !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 2.9 (trunk 125693)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !5, imports: !4) + !3 = !DIFile(filename: "nm2.c", directory: "/private/tmp") + !4 = !{} + !5 = !{!0} + !6 = !DIDerivedType(tag: DW_TAG_typedef, name: "S1", scope: !2, file: !3, line: 4, baseType: !7) + !7 = !DICompositeType(tag: DW_TAG_structure_type, name: "S1", scope: !2, file: !3, line: 1, size: 128, align: 64, elements: !8) + !8 = !{!9, !12} + !9 = !DIDerivedType(tag: DW_TAG_member, name: "m", scope: !3, file: !3, line: 2, baseType: !10, size: 64, align: 64) + !10 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !2, baseType: !11, size: 64, align: 64) + !11 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) + !12 = !DIDerivedType(tag: DW_TAG_member, name: "nums", scope: !3, file: !3, line: 3, baseType: !13, size: 32, align: 32, offset: 64) + !13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !14 = !{i32 1, !"Debug Info Version", i32 3} + !15 = distinct !DISubprogram(name: "foobar", scope: !3, file: !3, line: 15, type: !16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !2) + !16 = !DISubroutineType(types: !17) + !17 = !{null} + !18 = !DILocalVariable(name: "sp", arg: 1, scope: !19, file: !3, line: 7, type: !24) + !19 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 8, type: !20, isLocal: false, isDefinition: true, scopeLine: 8, virtualIndex: 6, flags: 
DIFlagPrototyped, isOptimized: true, unit: !2, variables: !22) + !20 = !DISubroutineType(types: !21) + !21 = !{!13} + !22 = !{!18, !23} + !23 = !DILocalVariable(name: "nums", arg: 2, scope: !19, file: !3, line: 7, type: !13) + !24 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !2, baseType: !6, size: 64, align: 64) + !25 = !DILocation(line: 7, column: 13, scope: !19, inlinedAt: !26) + !26 = !DILocation(line: 16, column: 3, scope: !27) + !27 = distinct !DILexicalBlock(scope: !15, file: !3, line: 15, column: 15) + !32 = !DILocation(line: 17, column: 1, scope: !27) + +... +--- +name: foobar +tracksRegLiveness: true +body: | + bb.0: + %1:gr64 = IMPLICIT_DEF + %2:gr64 = IMPLICIT_DEF + + bb.1: + ; This DBG_VALUE will be discarded (use before def of %0). + DBG_VALUE debug-use %0, debug-use %noreg, !18, !DIExpression(), debug-location !25 + %0:gr64 = IMPLICIT_DEF + %0:gr64 = IMPLICIT_DEF + %0:gr64 = IMPLICIT_DEF + %0:gr64 = IMPLICIT_DEF + + bb.2: + ; This DBG_VALUE will be discarded (%1 is defined earlier, but it is not live in, so we do not know where %1 is stored). + DBG_VALUE debug-use %1, debug-use %noreg, !18, !DIExpression(), debug-location !25 + %1:gr64 = IMPLICIT_DEF + %1:gr64 = IMPLICIT_DEF + %1:gr64 = IMPLICIT_DEF + %1:gr64 = IMPLICIT_DEF + ; This DBG_VALUE is kept, even if %1 is dead, it was defined in the prev instruction, + ; so the value should be available for as long as the register allocated to %1 is live. + DBG_VALUE debug-use %1, debug-use %noreg, !18, !DIExpression(), debug-location !25 + + bb.3: + %1:gr64 = IMPLICIT_DEF + DBG_VALUE 0, debug-use %noreg, !23, !DIExpression(), debug-location !25 + ; This DBG_VALUE is kept, even if %1 is dead, it was defined in the prev non-dbg instruction, + ; so the value should be available for as long as the register allocated to %1 is live. + DBG_VALUE debug-use %1, debug-use %noreg, !18, !DIExpression(), debug-location !25 + + bb.4: + ; All DBG_VALUEs here should survive. %2 is livein as it was defined in bb.0, and it has use/def in the BTS64rr instruction. + DBG_VALUE debug-use %2, debug-use %noreg, !18, !DIExpression(), debug-location !25 + %2:gr64 = BTS64rr %2, 0, implicit-def %eflags + DBG_VALUE 0, debug-use %noreg, !23, !DIExpression(), debug-location !25 + DBG_VALUE debug-use %2, debug-use %noreg, !18, !DIExpression(), debug-location !25 + %2:gr64 = BTS64rr %2, 0, implicit-def %eflags + DBG_VALUE debug-use %2, debug-use %noreg, !18, !DIExpression(), debug-location !25 + %2:gr64 = BTS64rr %2, 0, implicit-def %eflags + DBG_VALUE debug-use %2, debug-use %noreg, !18, !DIExpression(), debug-location !25 + + bb.5: + RET 0, debug-location !32 +... + +# CHECK-LABEL: name: foobar + +# CHECK-LABEL: bb.1: +## After solving https://bugs.llvm.org/show_bug.cgi?id=36579 we expect to get a +## DBG_VALUE debug-use %noreg +## here. +# CHECK-NOT: DBG_VALUE + +# CHECK-LABEL: bb.2: +## After solving https://bugs.llvm.org/show_bug.cgi?id=36579 we expect to get a +## DBG_VALUE debug-use %noreg +## here. 
+# CHECK-NOT: DBG_VALUE +# CHECK: dead renamable %rcx = IMPLICIT_DEF +# CHECK-NEXT: dead renamable %rcx = IMPLICIT_DEF +# CHECK-NEXT: dead renamable %rcx = IMPLICIT_DEF +# CHECK-NEXT: dead renamable %rcx = IMPLICIT_DEF +# CHECK-NEXT: DBG_VALUE debug-use %rcx, debug-use %noreg, !18, !DIExpression() + +# CHECK-LABEL: bb.3: +# CHECK: dead renamable %rcx = IMPLICIT_DEF +# CHECK-NEXT: DBG_VALUE 0, debug-use %noreg, !23, !DIExpression() +# CHECK-NEXT: DBG_VALUE debug-use %rcx, debug-use %noreg, !18, !DIExpression() + +# CHECK-LABEL: bb.4: +# CHECK: liveins: %rax +# CHECK: DBG_VALUE debug-use %rax, debug-use %noreg, !18, !DIExpression() +# CHECK-NEXT: renamable %rax = BTS64rr killed renamable %rax, 0, implicit-def %eflags +# CHECK-NEXT: DBG_VALUE 0, debug-use %noreg, !23, !DIExpression() +# CHECK-NEXT: DBG_VALUE debug-use %rax, debug-use %noreg, !18, !DIExpression() +# CHECK-NEXT: renamable %rax = BTS64rr killed renamable %rax, 0, implicit-def %eflags +# CHECK-NEXT: DBG_VALUE debug-use %rax, debug-use %noreg, !18, !DIExpression() +# CHECK-NEXT: dead renamable %rax = BTS64rr killed renamable %rax, 0, implicit-def %eflags + +# CHECK-LABEL: bb.5: +# CHECK-NEXT: RET 0 diff --git a/test/ExecutionEngine/RuntimeDyld/PowerPC/Inputs/ppc64_elf_module_b.s b/test/ExecutionEngine/RuntimeDyld/PowerPC/Inputs/ppc64_elf_module_b.s new file mode 100644 index 000000000000..f47ddbd41368 --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/PowerPC/Inputs/ppc64_elf_module_b.s @@ -0,0 +1,42 @@ +# This module contains a function with its local and global entry points +# exposed. It is used by the ppc64_elf test to verify that functions with +# different TOCs are called via their global entry points. + .text + .abiversion 2 + .file "ppc64_elf_module_b.ll" + .section .rodata.cst4,"aM",@progbits,4 + .p2align 2 # -- Begin function foo +.LCPI0_0: + .long 1093664768 # float 11 + .text + .globl foo + .p2align 4 + .type foo,@function +.Lfunc_toc0: # @foo + .quad .TOC.-foo_gep +foo: +.Lfunc_begin0: + .cfi_startproc + .globl foo_gep +foo_gep: + ld 2, .Lfunc_toc0-foo_gep(12) + add 2, 2, 12 + .globl foo_lep +foo_lep: + .localentry foo, foo_lep-foo_gep +# %bb.0: + addis 3, 2, .LC0@toc@ha + ld 3, .LC0@toc@l(3) + lfsx 1, 0, 3 + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size foo, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + # -- End function + .section .toc,"aw",@progbits +.LC0: + .tc .LCPI0_0[TC],.LCPI0_0 + + .section ".note.GNU-stack","",@progbits diff --git a/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc64_elf.s b/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc64_elf.s new file mode 100644 index 000000000000..b43c84caf56c --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc64_elf.s @@ -0,0 +1,47 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=powerpc64le-unknown-linux-gnu -filetype=obj -o %t/ppc64_elf.o %s +# RUN: llvm-mc -triple=powerpc64le-unknown-linux-gnu -filetype=obj -o %t/ppc64_elf_module_b.o %S/Inputs/ppc64_elf_module_b.s +# RUN: llvm-rtdyld -triple=powerpc64le-unknown-linux-gnu -verify -check=%s %t/ppc64_elf.o %t/ppc64_elf_module_b.o + + .text + .abiversion 2 + .file "Module2.ll" + .globl bar # -- Begin function bar + .p2align 4 + .type bar,@function +.Lfunc_toc0: # @bar + .quad .TOC.-.Lfunc_gep0 +bar: +.Lfunc_begin0: + .cfi_startproc +.Lfunc_gep0: + ld 2, .Lfunc_toc0-.Lfunc_gep0(12) + add 2, 2, 12 +.Lfunc_lep0: + .localentry bar, .Lfunc_lep0-.Lfunc_gep0 +# %bb.0: + mflr 0 + std 0, 16(1) + stdu 1, -32(1) + .cfi_def_cfa_offset 32 + .cfi_offset lr, 16 +# rtdyld-check: (*{4}(stub_addr(ppc64_elf.o, .text, foo) 
+ 0)) [15:0] = foo_gep [63:48] +# rtdyld-check: (*{4}(stub_addr(ppc64_elf.o, .text, foo) + 4)) [15:0] = foo_gep [47:32] +# rtdyld-check: (*{4}(stub_addr(ppc64_elf.o, .text, foo) + 12)) [15:0] = foo_gep [31:16] +# rtdyld-check: (*{4}(stub_addr(ppc64_elf.o, .text, foo) + 16)) [15:0] = foo_gep [15:0] +# rtdyld-check: decode_operand(foo_call, 0) = (stub_addr(ppc64_elf.o, .text, foo) - foo_call) >> 2 +foo_call: + bl foo + nop + addi 1, 1, 32 + ld 0, 16(1) + mtlr 0 + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size bar, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + # -- End function + + .section ".note.GNU-stack","",@progbits diff --git a/test/MC/ELF/cfi-large-model.s b/test/MC/ELF/cfi-large-model.s index 790d75eee1fa..13e64d3f2851 100644 --- a/test/MC/ELF/cfi-large-model.s +++ b/test/MC/ELF/cfi-large-model.s @@ -1,26 +1,52 @@ // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -large-code-model %s \ -// RUN: -o - | llvm-readobj -s -sd | FileCheck %s +// RUN: -o - | llvm-readobj -s -sd | FileCheck --check-prefix=CHECK-X86 %s +// RUN: llvm-mc -filetype=obj -triple powerpc64le-linux-gnu -large-code-model %s \ +// RUN: -o - | llvm-readobj -s -sd | FileCheck --check-prefix=CHECK-PPC %s -// CHECK: Section { -// CHECK: Index: -// CHECK: Name: .eh_frame -// CHECK-NEXT: Type: SHT_X86_64_UNWIND -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x0 -// CHECK-NEXT: Offset: 0x40 -// CHECK-NEXT: Size: 56 -// CHECK-NEXT: Link: 0 -// CHECK-NEXT: Info: 0 -// CHECK-NEXT: AddressAlignment: 8 -// CHECK-NEXT: EntrySize: 0 -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 00000000 017A5200 01781001 |.........zR..x..| -// CHECK-NEXT: 0010: 1C0C0708 90010000 1C000000 1C000000 |................| -// CHECK-NEXT: 0020: 00000000 00000000 00000000 00000000 |................| -// CHECK-NEXT: 0030: 00000000 00000000 |........| -// CHECK-NEXT: ) +// REQUIRES: x86-registered-target +// REQUIRES: powerpc-registered-target + +// CHECK-X86: Section { +// CHECK-X86: Index: +// CHECK-X86: Name: .eh_frame +// CHECK-X86-NEXT: Type: SHT_X86_64_UNWIND +// CHECK-X86-NEXT: Flags [ +// CHECK-X86-NEXT: SHF_ALLOC +// CHECK-X86-NEXT: ] +// CHECK-X86-NEXT: Address: 0x0 +// CHECK-X86-NEXT: Offset: 0x40 +// CHECK-X86-NEXT: Size: 56 +// CHECK-X86-NEXT: Link: 0 +// CHECK-X86-NEXT: Info: 0 +// CHECK-X86-NEXT: AddressAlignment: 8 +// CHECK-X86-NEXT: EntrySize: 0 +// CHECK-X86-NEXT: SectionData ( +// CHECK-X86-NEXT: 0000: 14000000 00000000 017A5200 01781001 |.........zR..x..| +// CHECK-X86-NEXT: 0010: 1C0C0708 90010000 1C000000 1C000000 |................| +// CHECK-X86-NEXT: 0020: 00000000 00000000 00000000 00000000 |................| +// CHECK-X86-NEXT: 0030: 00000000 00000000 |........| +// CHECK-X86-NEXT: ) + +// CHECK-PPC: Section { +// CHECK-PPC: Index: +// CHECK-PPC: Name: .eh_frame +// CHECK-PPC-NEXT: Type: SHT_PROGBITS +// CHECK-PPC-NEXT: Flags [ +// CHECK-PPC-NEXT: SHF_ALLOC +// CHECK-PPC-NEXT: ] +// CHECK-PPC-NEXT: Address: 0x0 +// CHECK-PPC-NEXT: Offset: 0x40 +// CHECK-PPC-NEXT: Size: 48 +// CHECK-PPC-NEXT: Link: 0 +// CHECK-PPC-NEXT: Info: 0 +// CHECK-PPC-NEXT: AddressAlignment: 8 +// CHECK-PPC-NEXT: EntrySize: 0 +// CHECK-PPC-NEXT: SectionData ( +// CHECK-PPC-NEXT: 0000: 10000000 00000000 017A5200 04784101 |.........zR..xA.| +// CHECK-PPC-NEXT: 0010: 1C0C0100 18000000 18000000 00000000 |................| +// CHECK-PPC-NEXT: 0020: 00000000 00000000 00000000 00000000 |................| +// CHECK-PPC-NEXT: ) +// CHECK-PPC-NEXT: } f: .cfi_startproc diff --git 
a/test/MC/Mips/unsupported-relocation.s b/test/MC/Mips/unsupported-relocation.s new file mode 100644 index 000000000000..151a559671fb --- /dev/null +++ b/test/MC/Mips/unsupported-relocation.s @@ -0,0 +1,13 @@ +# RUN: not llvm-mc -triple mips-unknown-linux -filetype=obj %s 2>%t +# RUN: FileCheck %s < %t + +# Check that we emit an error for unsupported relocations instead of crashing. + + .globl x + + .data +foo: + .byte x + .byte x+1 + +# CHECK: LLVM ERROR: MIPS does not support one byte relocations diff --git a/test/Transforms/ArgumentPromotion/musttail.ll b/test/Transforms/ArgumentPromotion/musttail.ll new file mode 100644 index 000000000000..aa1871168693 --- /dev/null +++ b/test/Transforms/ArgumentPromotion/musttail.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -argpromotion -S | FileCheck %s +; PR36543 + +; Don't promote arguments of musttail callee + +%T = type { i32, i32, i32, i32 } + +; CHECK-LABEL: define internal i32 @test(%T* %p) +define internal i32 @test(%T* %p) { + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +; CHECK-LABEL: define i32 @caller(%T* %p) +define i32 @caller(%T* %p) { + %v = musttail call i32 @test(%T* %p) + ret i32 %v +} + +; Don't promote arguments of musttail caller + +define i32 @foo(%T* %p, i32 %v) { + ret i32 0 +} + +; CHECK-LABEL: define internal i32 @test2(%T* %p, i32 %p2) +define internal i32 @test2(%T* %p, i32 %p2) { + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + %ca = musttail call i32 @foo(%T* undef, i32 %v) + ret i32 %ca +} + +; CHECK-LABEL: define i32 @caller2(%T* %g) +define i32 @caller2(%T* %g) { + %v = call i32 @test2(%T* %g, i32 0) + ret i32 %v +} diff --git a/test/Transforms/CallSiteSplitting/musttail.ll b/test/Transforms/CallSiteSplitting/musttail.ll new file mode 100644 index 000000000000..97548501cd5c --- /dev/null +++ b/test/Transforms/CallSiteSplitting/musttail.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -callsite-splitting -S | FileCheck %s + +define i8* @caller(i8* %a, i8* %b) { +; CHECK-LABEL: @caller( +; CHECK-NEXT: Top: +; CHECK-NEXT: [[C:%.*]] = icmp eq i8* [[A:%.*]], null +; CHECK-NEXT: br i1 [[C]], label [[TAIL_PREDBB1_SPLIT:%.*]], label [[TBB:%.*]] +; CHECK: TBB: +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8* [[B:%.*]], null +; CHECK-NEXT: br i1 [[C2]], label [[TAIL_PREDBB2_SPLIT:%.*]], label [[END:%.*]] +; CHECK: Tail.predBB1.split: +; CHECK-NEXT: [[TMP0:%.*]] = musttail call i8* @callee(i8* null, i8* [[B]]) +; CHECK-NEXT: [[CB1:%.*]] = bitcast i8* [[TMP0]] to i8* +; CHECK-NEXT: ret i8* [[CB1]] +; CHECK: Tail.predBB2.split: +; CHECK-NEXT: [[TMP1:%.*]] = musttail call i8* @callee(i8* nonnull [[A]], i8* null) +; CHECK-NEXT: [[CB2:%.*]] = bitcast i8* [[TMP1]] to i8* +; CHECK-NEXT: ret i8* [[CB2]] +; CHECK: End: +; CHECK-NEXT: ret i8* null +; +Top: + %c = icmp eq i8* %a, null + br i1 %c, label %Tail, label %TBB +TBB: + %c2 = icmp eq i8* %b, null + br i1 %c2, label %Tail, label %End +Tail: + %ca = musttail call i8* @callee(i8* %a, i8* %b) + %cb = bitcast i8* %ca to i8* + ret i8* %cb +End: + ret i8* null +} + +define i8* @callee(i8* %a, i8* %b) noinline { +; CHECK-LABEL: define i8* @callee( +; CHECK-NEXT: ret i8* [[A:%.*]] +; + ret i8* %a +} + +define i8* @no_cast_caller(i8* %a, i8* %b) { +; CHECK-LABEL: 
@no_cast_caller( +; CHECK-NEXT: Top: +; CHECK-NEXT: [[C:%.*]] = icmp eq i8* [[A:%.*]], null +; CHECK-NEXT: br i1 [[C]], label [[TAIL_PREDBB1_SPLIT:%.*]], label [[TBB:%.*]] +; CHECK: TBB: +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8* [[B:%.*]], null +; CHECK-NEXT: br i1 [[C2]], label [[TAIL_PREDBB2_SPLIT:%.*]], label [[END:%.*]] +; CHECK: Tail.predBB1.split: +; CHECK-NEXT: [[TMP0:%.*]] = musttail call i8* @callee(i8* null, i8* [[B]]) +; CHECK-NEXT: ret i8* [[TMP0]] +; CHECK: Tail.predBB2.split: +; CHECK-NEXT: [[TMP1:%.*]] = musttail call i8* @callee(i8* nonnull [[A]], i8* null) +; CHECK-NEXT: ret i8* [[TMP1]] +; CHECK: End: +; CHECK-NEXT: ret i8* null +; +Top: + %c = icmp eq i8* %a, null + br i1 %c, label %Tail, label %TBB +TBB: + %c2 = icmp eq i8* %b, null + br i1 %c2, label %Tail, label %End +Tail: + %ca = musttail call i8* @callee(i8* %a, i8* %b) + ret i8* %ca +End: + ret i8* null +} + +define void @void_caller(i8* %a, i8* %b) { +; CHECK-LABEL: @void_caller( +; CHECK-NEXT: Top: +; CHECK-NEXT: [[C:%.*]] = icmp eq i8* [[A:%.*]], null +; CHECK-NEXT: br i1 [[C]], label [[TAIL_PREDBB1_SPLIT:%.*]], label [[TBB:%.*]] +; CHECK: TBB: +; CHECK-NEXT: [[C2:%.*]] = icmp eq i8* [[B:%.*]], null +; CHECK-NEXT: br i1 [[C2]], label [[TAIL_PREDBB2_SPLIT:%.*]], label [[END:%.*]] +; CHECK: Tail.predBB1.split: +; CHECK-NEXT: musttail call void @void_callee(i8* null, i8* [[B]]) +; CHECK-NEXT: ret void +; CHECK: Tail.predBB2.split: +; CHECK-NEXT: musttail call void @void_callee(i8* nonnull [[A]], i8* null) +; CHECK-NEXT: ret void +; CHECK: End: +; CHECK-NEXT: ret void +; +Top: + %c = icmp eq i8* %a, null + br i1 %c, label %Tail, label %TBB +TBB: + %c2 = icmp eq i8* %b, null + br i1 %c2, label %Tail, label %End +Tail: + musttail call void @void_callee(i8* %a, i8* %b) + ret void +End: + ret void +} + +define void @void_callee(i8* %a, i8* %b) noinline { +; CHECK-LABEL: define void @void_callee( +; CHECK-NEXT: ret void +; + ret void +} diff --git a/test/Transforms/DeadArgElim/musttail-caller.ll b/test/Transforms/DeadArgElim/musttail-caller.ll new file mode 100644 index 000000000000..981326bba0aa --- /dev/null +++ b/test/Transforms/DeadArgElim/musttail-caller.ll @@ -0,0 +1,16 @@ +; RUN: opt -deadargelim -S < %s | FileCheck %s +; PR36441 +; Dead arguments should not be removed in presence of `musttail` calls. 
+ +; CHECK-LABEL: define internal void @test(i32 %a, i32 %b) +; CHECK: musttail call void @foo(i32 %a, i32 0) +; FIXME: we should replace those with `undef`s +define internal void @test(i32 %a, i32 %b) { + musttail call void @foo(i32 %a, i32 0) + ret void +} + +; CHECK-LABEL: define internal void @foo(i32 %a, i32 %b) +define internal void @foo(i32 %a, i32 %b) { + ret void +} diff --git a/test/Transforms/GlobalOpt/musttail_cc.ll b/test/Transforms/GlobalOpt/musttail_cc.ll new file mode 100644 index 000000000000..fc927ea91dd8 --- /dev/null +++ b/test/Transforms/GlobalOpt/musttail_cc.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s +; PR36546 + +; Check that musttail callee preserves its calling convention + +define i32 @test(i32 %a) { + ; CHECK: %ca = musttail call i32 @foo(i32 %a) + %ca = musttail call i32 @foo(i32 %a) + ret i32 %ca +} + +; CHECK-LABEL: define internal i32 @foo(i32 %a) +define internal i32 @foo(i32 %a) { + ret i32 %a +} + +; Check that musttail caller preserves its calling convention + +define i32 @test2(i32 %a) { + %ca = call i32 @foo1(i32 %a) + ret i32 %ca +} + +; CHECK-LABEL: define internal i32 @foo1(i32 %a) +define internal i32 @foo1(i32 %a) { + ; CHECK: %ca = musttail call i32 @foo2(i32 %a) + %ca = musttail call i32 @foo2(i32 %a) + ret i32 %ca +} + +; CHECK-LABEL: define internal i32 @foo2(i32 %a) +define internal i32 @foo2(i32 %a) { + ret i32 %a +} diff --git a/test/Transforms/IPConstantProp/musttail-call.ll b/test/Transforms/IPConstantProp/musttail-call.ll new file mode 100644 index 000000000000..f02f6992a70d --- /dev/null +++ b/test/Transforms/IPConstantProp/musttail-call.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -ipsccp -S | FileCheck %s +; PR36485 +; musttail call result can\'t be replaced with a constant, unless the call +; can be removed + +declare i32 @external() + +define i8* @start(i8 %v) { + %c1 = icmp eq i8 %v, 0 + br i1 %c1, label %true, label %false +true: + ; CHECK: %ca = musttail call i8* @side_effects(i8 %v) + ; CHECK: ret i8* %ca + %ca = musttail call i8* @side_effects(i8 %v) + ret i8* %ca +false: + %c2 = icmp eq i8 %v, 1 + br i1 %c2, label %c2_true, label %c2_false +c2_true: + ; CHECK: %ca1 = musttail call i8* @no_side_effects(i8 %v) + ; CHECK: ret i8* %ca1 + %ca1 = musttail call i8* @no_side_effects(i8 %v) + ret i8* %ca1 +c2_false: + ; CHECK: %ca2 = musttail call i8* @dont_zap_me(i8 %v) + ; CHECK: ret i8* %ca2 + %ca2 = musttail call i8* @dont_zap_me(i8 %v) + ret i8* %ca2 +} + +define internal i8* @side_effects(i8 %v) { + %i1 = call i32 @external() + + ; since this goes back to `start` the SCPP should be see that the return value + ; is always `null`. + ; The call can't be removed due to `external` call above, though. + + ; CHECK: %ca = musttail call i8* @start(i8 %v) + %ca = musttail call i8* @start(i8 %v) + + ; Thus the result must be returned anyway + ; CHECK: ret i8* %ca + ret i8* %ca +} + +define internal i8* @no_side_effects(i8 %v) readonly nounwind { + ; CHECK: ret i8* null + ret i8* null +} + +define internal i8* @dont_zap_me(i8 %v) { + %i1 = call i32 @external() + + ; The call to this function cannot be removed due to side effects. Thus the + ; return value should stay as it is, and should not be zapped. 
+ ; CHECK: ret i8* null + ret i8* null +} diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll index aa46ea671302..4a4951dee7fd 100644 --- a/test/Transforms/InstCombine/gep-addrspace.ll +++ b/test/Transforms/InstCombine/gep-addrspace.ll @@ -32,3 +32,22 @@ entry: ret void } +declare void @escape_alloca(i16*) + +; check that addrspacecast is not ignored (leading to an assertion failure) +; when trying to mark a GEP as inbounds +define { i8, i8 } @inbounds_after_addrspacecast() { +top: +; CHECK-LABEL: @inbounds_after_addrspacecast + %0 = alloca i16, align 2 + call void @escape_alloca(i16* %0) + %tmpcast = bitcast i16* %0 to [2 x i8]* +; CHECK: addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)* + %1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)* +; CHECK: getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1 + %2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1 +; CHECK: addrspace(11) + %3 = load i8, i8 addrspace(11)* %2, align 1 + %.fca.1.insert = insertvalue { i8, i8 } zeroinitializer, i8 %3, 1 + ret { i8, i8 } %.fca.1.insert +} diff --git a/test/Transforms/JumpThreading/header-succ.ll b/test/Transforms/JumpThreading/header-succ.ll new file mode 100644 index 000000000000..859d44cff293 --- /dev/null +++ b/test/Transforms/JumpThreading/header-succ.ll @@ -0,0 +1,99 @@ +; RUN: opt -S -jump-threading < %s | FileCheck %s + +; Check that the heuristic for avoiding accidental introduction of irreducible +; loops doesn't also prevent us from threading simple constructs where this +; isn't a problem. + +declare void @opaque_body() + +define void @jump_threading_loopheader() { +; CHECK-LABEL: @jump_threading_loopheader +top: + br label %entry + +entry: + %ind = phi i32 [0, %top], [%nextind, %latch] + %nextind = add i32 %ind, 1 + %cmp = icmp ule i32 %ind, 10 +; CHECK: br i1 %cmp, label %latch, label %exit + br i1 %cmp, label %body, label %latch + +body: + call void @opaque_body() +; CHECK: br label %entry + br label %latch + +latch: + %cond = phi i2 [1, %entry], [2, %body] + switch i2 %cond, label %unreach [ + i2 2, label %entry + i2 1, label %exit + ] + +unreach: + unreachable + +exit: + ret void +} + +; We also need to check the opposite order of the branches, in the switch +; instruction because jump-threading relies on that to decide which edge to +; try to thread first. +define void @jump_threading_loopheader2() { +; CHECK-LABEL: @jump_threading_loopheader2 +top: + br label %entry + +entry: + %ind = phi i32 [0, %top], [%nextind, %latch] + %nextind = add i32 %ind, 1 + %cmp = icmp ule i32 %ind, 10 +; CHECK: br i1 %cmp, label %exit, label %latch + br i1 %cmp, label %body, label %latch + +body: + call void @opaque_body() +; CHECK: br label %entry + br label %latch + +latch: + %cond = phi i2 [1, %entry], [2, %body] + switch i2 %cond, label %unreach [ + i2 1, label %entry + i2 2, label %exit + ] + +unreach: + unreachable + +exit: + ret void +} + +; Check if we can handle undef branch condition. 
+define void @jump_threading_loopheader3() { +; CHECK-LABEL: @jump_threading_loopheader3 +top: + br label %entry + +entry: + %ind = phi i32 [0, %top], [%nextind, %latch] + %nextind = add i32 %ind, 1 + %cmp = icmp ule i32 %ind, 10 +; CHECK: br i1 %cmp, label %latch, label %exit + br i1 %cmp, label %body, label %latch + +body: + call void @opaque_body() +; CHECK: br label %entry + br label %latch + +latch: + %phi = phi i32 [undef, %entry], [0, %body] + %cmp1 = icmp eq i32 %phi, 0 + br i1 %cmp1, label %entry, label %exit + +exit: + ret void +} diff --git a/test/Transforms/MergeFunc/inline-asm.ll b/test/Transforms/MergeFunc/inline-asm.ll new file mode 100644 index 000000000000..370d3c56f060 --- /dev/null +++ b/test/Transforms/MergeFunc/inline-asm.ll @@ -0,0 +1,53 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; CHECK-LABEL: @int_ptr_arg_different +; CHECK-NEXT: call void asm + +; CHECK-LABEL: @int_ptr_arg_same +; CHECK-NEXT: %2 = bitcast i32* %0 to float* +; CHECK-NEXT: tail call void @float_ptr_arg_same(float* %2) + +; CHECK-LABEL: @int_ptr_null +; CHECK-NEXT: tail call void @float_ptr_null() + +; Used to satisfy minimum size limit +declare void @stuff() + +; Can be merged +define void @float_ptr_null() { + call void asm "nop", "r"(float* null) + call void @stuff() + ret void +} + +define void @int_ptr_null() { + call void asm "nop", "r"(i32* null) + call void @stuff() + ret void +} + +; Can be merged (uses same argument differing by pointer type) +define void @float_ptr_arg_same(float*) { + call void asm "nop", "r"(float* %0) + call void @stuff() + ret void +} + +define void @int_ptr_arg_same(i32*) { + call void asm "nop", "r"(i32* %0) + call void @stuff() + ret void +} + +; Can not be merged (uses different arguments) +define void @float_ptr_arg_different(float*, float*) { + call void asm "nop", "r"(float* %0) + call void @stuff() + ret void +} + +define void @int_ptr_arg_different(i32*, i32*) { + call void asm "nop", "r"(i32* %1) + call void @stuff() + ret void +} diff --git a/test/Transforms/MergeFunc/weak-small.ll b/test/Transforms/MergeFunc/weak-small.ll new file mode 100644 index 000000000000..64f108317462 --- /dev/null +++ b/test/Transforms/MergeFunc/weak-small.ll @@ -0,0 +1,16 @@ +; RUN: opt -mergefunc -S < %s | FileCheck %s + +; Weak functions too small for merging to be profitable + +; CHECK: define weak i32 @foo(i8*, i32) +; CHECK-NEXT: ret i32 %1 +; CHECK: define weak i32 @bar(i8*, i32) +; CHECK-NEXT: ret i32 %1 + +define weak i32 @foo(i8*, i32) #0 { + ret i32 %1 +} + +define weak i32 @bar(i8*, i32) #0 { + ret i32 %1 +} diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt index 25f99cec9788..f59402ac4b0a 100644 --- a/tools/llvm-config/CMakeLists.txt +++ b/tools/llvm-config/CMakeLists.txt @@ -9,8 +9,9 @@ add_llvm_tool(llvm-config ) # Compute the substitution values for various items. -get_property(LLVM_SYSTEM_LIBS_LIST TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS) -foreach(l ${LLVM_SYSTEM_LIBS_LIST}) +get_property(SUPPORT_SYSTEM_LIBS TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS) +get_property(WINDOWSMANIFEST_SYSTEM_LIBS TARGET LLVMWindowsManifest PROPERTY LLVM_SYSTEM_LIBS) +foreach(l ${SUPPORT_SYSTEM_LIBS} ${WINDOWSMANIFEST_SYSTEM_LIBS}) if(MSVC) set(SYSTEM_LIBS ${SYSTEM_LIBS} "${l}.lib") else() @@ -64,18 +65,17 @@ endif() # Add the dependency on the generation step. 
add_file_dependencies(${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.cpp ${BUILDVARIABLES_OBJPATH}) -if(CMAKE_CROSSCOMPILING) - set(${project}_LLVM_CONFIG_EXE "${LLVM_NATIVE_BUILD}/bin/llvm-config") - set(${project}_LLVM_CONFIG_EXE ${${project}_LLVM_CONFIG_EXE} PARENT_SCOPE) +if(CMAKE_CROSSCOMPILING AND NOT LLVM_CONFIG_PATH) + set(LLVM_CONFIG_PATH "${LLVM_NATIVE_BUILD}/bin/llvm-config" CACHE STRING "") - add_custom_command(OUTPUT "${${project}_LLVM_CONFIG_EXE}" + add_custom_command(OUTPUT "${LLVM_CONFIG_PATH}" COMMAND ${CMAKE_COMMAND} --build . --target llvm-config --config $<CONFIGURATION> DEPENDS ${LLVM_NATIVE_BUILD}/CMakeCache.txt WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} COMMENT "Building native llvm-config..." USES_TERMINAL) - add_custom_target(${project}NativeLLVMConfig DEPENDS ${${project}_LLVM_CONFIG_EXE}) - add_dependencies(${project}NativeLLVMConfig CONFIGURE_LLVM_NATIVE) + add_custom_target(NativeLLVMConfig DEPENDS ${LLVM_CONFIG_PATH}) + add_dependencies(NativeLLVMConfig CONFIGURE_LLVM_NATIVE) - add_dependencies(llvm-config ${project}NativeLLVMConfig) -endif(CMAKE_CROSSCOMPILING) + add_dependencies(llvm-config NativeLLVMConfig) +endif() |