diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-08-20 21:02:43 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-08-20 21:02:43 +0000 |
commit | 15c5c77fa04cd97e1057e8a585f669fc49da0d92 (patch) | |
tree | 9047e00a30ccb7b81dbe7227c8c883cbafb5d2dd | |
parent | 4e20bb0468b8d0db13287e666b482eb93689be99 (diff) |
Vendor import of llvm release_50 branch r311219:vendor/llvm/llvm-release_50-r311219
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=322727
svn path=/vendor/llvm/llvm-release_50-r311219/; revision=322728; tag=vendor/llvm/llvm-release_50-r311219
55 files changed, 1306 insertions, 236 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 6af2cba10093..8c0f51145139 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -314,6 +314,7 @@ set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules) set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) +# List of all targets to be built by default: set(LLVM_ALL_TARGETS AArch64 AMDGPU @@ -325,7 +326,6 @@ set(LLVM_ALL_TARGETS MSP430 NVPTX PowerPC - RISCV Sparc SystemZ X86 diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 44efc1498060..5c65864e901e 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -5369,6 +5369,10 @@ The following behaviors are supported: nodes. However, duplicate entries in the second list are dropped during the append operation. + * - 7 + - **Max** + Takes the max of the two values, which are required to be integers. + It is an error for a particular unique flag ID to have multiple behaviors, except in the case of **Require** (which adds restrictions on another metadata value) or **Override**. diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index dcd2ec7eb22b..48af491f1214 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -117,6 +117,18 @@ Changes to the X86 Target * Added support for AMD Lightweight Profiling (LWP) instructions. +* Avoid using slow LEA instructions. + +* Use alternative sequences for multiply by constant. + +* Improved lowering of strided shuffles. + +* Improved the AVX512 cost model used by the vectorizer. + +* Fix scalar code performance when AVX512 is enabled by making i1's illegal. + +* Fixed many inline assembly bugs. + Changes to the AMDGPU Target ----------------------------- @@ -160,7 +172,29 @@ Changes to the C API External Open Source Projects Using LLVM 5 ========================================== -* A project... +Zig Programming Language +------------------------ + +`Zig <http://ziglang.org>`_ is an open-source programming language designed +for robustness, optimality, and clarity. It integrates closely with C and is +intended to eventually take the place of C. It uses LLVM to produce highly +optimized native code and to cross-compile for any target out of the box. Zig +is in alpha; with a beta release expected in September. + +LDC - the LLVM-based D compiler +------------------------------- + +`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It +pragmatically combines efficiency, control, and modeling power, with safety and +programmer productivity. D supports powerful concepts like Compile-Time Function +Execution (CTFE) and Template Meta-Programming, provides an innovative approach +to concurrency and offers many classical paradigms. + +`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler +combined with LLVM as backend to produce efficient native code. LDC targets +x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM +and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64 +are underway. Additional Information diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 55a23c3cca9b..d6851f7143a5 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -1220,8 +1220,9 @@ public: /// If an existing load has uses of its chain, create a token factor node with /// that chain and the new memory node's chain and update users of the old /// chain to the token factor. This ensures that the new memory node will have - /// the same relative memory dependency position as the old load. - void makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); + /// the same relative memory dependency position as the old load. Returns the + /// new merged load chain. + SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 6c951fab6185..b7e462e85d9d 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -94,9 +94,9 @@ private: llvm_unreachable("Invalid emit-state."); } - void removeModuleFromBaseLayer(BaseLayerT &BaseLayer) { - if (EmitState != NotEmitted) - BaseLayer.removeModule(Handle); + Error removeModuleFromBaseLayer(BaseLayerT& BaseLayer) { + return EmitState != NotEmitted ? BaseLayer.removeModule(Handle) + : Error::success(); } void emitAndFinalize(BaseLayerT &BaseLayer) { @@ -226,9 +226,9 @@ public: /// This method will free the memory associated with the given module, both /// in this layer, and the base layer. Error removeModule(ModuleHandleT H) { - (*H)->removeModuleFromBaseLayer(BaseLayer); + Error Err = (*H)->removeModuleFromBaseLayer(BaseLayer); ModuleList.erase(H); - return Error::success(); + return Err; } /// @brief Search for the given named symbol. diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h index 8e215b565fc4..cf9c80a06f49 100644 --- a/include/llvm/Object/COFFImportFile.h +++ b/include/llvm/Object/COFFImportFile.h @@ -73,6 +73,7 @@ private: struct COFFShortExport { std::string Name; std::string ExtName; + std::string SymbolName; uint16_t Ordinal = 0; bool Noname = false; @@ -98,7 +99,8 @@ struct COFFShortExport { std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef<COFFShortExport> Exports, - COFF::MachineTypes Machine); + COFF::MachineTypes Machine, + bool MakeWeakAliases); } // namespace object } // namespace llvm diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b973203a89b6..9539fd7c7559 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -162,6 +162,11 @@ static cl::opt<unsigned> cl::desc("Maximum depth of recursive SExt/ZExt"), cl::init(8)); +static cl::opt<unsigned> + MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, + cl::desc("Max coefficients in AddRec during evolving"), + cl::init(16)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -2878,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; + // Limit max number of arguments to avoid creation of unreasonably big + // SCEVAddRecs with very complex operands. + if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > + MaxAddRecSize) + continue; + bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; @@ -7582,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { + + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BTCC->getValue()->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; + } + } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); + } + } // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 439b21a81258..cdfe74d158c9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -4458,6 +4458,10 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + // Bail out when we hit the limit. + if (Depth == MaxDepth) + return None; + // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. if (LHS->getType() != RHS->getType()) return None; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0cad20db0964..ecb54e1e4b41 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { - SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue Cond = N->getOperand(0); + EVT OpVT = Cond.getValueType(); + SDLoc DL(N); + // The vselect result and true/value operands needs scalarizing, but it's + // not a given that the Cond does. For instance, in AVX512 v1i1 is legal. + // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Cond = GetScalarizedVector(Cond); + } else { + EVT VT = OpVT.getVectorElementType(); + Cond = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + SDValue LHS = GetScalarizedVector(N->getOperand(1)); TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false, false); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 823e77850c4b..0ff154784f68 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7262,22 +7262,23 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } -void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, - SDValue NewMemOp) { +SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); - if (!OldLoad->hasAnyUseOfValue(1)) - return; - // The new memory operation must have the same position as the old load in // terms of memory dependency. Create a TokenFactor for the old load and new // memory operation and update uses of the old load's output chain to use that // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + if (!OldLoad->hasAnyUseOfValue(1)) + return NewChain; + SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); ReplaceAllUsesOfValueWith(OldChain, TokenFactor); UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + return TokenFactor; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 124c2790f68c..f8aacdb8649d 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -180,6 +180,7 @@ class VirtRegRewriter : public MachineFunctionPass { void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; + bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const; public: static char ID; @@ -415,6 +416,32 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { } } +/// Check whether (part of) \p SuperPhysReg is live through \p MI. +/// \pre \p MI defines a subregister of a virtual register that +/// has been assigned to \p SuperPhysReg. +bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, + unsigned SuperPhysReg) const { + SlotIndex MIIndex = LIS->getInstructionIndex(MI); + SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); + SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); + for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) { + const LiveRange &UnitRange = LIS->getRegUnit(*Unit); + // If the regunit is live both before and after MI, + // we assume it is live through. + // Generally speaking, this is not true, because something like + // "RU = op RU" would match that description. + // However, we know that we are trying to assess whether + // a def of a virtual reg, vreg, is live at the same time of RU. + // If we are in the "RU = op RU" situation, that means that vreg + // is defined at the same time as RU (i.e., "vreg, RU = op RU"). + // Thus, vreg and RU interferes and vreg cannot be assigned to + // SuperPhysReg. Therefore, this situation cannot happen. + if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses)) + return true; + } + return false; +} + void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; @@ -452,7 +479,8 @@ void VirtRegRewriter::rewrite() { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. A // partial redef always kills and redefines the super-register. - if (MO.readsReg() && (MO.isDef() || MO.isKill())) + if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || + (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) SuperKills.push_back(PhysReg); if (MO.isDef()) { diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 495e09fbae35..dd3235244e24 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -134,13 +134,13 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); if (Format == DWARF32) { - OS << format("%8.8x ", StringOffset); uint32_t StringOffset32 = (uint32_t)StringOffset; + OS << format("%8.8x ", StringOffset32); const char *S = StrData.getCStr(&StringOffset32); if (S) OS << format("\"%s\"", S); } else - OS << format("%16.16x ", StringOffset); + OS << format("%16.16" PRIx64 " ", StringOffset); OS << "\n"; } } diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 6cf44ffa3796..4de46bea301e 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -196,7 +196,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, ++NumErrors; OS << "error: DW_AT_stmt_list offset is beyond .debug_line " "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; + << format("0x%08" PRIx64, *SectionOffset) << "\n"; Die.dump(OS, 0); OS << "\n"; } @@ -234,7 +234,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, if (CUOffset >= CUSize) { ++NumErrors; OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) + << format("0x%08" PRIx64, CUOffset) << " is invalid (must be less than CU size of " << format("0x%08" PRIx32, CUSize) << "):\n"; Die.dump(OS, 0); @@ -366,7 +366,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.Address < PrevAddress) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "] row[" << RowIndex << "] decreases in address from previous row:\n"; @@ -381,7 +381,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.File > MaxFileIndex) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "][" << RowIndex << "] has invalid file index " << Row.File << " (valid values are [1," << MaxFileIndex << "]):\n"; diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index a515bc8ad16d..ff039463d08c 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -557,7 +557,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef<COFFShortExport> Exports, - MachineTypes Machine) { + MachineTypes Machine, bool MakeWeakAliases) { std::vector<NewArchiveMember> Members; ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); @@ -575,7 +575,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, if (E.Private) continue; - if (E.isWeak()) { + if (E.isWeak() && MakeWeakAliases) { Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); continue; @@ -587,7 +587,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, if (E.Constant) ImportType = IMPORT_CONST; - StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name; + StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); Expected<std::string> Name = E.ExtName.empty() ? SymbolName diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 005f2d51e403..9a7f45bde6c9 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst, } static unsigned getPreIndexedOpcode(unsigned Opc) { + // FIXME: We don't currently support creating pre-indexed loads/stores when + // the load or store is the unscaled version. If we decide to perform such an + // optimization in the future the cases for the unscaled loads/stores will + // need to be added here. switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); @@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); case AArch64::STRSui: + case AArch64::STURSi: return AArch64::STRSpost; case AArch64::STRDui: + case AArch64::STURDi: return AArch64::STRDpost; case AArch64::STRQui: + case AArch64::STURQi: return AArch64::STRQpost; case AArch64::STRBBui: return AArch64::STRBBpost; case AArch64::STRHHui: return AArch64::STRHHpost; case AArch64::STRWui: + case AArch64::STURWi: return AArch64::STRWpost; case AArch64::STRXui: + case AArch64::STURXi: return AArch64::STRXpost; case AArch64::LDRSui: + case AArch64::LDURSi: return AArch64::LDRSpost; case AArch64::LDRDui: + case AArch64::LDURDi: return AArch64::LDRDpost; case AArch64::LDRQui: + case AArch64::LDURQi: return AArch64::LDRQpost; case AArch64::LDRBBui: return AArch64::LDRBBpost; case AArch64::LDRHHui: return AArch64::LDRHHpost; case AArch64::LDRWui: + case AArch64::LDURWi: return AArch64::LDRWpost; case AArch64::LDRXui: + case AArch64::LDURXi: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; @@ -1694,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++NumPostFolded; break; } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. + + // Don't know how to handle unscaled pre/post-index versions below, so + // move to the next instruction. if (TII->isUnscaledLdSt(Opc)) { ++MBBI; break; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index ec49f0d37af4..46d8f0dba691 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -769,8 +769,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - bool StatusDead = MI.getOperand(1).isDead(); + unsigned TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); @@ -797,23 +796,9 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, } // .Lloadcmp: - // mov wStatus, #0 // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone - if (!StatusDead) { - if (IsThumb) { - BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg) - .addDef(ARM::CPSR, RegState::Dead) - .addImm(0) - .add(predOps(ARMCC::AL)); - } else { - BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg) - .addImm(0) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - } - } MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); @@ -836,10 +821,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strex rStatus, rNew, [rAddr] - // cmp rStatus, #0 + // strex rTempReg, rNew, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp - MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg) + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) .addReg(NewReg) .addReg(AddrReg); if (StrexOp == ARM::t2STREX) @@ -848,7 +833,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, getKillRegState(StatusDead)) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -904,8 +889,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - bool StatusDead = MI.getOperand(1).isDead(); + unsigned TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); @@ -931,7 +915,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo - // sbcs rStatus<dead>, rDestHi, rDesiredHi + // sbcs rTempReg<dead>, rDestHi, rDesiredHi // bne .Ldone unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; @@ -959,17 +943,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strexd rStatus, rNewLo, rNewHi, [rAddr] - // cmp rStatus, #0 + // strexd rTempReg, rNewLo, rNewHi, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; - MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); + MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, getKillRegState(StatusDead)) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index d06b7d0896f1..7206083a7079 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), // significantly more naive than the standard expansion: we conservatively // assume seq_cst, strong cmpxchg and omit clrex on failure. -let Constraints = "@earlyclobber $Rd,@earlyclobber $status", +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7563bffd8f87..1e73122cdc38 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -419,6 +419,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); } + + // Custom action for SELECT MMX and expand action for SELECT_CC MMX + setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); + setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); + setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to @@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Custom under AVX1. for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) + MVT::v8f32, MVT::v4f64, MVT::v1i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1, MVT::v32i1, MVT::v64i1 }) @@ -14570,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); MVT ResVT = Op.getSimpleValueType(); + // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond + // would result with: v1i1 = extract_subvector(vXi1, idx). + // Lower these into extract_vector_elt which is already selectable. + if (ResVT == MVT::v1i1) { + assert(Subtarget.hasAVX512() && + "Boolean EXTRACT_SUBVECTOR requires AVX512"); + + MVT EltVT = ResVT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT LegalVT = + (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT(); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res); + } + assert((In.getSimpleValueType().is256BitVector() || In.getSimpleValueType().is512BitVector()) && "Can only extract from 256-bit or 512-bit vectors"); @@ -20651,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, } // ADC/ADCX/SBB case ADX: { - SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other); - SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other); + SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); + SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), @@ -30663,6 +30683,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(N, 0); } + // Custom action for SELECT MMX + if (VT == MVT::x86mmx) { + LHS = DAG.getBitcast(MVT::i64, LHS); + RHS = DAG.getBitcast(MVT::i64, RHS); + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS); + return DAG.getBitcast(VT, newSelect); + } + return SDValue(); } @@ -33358,7 +33386,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); - SDValue NewChain = NewLd.getValue(1); + // Make sure new load is placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd); if (TokenFactorIndex >= 0) { Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); @@ -33379,11 +33408,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, Ld->getPointerInfo().getWithOffset(4), MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); + // Make sure new loads are placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd); + NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd); - SDValue NewChain = LoLd.getValue(1); if (TokenFactorIndex >= 0) { - Ops.push_back(LoLd); - Ops.push_back(HiLd); + Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 705d0f7a5cf7..0e654a380e7c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _, (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } +multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, + X86VectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg> { + let ExeDomain = _.ExeDomain in + defm r : AVX512_maskable_custom<opc, MRMSrcReg, + (outs _.RC:$dst), (ins GR32:$src), + !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), + !con((ins _.KRCWM:$mask), (ins GR32:$src)), + "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [], + "$src0 = $dst">, T8PD, EVEX; + + def : Pat <(_.VT (OpNode SrcRC:$src)), + (!cast<Instruction>(Name#r) + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), + (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), + (!cast<Instruction>(Name#rkz) _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; +} + +multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name, + AVX512VLVectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC, + Subreg>, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode, + SrcRC, Subreg>, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode, + SrcRC, Subreg>, EVEX_V128; + } +} + multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { @@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, } } -let isCodeGenOnly = 1 in { -defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - X86VBroadcast, GR8, HasBWI>; -defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - X86VBroadcast, GR16, HasBWI>; -} -let isAsmParserOnly = 1 in { - defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - null_frag, GR32, HasBWI>; - defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - null_frag, GR32, HasBWI>; -} +defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", + avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", + avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, + HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index a7de79306074..fc15dc1e6032 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -60,11 +60,13 @@ std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; // Opens a file. Path has to be resolved already. // Newly created memory buffers are owned by this driver. -MemoryBufferRef openFile(StringRef Path) { +Optional<MemoryBufferRef> openFile(StringRef Path) { ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path); - if (std::error_code EC = MB.getError()) + if (std::error_code EC = MB.getError()) { llvm::errs() << "fail openFile: " << EC.message() << "\n"; + return None; + } MemoryBufferRef MBRef = MB.get()->getMemBufferRef(); OwningMBs.push_back(std::move(MB.get())); // take ownership @@ -114,11 +116,16 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { for (auto *Arg : Args.filtered(OPT_UNKNOWN)) llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; - MemoryBufferRef MB; - if (auto *Arg = Args.getLastArg(OPT_d)) - MB = openFile(Arg->getValue()); + if (!Args.hasArg(OPT_d)) { + llvm::errs() << "no definition file specified\n"; + return 1; + } + + Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue()); + if (!MB) + return 1; - if (!MB.getBufferSize()) { + if (!MB->getBufferSize()) { llvm::errs() << "definition file empty\n"; return 1; } @@ -133,7 +140,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { } Expected<COFFModuleDefinition> Def = - parseCOFFModuleDefinition(MB, Machine, true); + parseCOFFModuleDefinition(*MB, Machine, true); if (!Def) { llvm::errs() << "error parsing definition\n" @@ -154,7 +161,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { if (Path.empty()) Path = getImplibPath(Def->OutputFile); - if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine)) + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) return 1; return 0; } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index a33490f6e4ac..ddc975cbed1a 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1470,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } i = CS.arg_begin(); + const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); @@ -1505,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) { CustomCI->setCallingConv(CI->getCallingConv()); CustomCI->setAttributes(CI->getAttributes()); + // Update the parameter attributes of the custom call instruction to + // zero extend the shadow parameters. This is required for targets + // which consider ShadowTy an illegal type. + for (unsigned n = 0; n < FT->getNumParams(); n++) { + const unsigned ArgNo = ShadowArgStart + n; + if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy) + CustomCI->addParamAttr(ArgNo, Attribute::ZExt); + } + if (!FT->getReturnType()->isVoidTy()) { LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); DFSF.setShadow(CustomCI, LabelLoad); diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 61e8700f1cd6..2e5618686ec2 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DemandedBits.h" @@ -35,6 +36,46 @@ using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed (unused)"); STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)"); +/// If an instruction is trivialized (dead), then the chain of users of that +/// instruction may need to be cleared of assumptions that can no longer be +/// guaranteed correct. +static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) { + assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?"); + + // Initialize the worklist with eligible direct users. + SmallVector<Instruction *, 16> WorkList; + for (User *JU : I->users()) { + // If all bits of a user are demanded, then we know that nothing below that + // in the def-use chain needs to be changed. + auto *J = dyn_cast<Instruction>(JU); + if (J && !DB.getDemandedBits(J).isAllOnesValue()) + WorkList.push_back(J); + } + + // DFS through subsequent users while tracking visits to avoid cycles. + SmallPtrSet<Instruction *, 16> Visited; + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + // NSW, NUW, and exact are based on operands that might have changed. + J->dropPoisonGeneratingFlags(); + + // We do not have to worry about llvm.assume or range metadata: + // 1. llvm.assume demands its operand, so trivializing can't change it. + // 2. range metadata only applies to memory accesses which demand all bits. + + Visited.insert(J); + + for (User *KU : J->users()) { + // If all bits of a user are demanded, then we know that nothing below + // that in the def-use chain needs to be changed. + auto *K = dyn_cast<Instruction>(KU); + if (K && !Visited.count(K) && !DB.getDemandedBits(K).isAllOnesValue()) + WorkList.push_back(K); + } + } +} + static bool bitTrackingDCE(Function &F, DemandedBits &DB) { SmallVector<Instruction*, 128> Worklist; bool Changed = false; @@ -51,6 +92,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { // replacing all uses with something else. Then, if they don't need to // remain live (because they have side effects, etc.) we can remove them. DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + + clearAssumptionsOfUsers(&I, DB); + // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. diff --git a/test/Analysis/ScalarEvolution/max-addrec-size.ll b/test/Analysis/ScalarEvolution/max-addrec-size.ll new file mode 100644 index 000000000000..aad0ddda37bc --- /dev/null +++ b/test/Analysis/ScalarEvolution/max-addrec-size.ll @@ -0,0 +1,33 @@ +; RUN: opt -analyze -scalar-evolution -scalar-evolution-max-add-rec-size=3 < %s | FileCheck %s + +; Show that we are able to avoid creation of huge SCEVs by capping the max +; AddRec size. +define i32 @test_01(i32 %a, i32 %b) { + +; CHECK-LABEL: Classifying expressions for: @test_01 +; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set +; CHECK-NEXT: %iv.next = add i32 %iv, %b +; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set +; CHECK-NEXT: %x1 = mul i32 %iv, %iv.next +; CHECK-NEXT: --> {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> U: full-set S: full-set +; CHECK-NEXT: %x2 = mul i32 %x1, %x1 +; CHECK-NEXT: --> ({((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop>) U: full-set S: full-set +; CHECK-NEXT: %x3 = mul i32 %x2, %x1 +; CHECK-NEXT: --> ({((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop>) U: full-set S: full-set + +entry: + br label %loop + +loop: + %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, %b + %cond = icmp slt i32 %iv.next, 1000 + br i1 %cond, label %loop, label %exit + +exit: + %x1 = mul i32 %iv, %iv.next + %x2 = mul i32 %x1, %x1 + %x3 = mul i32 %x2, %x1 + ret i32 %x3 +} diff --git a/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir b/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir new file mode 100644 index 000000000000..dacaf4966d07 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-ldst-unscaled-pre-post.mir @@ -0,0 +1,115 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s +--- +# CHECK-LABEL: name: test_LDURSi_post +# CHECK: LDRSpost %x0, -4 +name: test_LDURSi_post +body: | + bb.0.entry: + liveins: %x0 + + %s0 = LDURSi %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_LDURDi_post +# CHECK: LDRDpost %x0, -4 +name: test_LDURDi_post +body: | + bb.0.entry: + liveins: %x0 + + %d0 = LDURDi %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_LDURQi_post +# CHECK: LDRQpost %x0, -4 +name: test_LDURQi_post +body: | + bb.0.entry: + liveins: %x0 + + %q0 = LDURQi %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_LDURWi_post +# CHECK: LDRWpost %x0, -4 +name: test_LDURWi_post +body: | + bb.0.entry: + liveins: %x0 + + %w1 = LDURWi %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_LDURXi_post +# CHECK: %x1 = LDRXpost %x0, -4 +name: test_LDURXi_post +body: | + bb.0.entry: + liveins: %x0 + + %x1 = LDURXi %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_STURSi_post +# CHECK: STRSpost %s0, %x0, -4 +name: test_STURSi_post +body: | + bb.0.entry: + liveins: %x0 + + %s0 = FMOVS0 + STURSi %s0, %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_STURDi_post +# CHECK: STRDpost %d0, %x0, -4 +name: test_STURDi_post +body: | + bb.0.entry: + liveins: %x0 + + %d0 = FMOVD0 + STURDi %d0, %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_STURQi_post +# CHECK: STRQpost %q0, %x0, -4 +name: test_STURQi_post +body: | + bb.0.entry: + liveins: %x0 + + %q0 = MOVIv4i32 0, 0 + STURQi %q0, %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_STURWi_post +# CHECK: STRWpost %wzr, %x0, -4 +name: test_STURWi_post +body: | + bb.0.entry: + liveins: %x0 + + STURWi %wzr, %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... +# CHECK-LABEL: name: test_STURXi_post +# CHECK: STRXpost %xzr, %x0, -4 +name: test_STURXi_post +body: | + bb.0.entry: + liveins: %x0 + + STURXi %xzr, %x0, 0 + %x0 = SUBXri %x0, 4, 0 + RET_ReallyLR implicit %x0 +... diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll index a3be72112c76..f8ad2bbbbe0e 100644 --- a/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/test/CodeGen/ARM/cmpxchg-O0.ll @@ -10,11 +10,10 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK: dmb ish ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrexb [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexb [[STATUS]], r2, [r0] +; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -30,11 +29,10 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind ; CHECK: dmb ish ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrexh [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexh [[STATUS]], r2, [r0] +; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -50,11 +48,10 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrex [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strex [[STATUS]], r2, [r0] +; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: diff --git a/test/CodeGen/ARM/virtregrewriter-subregliveness.mir b/test/CodeGen/ARM/virtregrewriter-subregliveness.mir new file mode 100644 index 000000000000..83335a3ccffd --- /dev/null +++ b/test/CodeGen/ARM/virtregrewriter-subregliveness.mir @@ -0,0 +1,84 @@ +# RUN: llc -o - -mtriple=thumbv7--windows-gnu -run-pass=greedy -run-pass=virtregrewriter %s | FileCheck %s +--- | + target datalayout = "e-m:w-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7--windows-gnu" + + define void @subregLiveThrough() { ret void } + define void @subregNotLiveThrough() { ret void } + define void @subregNotLiveThrough2() { ret void } + +... +--- +# Check that we properly recognize that r1 is live through +# the first subreg copy. +# That will materialize as an implicit use of the big register +# on that copy. +# PR34107. +# +# CHECK-LABEL: name: subregLiveThrough +name: subregLiveThrough +tracksRegLiveness: true +registers: + - { id: 0, class: gprpair } +body: | + bb.0: + liveins: %r0, %r1 + + ; That copy is being coalesced so we should use a KILL + ; placeholder. If that's not a kill that means we probably + ; not coalescing %0 and %r0_r1 and thus we are not testing + ; the problematic code anymore. + ; + ; CHECK: %r0 = KILL %r0, implicit killed %r0_r1, implicit-def %r0_r1 + ; CHECK-NEXT: %r1 = KILL %r1, implicit killed %r0_r1 + undef %0.gsub_0 = COPY %r0 + %0.gsub_1 = COPY %r1 + tBX_RET 14, _, implicit %0 + + +... + +--- +# Check that we properly recognize that r1 is *not* live through +# the first subreg copy. +# CHECK-LABEL: name: subregNotLiveThrough +name: subregNotLiveThrough +tracksRegLiveness: true +registers: + - { id: 0, class: gprpair } +body: | + bb.0: + liveins: %r0, %r1 + + ; r1 is not live through so check we are not implicitly using + ; the big register. + ; CHECK: %r0 = KILL %r0, implicit-def %r0_r1 + ; CHECK-NEXT: tBX_RET + undef %0.gsub_0 = COPY %r0 + tBX_RET 14, _, implicit %0 + + +... + +--- +# Check that we properly recognize that r1 is *not* live through +# the first subreg copy. It is defined by this copy, but is not +# through. +# CHECK-LABEL: name: subregNotLiveThrough2 +name: subregNotLiveThrough2 +tracksRegLiveness: true +registers: + - { id: 0, class: gprpair } +body: | + bb.0: + liveins: %r0, %r1 + + ; r1 is not live through so check we are not implicitly using + ; the big register. + ; CHECK: %r0 = KILL %r0, implicit-def %r1, implicit-def %r0_r1 + ; CHECK-NEXT: tBX_RET + undef %0.gsub_0 = COPY %r0, implicit-def %r1 + tBX_RET 14, _, implicit %0 + + +... diff --git a/test/CodeGen/X86/adx-intrinsics.ll b/test/CodeGen/X86/adx-intrinsics.ll index 0498177a9c12..819a5df14e63 100644 --- a/test/CodeGen/X86/adx-intrinsics.ll +++ b/test/CodeGen/X86/adx-intrinsics.ll @@ -75,3 +75,30 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) { ret i8 %ret; } +; Try a version with loads. Previously we crashed on this. +define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) { +; CHECK-LABEL: load_crash +; CHECK: addb +; ADX: adcxq +; CHECK: setb +; CHECK: retq + %1 = load i64, i64* %a, align 8 + %2 = load i64, i64* %b, align 8 + %3 = bitcast i64* %res to i8* + %4 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2, i8* %3) + %conv = zext i8 %4 to i32 + ret i32 %conv +} + +; Try a really simple all zero input case, which also used to crash +define void @allzeros() { +; CHECK-LABEL: allzeros +; CHECK: xorl +; CHECK: addb +; CHECK: sbbq +; CHECK: andl +; CHECK: retq +entry: + %0 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0, i8* null) + ret void +} diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 5472f057ef27..4abe3df9fc2a 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1921,9 +1921,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovq %rsi, %k1 -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2 +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq @@ -1934,9 +1934,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2 +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl @@ -1954,20 +1954,20 @@ define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i ; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vpbroadcastw %di, %zmm2 +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1} +; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2 +; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1} +; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index c3ba6f106e6a..9ceb3e5931a6 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2799,9 +2799,9 @@ define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] ; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2819,9 +2819,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] -; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] -; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] ; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2839,9 +2839,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2859,9 +2859,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] -; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] -; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] -; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] ; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] diff --git a/test/CodeGen/X86/pr33349.ll b/test/CodeGen/X86/pr33349.ll new file mode 100644 index 000000000000..db866db22481 --- /dev/null +++ b/test/CodeGen/X86/pr33349.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + + define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr { +; KNL-LABEL: test: +; KNL: # BB#0: # %bb +; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: fld1 +; KNL-NEXT: fldz +; KNL-NEXT: fld %st(0) +; KNL-NEXT: fcmovne %st(2), %st(0) +; KNL-NEXT: vpextrb $4, %xmm0, %eax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: fld %st(1) +; KNL-NEXT: fcmovne %st(3), %st(0) +; KNL-NEXT: vpextrb $8, %xmm0, %eax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: fld %st(2) +; KNL-NEXT: fcmovne %st(4), %st(0) +; KNL-NEXT: vpextrb $12, %xmm0, %eax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: fxch %st(3) +; KNL-NEXT: fcmovne %st(4), %st(0) +; KNL-NEXT: fstp %st(4) +; KNL-NEXT: fxch %st(3) +; KNL-NEXT: fstpt 30(%rdi) +; KNL-NEXT: fxch %st(1) +; KNL-NEXT: fstpt 20(%rdi) +; KNL-NEXT: fxch %st(1) +; KNL-NEXT: fstpt 10(%rdi) +; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: retq +; +; SKX-LABEL: test: +; SKX: # BB#0: # %bb +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 +; SKX-NEXT: kshiftrw $2, %k0, %k1 +; SKX-NEXT: kshiftlw $15, %k1, %k2 +; SKX-NEXT: kshiftrw $15, %k2, %k2 +; SKX-NEXT: kshiftlw $15, %k2, %k2 +; SKX-NEXT: kshiftrw $15, %k2, %k2 +; SKX-NEXT: kmovd %k2, %eax +; SKX-NEXT: testb $1, %al +; SKX-NEXT: fld1 +; SKX-NEXT: fldz +; SKX-NEXT: fld %st(0) +; SKX-NEXT: fcmovne %st(2), %st(0) +; SKX-NEXT: kshiftlw $14, %k1, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kshiftlw $15, %k1, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kmovd %k1, %eax +; SKX-NEXT: testb $1, %al +; SKX-NEXT: fld %st(1) +; SKX-NEXT: fcmovne %st(3), %st(0) +; SKX-NEXT: kshiftlw $15, %k0, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kshiftlw $15, %k1, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kmovd %k1, %eax +; SKX-NEXT: testb $1, %al +; SKX-NEXT: fld %st(2) +; SKX-NEXT: fcmovne %st(4), %st(0) +; SKX-NEXT: kshiftlw $14, %k0, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kshiftlw $15, %k0, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: testb $1, %al +; SKX-NEXT: fxch %st(3) +; SKX-NEXT: fcmovne %st(4), %st(0) +; SKX-NEXT: fstp %st(4) +; SKX-NEXT: fxch %st(3) +; SKX-NEXT: fstpt 10(%rdi) +; SKX-NEXT: fxch %st(1) +; SKX-NEXT: fstpt (%rdi) +; SKX-NEXT: fxch %st(1) +; SKX-NEXT: fstpt 30(%rdi) +; SKX-NEXT: fstpt 20(%rdi) +; SKX-NEXT: retq + bb: + %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer + store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16 + ret void + } + diff --git a/test/CodeGen/X86/pr34088.ll b/test/CodeGen/X86/pr34088.ll new file mode 100644 index 000000000000..d3667e3884d4 --- /dev/null +++ b/test/CodeGen/X86/pr34088.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mcpu=pentium4 | FileCheck %s + +%struct.Foo = type { i32, %struct.Bar } +%struct.Bar = type { i32, %struct.Buffer, i32 } +%struct.Buffer = type { i8*, i32 } + +; This test checks that the load of store %2 is not dropped. +; +define i32 @pr34088() local_unnamed_addr { +; CHECK-LABEL: pr34088: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $32, %esp +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205] +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movaps %xmm0, (%esp) +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movaps %xmm1, (%esp) +; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %foo = alloca %struct.Foo, align 4 + %0 = bitcast %struct.Foo* %foo to i8* + call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 0, i32 20, i32 4, i1 false) + %buffer1 = getelementptr inbounds %struct.Foo, %struct.Foo* %foo, i32 0, i32 1, i32 1 + %1 = bitcast %struct.Buffer* %buffer1 to i64* + %2 = load i64, i64* %1, align 4 + call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 -51, i32 20, i32 4, i1 false) + store i64 %2, i64* %1, align 4 + ret i32 0 +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) diff --git a/test/CodeGen/X86/select-mmx.ll b/test/CodeGen/X86/select-mmx.ll new file mode 100644 index 000000000000..9e6382faaa59 --- /dev/null +++ b/test/CodeGen/X86/select-mmx.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=i686-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=I32 + + +; From source: clang -02 +;__m64 test47(int a) +;{ +; __m64 x = (a)? (__m64)(7): (__m64)(0); +; return __builtin_ia32_psllw(x, x); +;} + +define i64 @test47(i64 %arg) { +; +; X64-LABEL: test47: +; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: movl $7, %ecx +; X64-NEXT: cmoveq %rcx, %rax +; X64-NEXT: movd %rax, %mm0 +; X64-NEXT: psllw %mm0, %mm0 +; X64-NEXT: movd %mm0, %rax +; X64-NEXT: retq +; +; I32-LABEL: test47: +; I32: # BB#0: +; I32-NEXT: pushl %ebp +; I32-NEXT: .Lcfi0: +; I32-NEXT: .cfi_def_cfa_offset 8 +; I32-NEXT: .Lcfi1: +; I32-NEXT: .cfi_offset %ebp, -8 +; I32-NEXT: movl %esp, %ebp +; I32-NEXT: .Lcfi2: +; I32-NEXT: .cfi_def_cfa_register %ebp +; I32-NEXT: andl $-8, %esp +; I32-NEXT: subl $16, %esp +; I32-NEXT: movl 8(%ebp), %eax +; I32-NEXT: orl 12(%ebp), %eax +; I32-NEXT: movl $7, %eax +; I32-NEXT: je .LBB0_2 +; I32-NEXT: # BB#1: +; I32-NEXT: xorl %eax, %eax +; I32-NEXT: .LBB0_2: +; I32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I32-NEXT: movl $0, {{[0-9]+}}(%esp) +; I32-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; I32-NEXT: psllw %mm0, %mm0 +; I32-NEXT: movq %mm0, (%esp) +; I32-NEXT: movl (%esp), %eax +; I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; I32-NEXT: movl %ebp, %esp +; I32-NEXT: popl %ebp +; I32-NEXT: retl + %cond = icmp eq i64 %arg, 0 + %slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx) + %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct) + %retc = bitcast x86_mmx %psll to i64 + ret i64 %retc +} + + +; From source: clang -O2 +;__m64 test49(int a, long long n, long long m) +;{ +; __m64 x = (a)? (__m64)(n): (__m64)(m); +; return __builtin_ia32_psllw(x, x); +;} + +define i64 @test49(i64 %arg, i64 %x, i64 %y) { +; +; X64-LABEL: test49: +; X64: # BB#0: +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: cmovneq %rdx, %rsi +; X64-NEXT: movd %rsi, %mm0 +; X64-NEXT: psllw %mm0, %mm0 +; X64-NEXT: movd %mm0, %rax +; X64-NEXT: retq +; +; I32-LABEL: test49: +; I32: # BB#0: +; I32-NEXT: pushl %ebp +; I32-NEXT: .Lcfi3: +; I32-NEXT: .cfi_def_cfa_offset 8 +; I32-NEXT: .Lcfi4: +; I32-NEXT: .cfi_offset %ebp, -8 +; I32-NEXT: movl %esp, %ebp +; I32-NEXT: .Lcfi5: +; I32-NEXT: .cfi_def_cfa_register %ebp +; I32-NEXT: andl $-8, %esp +; I32-NEXT: subl $8, %esp +; I32-NEXT: movl 8(%ebp), %eax +; I32-NEXT: orl 12(%ebp), %eax +; I32-NEXT: je .LBB1_1 +; I32-NEXT: # BB#2: +; I32-NEXT: leal 24(%ebp), %eax +; I32-NEXT: jmp .LBB1_3 +; I32-NEXT: .LBB1_1: +; I32-NEXT: leal 16(%ebp), %eax +; I32-NEXT: .LBB1_3: +; I32-NEXT: movq (%eax), %mm0 +; I32-NEXT: psllw %mm0, %mm0 +; I32-NEXT: movq %mm0, (%esp) +; I32-NEXT: movl (%esp), %eax +; I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; I32-NEXT: movl %ebp, %esp +; I32-NEXT: popl %ebp +; I32-NEXT: retl + %cond = icmp eq i64 %arg, 0 + %xmmx = bitcast i64 %x to x86_mmx + %ymmx = bitcast i64 %y to x86_mmx + %slct = select i1 %cond, x86_mmx %xmmx, x86_mmx %ymmx + %psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct) + %retc = bitcast x86_mmx %psll to i64 + ret i64 %retc +} + +declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) + diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index abba0ff87ace..9f1ed021992d 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1643,7 +1643,7 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 @@ -1696,7 +1696,7 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll index c03b9d1472c1..1cf8453fc6ad 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2274,7 +2274,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2390,7 +2390,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -2443,7 +2443,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax ; AVX512VL-NEXT: shrl $16, %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll index 6f5d916f2294..ba7c0894b932 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -4069,7 +4069,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) { ; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movswl (%rdi), %eax -; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0 +; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index 05a797cb6f8e..d51b69415b93 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2431,7 +2431,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) { ; AVX512VL: # BB#0: ; AVX512VL-NEXT: movsbl (%rdi), %eax ; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: vpbroadcastb %al, %ymm0 +; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0 ; AVX512VL-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll index 7a5c992bb829..b8fc27ba5515 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -228,7 +228,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) { ; SKX-LABEL: insert_dup_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -249,7 +249,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) { ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16: ; SKX: ## BB#0: ; SKX-NEXT: movswl (%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i16, i16* %ptr, align 2 %tmp1 = sext i16 %tmp to i32 @@ -269,7 +269,7 @@ define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 { ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 @@ -288,7 +288,7 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 { ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32: ; SKX: ## BB#0: ; SKX-NEXT: movzwl 2(%rdi), %eax -; SKX-NEXT: vpbroadcastw %ax, %zmm0 +; SKX-NEXT: vpbroadcastw %eax, %zmm0 ; SKX-NEXT: retq %tmp = load i32, i32* %ptr, align 4 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1 diff --git a/test/CodeGen/X86/vector-shuffle-512-v64.ll b/test/CodeGen/X86/vector-shuffle-512-v64.ll index f4650ec741a7..9dca3191e06b 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -332,7 +332,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) { ; AVX512BW: # BB#0: ; AVX512BW-NEXT: movsbl (%rdi), %eax ; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: vpbroadcastb %al, %zmm0 +; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: @@ -348,7 +348,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) { ; AVX512VBMI: # BB#0: ; AVX512VBMI-NEXT: movsbl (%rdi), %eax ; AVX512VBMI-NEXT: shrl $8, %eax -; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0 +; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0 ; AVX512VBMI-NEXT: retq %tmp = load i8, i8* %ptr, align 1 %tmp1 = sext i8 %tmp to i32 diff --git a/test/Instrumentation/DataFlowSanitizer/Inputs/shadow-args-abilist.txt b/test/Instrumentation/DataFlowSanitizer/Inputs/shadow-args-abilist.txt new file mode 100644 index 000000000000..723cbc9086da --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/Inputs/shadow-args-abilist.txt @@ -0,0 +1,8 @@ +fun:dfsan_get_label=uninstrumented +fun:dfsan_get_label=custom + +fun:k2=uninstrumented +fun:k2=custom + +fun:k4=uninstrumented +fun:k4=custom diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll index 8b30875a03fa..e33237ffe19d 100644 --- a/test/Instrumentation/DataFlowSanitizer/abilist.ll +++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll @@ -47,13 +47,13 @@ define void @f(i32 %x) { ; CHECK: %[[LABELVA1:.*]] = alloca [2 x i16] ; CHECK: %[[LABELRETURN:.*]] = alloca i16 - ; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 0, i16 0) + ; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 zeroext 0, i16 zeroext 0) call void @custom1(i32 1, i32 2) - ; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 0, i16 0, i16* %[[LABELRETURN]]) + ; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 zeroext 0, i16 zeroext 0, i16* %[[LABELRETURN]]) call i32 @custom2(i32 1, i32 2) - ; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 0) + ; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 zeroext 0) call void @customcb(i32 (i32)* @cb) ; CHECK: %[[LABELVA1_0:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 0 @@ -61,12 +61,12 @@ define void @f(i32 %x) { ; CHECK: %[[LABELVA1_1:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 1 ; CHECK: store i16 %{{.*}}, i16* %[[LABELVA1_1]] ; CHECK: %[[LABELVA1_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 0 - ; CHECK: call void (i32, i16, i16*, ...) @__dfsw_custom3(i32 1, i16 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}}) + ; CHECK: call void (i32, i16, i16*, ...) @__dfsw_custom3(i32 1, i16 zeroext 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}}) call void (i32, ...) @custom3(i32 1, i32 2, i32 %x) ; CHECK: %[[LABELVA2_0:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0 ; CHECK: %[[LABELVA2_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0 - ; CHECK: call i32 (i32, i16, i16*, i16*, ...) @__dfsw_custom4(i32 1, i16 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3) + ; CHECK: call i32 (i32, i16, i16*, i16*, ...) @__dfsw_custom4(i32 1, i16 zeroext 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3) call i32 (i32, ...) @custom4(i32 1, i32 2, i32 3) ret void diff --git a/test/Instrumentation/DataFlowSanitizer/shadow-args-zext.ll b/test/Instrumentation/DataFlowSanitizer/shadow-args-zext.ll new file mode 100644 index 000000000000..0ffbf1970e7f --- /dev/null +++ b/test/Instrumentation/DataFlowSanitizer/shadow-args-zext.ll @@ -0,0 +1,54 @@ +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -dfsan -S --dfsan-abilist=%S/Inputs/shadow-args-abilist.txt | FileCheck %s + +; REQUIRES: x86-registered-target + +; Test that the custom abi marks shadow parameters as zero extended. + +define i32 @m() { +entry: + %call = call zeroext i16 @dfsan_get_label(i64 signext 56) + %conv = zext i16 %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: @"dfs$m" +; CHECK: %{{.*}} = call zeroext i16 @__dfsw_dfsan_get_label(i64 signext 56, i16 zeroext 0, i16* %{{.*}}) + +define i32 @k() { +entry: + %call = call zeroext i16 @k2(i64 signext 56, i64 signext 67) + %conv = zext i16 %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: @"dfs$k" +; CHECK: %{{.*}} = call zeroext i16 @__dfsw_k2(i64 signext 56, i64 signext 67, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16* %{{.*}}) + +define i32 @k3() { +entry: + %call = call zeroext i16 @k4(i64 signext 56, i64 signext 67, i64 signext 78, i64 signext 89) + %conv = zext i16 %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: @"dfs$k3" +; CHECK: %{{.*}} = call zeroext i16 @__dfsw_k4(i64 signext 56, i64 signext 67, i64 signext 78, i64 signext 89, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16* %{{.*}}) + +declare zeroext i16 @dfsan_get_label(i64 signext) + +; CHECK-LABEL: @"dfsw$dfsan_get_label" +; CHECK: %{{.*}} = call i16 @__dfsw_dfsan_get_label(i64 %0, i16 zeroext %1, i16* %{{.*}}) + +declare zeroext i16 @k2(i64 signext, i64 signext) +; CHECK-LABEL: @"dfsw$k2" +; CHECK: %{{.*}} = call i16 @__dfsw_k2(i64 %{{.*}}, i64 %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16* %{{.*}}) + +declare zeroext i16 @k4(i64 signext, i64 signext, i64 signext, i64 signext) + +; CHECK-LABEL: @"dfsw$k4" +; CHECK: %{{.*}} = call i16 @__dfsw_k4(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16* %{{.*}}) + + +; CHECK: declare zeroext i16 @__dfsw_dfsan_get_label(i64 signext, i16, i16*) +; CHECK: declare zeroext i16 @__dfsw_k2(i64 signext, i64 signext, i16, i16, i16*) +; CHECK: declare zeroext i16 @__dfsw_k4(i64 signext, i64 signext, i64 signext, i64 signext, i16, i16, i16, i16, i16*) diff --git a/test/Transforms/BDCE/invalidate-assumptions.ll b/test/Transforms/BDCE/invalidate-assumptions.ll new file mode 100644 index 000000000000..d165d74be86d --- /dev/null +++ b/test/Transforms/BDCE/invalidate-assumptions.ll @@ -0,0 +1,100 @@ +; RUN: opt -bdce %s -S | FileCheck %s + +; The 'nuw' on the subtract allows us to deduce that %setbit is not demanded. +; But if we change that value to '0', then the 'nuw' is no longer valid. If we don't +; remove the 'nuw', another pass (-instcombine) may make a transform based on an +; that incorrect assumption and we can miscompile: +; https://bugs.llvm.org/show_bug.cgi?id=33695 + +define i1 @PR33695(i1 %b, i8 %x) { +; CHECK-LABEL: @PR33695( +; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64 +; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8 +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1 +; CHECK-NEXT: ret i1 [[TRUNC]] +; + %setbit = or i8 %x, 64 + %little_number = zext i1 %b to i8 + %big_number = shl i8 %setbit, 1 + %sub = sub nuw i8 %big_number, %little_number + %trunc = trunc i8 %sub to i1 + ret i1 %trunc +} + +; Similar to above, but now with more no-wrap. +; https://bugs.llvm.org/show_bug.cgi?id=34037 + +define i64 @PR34037(i64 %m, i32 %r, i64 %j, i1 %b, i32 %k, i64 %p) { +; CHECK-LABEL: @PR34037( +; CHECK-NEXT: [[CONV:%.*]] = zext i32 %r to i64 +; CHECK-NEXT: [[AND:%.*]] = and i64 %m, 0 +; CHECK-NEXT: [[NEG:%.*]] = xor i64 0, 34359738367 +; CHECK-NEXT: [[OR:%.*]] = or i64 %j, 0 +; CHECK-NEXT: [[SHL:%.*]] = shl i64 0, 29 +; CHECK-NEXT: [[CONV1:%.*]] = select i1 %b, i64 7, i64 0 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[SHL]], [[CONV1]] +; CHECK-NEXT: [[CONV2:%.*]] = zext i32 %k to i64 +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], [[CONV2]] +; CHECK-NEXT: [[CONV4:%.*]] = and i64 %p, 65535 +; CHECK-NEXT: [[AND5:%.*]] = and i64 [[MUL]], [[CONV4]] +; CHECK-NEXT: ret i64 [[AND5]] +; + %conv = zext i32 %r to i64 + %and = and i64 %m, %conv + %neg = xor i64 %and, 34359738367 + %or = or i64 %j, %neg + %shl = shl i64 %or, 29 + %conv1 = select i1 %b, i64 7, i64 0 + %sub = sub nuw nsw i64 %shl, %conv1 + %conv2 = zext i32 %k to i64 + %mul = mul nsw i64 %sub, %conv2 + %conv4 = and i64 %p, 65535 + %and5 = and i64 %mul, %conv4 + ret i64 %and5 +} + +; This is a manufactured example based on the 1st test to prove that the +; assumption-killing algorithm stops at the call. Ie, it does not remove +; nsw/nuw from the 'add' because a call demands all bits of its argument. + +declare i1 @foo(i1) + +define i1 @poison_on_call_user_is_ok(i1 %b, i8 %x) { +; CHECK-LABEL: @poison_on_call_user_is_ok( +; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64 +; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8 +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1 +; CHECK-NEXT: [[CALL_RESULT:%.*]] = call i1 @foo(i1 [[TRUNC]]) +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i1 [[CALL_RESULT]], true +; CHECK-NEXT: [[MUL:%.*]] = mul i1 [[TRUNC]], [[ADD]] +; CHECK-NEXT: ret i1 [[MUL]] +; + %setbit = or i8 %x, 64 + %little_number = zext i1 %b to i8 + %big_number = shl i8 %setbit, 1 + %sub = sub nuw i8 %big_number, %little_number + %trunc = trunc i8 %sub to i1 + %call_result = call i1 @foo(i1 %trunc) + %add = add nsw nuw i1 %call_result, 1 + %mul = mul i1 %trunc, %add + ret i1 %mul +} + + +; We were asserting that all users of a trivialized integer-type instruction were +; also integer-typed, but that's too strong. The alloca has a pointer-type result. + +define void @PR34179(i32* %a) { +; CHECK-LABEL: @PR34179( +; CHECK-NEXT: [[T0:%.*]] = load volatile i32, i32* %a +; CHECK-NEXT: ret void +; + %t0 = load volatile i32, i32* %a + %vla = alloca i32, i32 %t0 + ret void +} + diff --git a/test/Transforms/IndVarSimplify/exit_value_test2.ll b/test/Transforms/IndVarSimplify/exit_value_test2.ll index ee641667506c..7b6e91a742b2 100644 --- a/test/Transforms/IndVarSimplify/exit_value_test2.ll +++ b/test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -3,15 +3,14 @@ ; Check IndVarSimplify should not replace exit value because or else ; udiv will be introduced by expand and the cost will be high. -; -; CHECK-LABEL: @_Z3fooPKcjj( -; CHECK-NOT: udiv declare void @_Z3mixRjj(i32* dereferenceable(4), i32) declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) { +; CHECK-LABEL: @_Z3fooPKcjj( +; CHECK-NOT: udiv entry: %a = alloca i32, align 4 %tmp = bitcast i32* %a to i8* @@ -50,3 +49,26 @@ while.end: ; preds = %while.cond.while.en call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp) ret i32 %tmp4 } + +define i32 @zero_backedge_count_test(i32 %unknown_init, i32* %unknown_mem) { +; CHECK-LABEL: @zero_backedge_count_test( +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry], [ %iv.inc, %loop ] + %unknown_phi = phi i32 [ %unknown_init, %entry ], [ %unknown_next, %loop ] + %iv.inc = add i32 %iv, 1 + %be_taken = icmp ne i32 %iv.inc, 1 + %unknown_next = load volatile i32, i32* %unknown_mem + br i1 %be_taken, label %loop, label %leave + +leave: +; We can fold %unknown_phi even though the backedge value for it is completely +; unknown, since we can prove that the loop's backedge taken count is 0. + +; CHECK: leave: +; CHECK: ret i32 %unknown_init + %exit_val = phi i32 [ %unknown_phi, %loop ] + ret i32 %exit_val +} diff --git a/test/Transforms/SimplifyCFG/pr34131.ll b/test/Transforms/SimplifyCFG/pr34131.ll new file mode 100644 index 000000000000..b64b6876e04e --- /dev/null +++ b/test/Transforms/SimplifyCFG/pr34131.ll @@ -0,0 +1,74 @@ +; RUN: opt -simplifycfg -S < %s | FileCheck %s + +; Just checking for lack of crash here, but we should be able to check the IR? +; Earlier version using auto-generated checks from utils/update_test_checks.py +; had bot problems though... + +define void @patatino() { + +; CHECK-LABEL: @patatino + + br label %bb1 +bb1: ; preds = %bb36, %0 + br label %bb2 +bb2: ; preds = %bb3, %bb1 + br i1 undef, label %bb4, label %bb3 +bb3: ; preds = %bb4, %bb2 + br i1 undef, label %bb2, label %bb5 +bb4: ; preds = %bb2 + switch i32 undef, label %bb3 [ + ] +bb5: ; preds = %bb3 + br label %bb6 +bb6: ; preds = %bb5 + br i1 undef, label %bb7, label %bb9 +bb7: ; preds = %bb6 + %tmp = or i64 undef, 1 + %tmp8 = icmp ult i64 %tmp, 0 + br i1 %tmp8, label %bb12, label %bb9 +bb9: ; preds = %bb35, %bb34, %bb33, %bb32, %bb31, %bb30, %bb27, %bb24, %bb21, %bb18, %bb16, %bb14, %bb12, %bb7, %bb6 + br label %bb11 +bb10: ; preds = %bb36 + br label %bb11 +bb11: ; preds = %bb10, %bb9 + ret void +bb12: ; preds = %bb7 + %tmp13 = icmp ult i64 0, 0 + br i1 %tmp13, label %bb14, label %bb9 +bb14: ; preds = %bb12 + %tmp15 = icmp ult i64 undef, 0 + br i1 %tmp15, label %bb16, label %bb9 +bb16: ; preds = %bb14 + %tmp17 = icmp ult i64 undef, 0 + br i1 %tmp17, label %bb18, label %bb9 +bb18: ; preds = %bb16 + %tmp19 = or i64 undef, 5 + %tmp20 = icmp ult i64 %tmp19, 0 + br i1 %tmp20, label %bb21, label %bb9 +bb21: ; preds = %bb18 + %tmp22 = or i64 undef, 6 + %tmp23 = icmp ult i64 %tmp22, 0 + br i1 %tmp23, label %bb24, label %bb9 +bb24: ; preds = %bb21 + %tmp25 = or i64 undef, 7 + %tmp26 = icmp ult i64 %tmp25, 0 + br i1 %tmp26, label %bb27, label %bb9 +bb27: ; preds = %bb24 + %tmp28 = or i64 undef, 8 + %tmp29 = icmp ult i64 %tmp28, 0 + br i1 %tmp29, label %bb30, label %bb9 +bb30: ; preds = %bb27 + br i1 undef, label %bb31, label %bb9 +bb31: ; preds = %bb30 + br i1 undef, label %bb32, label %bb9 +bb32: ; preds = %bb31 + br i1 undef, label %bb33, label %bb9 +bb33: ; preds = %bb32 + br i1 undef, label %bb34, label %bb9 +bb34: ; preds = %bb33 + br i1 undef, label %bb35, label %bb9 +bb35: ; preds = %bb34 + br i1 undef, label %bb36, label %bb9 +bb36: ; preds = %bb35 + br i1 undef, label %bb1, label %bb10 +} diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index d54b45515f05..74593e6202aa 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -871,7 +871,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, uint64_t Val = O->getPlainRelocationSymbolNum(RE); if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) { - fmt << format("0x%x", Val); + fmt << format("0x%0" PRIx64, Val); return; } else if (isExtern) { symbol_iterator SI = O->symbol_begin(); diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py index 2ef0a8f77ec9..3351ebed54bd 100644 --- a/utils/lit/lit/LitConfig.py +++ b/utils/lit/lit/LitConfig.py @@ -25,7 +25,7 @@ class LitConfig(object): params, config_prefix = None, maxIndividualTestTime = 0, maxFailures = None, - parallelism_groups = [], + parallelism_groups = {}, echo_all_commands = False): # The name of the test runner. self.progname = progname diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index 46bcac4b306e..a60a0f854870 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -313,7 +313,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): elif op == ('<',): redirects[0] = [filename, 'r', None] else: - raise InternalShellError(cmd, "Unsupported redirect: %r" % (r,)) + raise InternalShellError(cmd, "Unsupported redirect: %r" % ((op, filename),)) # Open file descriptors in a second pass. std_fds = [None, None, None] diff --git a/utils/lit/lit/formats/__init__.py b/utils/lit/lit/formats/__init__.py index 7d14ca4b535a..3ff46e93ead2 100644 --- a/utils/lit/lit/formats/__init__.py +++ b/utils/lit/lit/formats/__init__.py @@ -1,3 +1,8 @@ -from lit.formats.base import TestFormat # noqa: F401 +from lit.formats.base import ( # noqa: F401 + TestFormat, + FileBasedTest, + OneCommandPerFileTest +) + from lit.formats.googletest import GoogleTest # noqa: F401 from lit.formats.shtest import ShTest # noqa: F401 diff --git a/utils/lit/lit/formats/base.py b/utils/lit/lit/formats/base.py index baa9ff1d3b7d..6721d17e334e 100644 --- a/utils/lit/lit/formats/base.py +++ b/utils/lit/lit/formats/base.py @@ -1,50 +1,117 @@ -import abc +from __future__ import absolute_import +import os + +import lit.Test +import lit.util class TestFormat(object): - """Base class for test formats. - - A TestFormat encapsulates logic for finding and executing a certain type of - test. For example, a subclass FooTestFormat would contain the logic for - finding tests written in the 'Foo' format, and the logic for running a - single one. - - TestFormat is an Abstract Base Class (ABC). It uses the Python abc.ABCMeta - type and associated @abc.abstractmethod decorator. Together, these provide - subclass behaviour which is notionally similar to C++ pure virtual classes: - only subclasses which implement all abstract methods can be instantiated - (the implementation may come from an intermediate base). - - For details on ABCs, see: https://docs.python.org/2/library/abc.html. Note - that Python ABCs have extensive abilities beyond what is used here. For - TestFormat, we only care about enforcing that abstract methods are - implemented. - """ - - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def getTestsInDirectory(self, testSuite, path_in_suite, litConfig, - localConfig): - """Finds tests of this format in the given directory. - - Args: - testSuite: a Test.TestSuite object. - path_in_suite: the subpath under testSuite to look for tests. - litConfig: the LitConfig for the test suite. - localConfig: a LitConfig with local specializations. - - Returns: - An iterable of Test.Test objects. - """ - - @abc.abstractmethod + pass + +### + +class FileBasedTest(TestFormat): + def getTestsInDirectory(self, testSuite, path_in_suite, + litConfig, localConfig): + source_path = testSuite.getSourcePath(path_in_suite) + for filename in os.listdir(source_path): + # Ignore dot files and excluded tests. + if (filename.startswith('.') or + filename in localConfig.excludes): + continue + + filepath = os.path.join(source_path, filename) + if not os.path.isdir(filepath): + base,ext = os.path.splitext(filename) + if ext in localConfig.suffixes: + yield lit.Test.Test(testSuite, path_in_suite + (filename,), + localConfig) + +### + +import re +import tempfile + +class OneCommandPerFileTest(TestFormat): + # FIXME: Refactor into generic test for running some command on a directory + # of inputs. + + def __init__(self, command, dir, recursive=False, + pattern=".*", useTempInput=False): + if isinstance(command, str): + self.command = [command] + else: + self.command = list(command) + if dir is not None: + dir = str(dir) + self.dir = dir + self.recursive = bool(recursive) + self.pattern = re.compile(pattern) + self.useTempInput = useTempInput + + def getTestsInDirectory(self, testSuite, path_in_suite, + litConfig, localConfig): + dir = self.dir + if dir is None: + dir = testSuite.getSourcePath(path_in_suite) + + for dirname,subdirs,filenames in os.walk(dir): + if not self.recursive: + subdirs[:] = [] + + subdirs[:] = [d for d in subdirs + if (d != '.svn' and + d not in localConfig.excludes)] + + for filename in filenames: + if (filename.startswith('.') or + not self.pattern.match(filename) or + filename in localConfig.excludes): + continue + + path = os.path.join(dirname,filename) + suffix = path[len(dir):] + if suffix.startswith(os.sep): + suffix = suffix[1:] + test = lit.Test.Test( + testSuite, path_in_suite + tuple(suffix.split(os.sep)), + localConfig) + # FIXME: Hack? + test.source_path = path + yield test + + def createTempInput(self, tmp, test): + raise NotImplementedError('This is an abstract method.') + def execute(self, test, litConfig): - """Runs the given 'test', which is of this format. + if test.config.unsupported: + return (lit.Test.UNSUPPORTED, 'Test is unsupported') + + cmd = list(self.command) + + # If using temp input, create a temporary file and hand it to the + # subclass. + if self.useTempInput: + tmp = tempfile.NamedTemporaryFile(suffix='.cpp') + self.createTempInput(tmp, test) + tmp.flush() + cmd.append(tmp.name) + elif hasattr(test, 'source_path'): + cmd.append(test.source_path) + else: + cmd.append(test.getSourcePath()) + + out, err, exitCode = lit.util.executeCommand(cmd) + + diags = out + err + if not exitCode and not diags.strip(): + return lit.Test.PASS,'' - Args: - test: a Test.Test object describing the test to run. - litConfig: the LitConfig for the test suite. + # Try to include some useful information. + report = """Command: %s\n""" % ' '.join(["'%s'" % a + for a in cmd]) + if self.useTempInput: + report += """Temporary File: %s\n""" % tmp.name + report += "--\n%s--\n""" % open(tmp.name).read() + report += """Output:\n--\n%s--""" % diags - Returns: - A tuple of (status:Test.ResultCode, message:str) - """ + return lit.Test.FAIL, report diff --git a/utils/lit/lit/formats/shtest.py b/utils/lit/lit/formats/shtest.py index 01ecd192092e..fdc9bd0241f3 100644 --- a/utils/lit/lit/formats/shtest.py +++ b/utils/lit/lit/formats/shtest.py @@ -1,13 +1,12 @@ from __future__ import absolute_import -import os - -import lit.Test import lit.TestRunner import lit.util -from .base import TestFormat -class ShTest(TestFormat): +from .base import FileBasedTest + + +class ShTest(FileBasedTest): """ShTest is a format with one file per test. This is the primary format for regression tests as described in the LLVM @@ -18,31 +17,9 @@ class ShTest(TestFormat): The ShTest files contain some number of shell-like command pipelines, along with assertions about what should be in the output. """ - - def __init__(self, execute_external = False): - """Initializer. - - The 'execute_external' argument controls whether lit uses its internal - logic for command pipelines, or passes the command to a shell - subprocess. - - Args: - execute_external: (optional) If true, use shell subprocesses instead - of lit's internal pipeline logic. - """ + def __init__(self, execute_external=False): self.execute_external = execute_external - def getTestsInDirectory(self, testSuite, path_in_suite, - litConfig, localConfig): - """Yields test files matching 'suffixes' from the localConfig.""" - file_matches = lit.util.listdir_files( - testSuite.getSourcePath(path_in_suite), - localConfig.suffixes, localConfig.excludes) - for filename in file_matches: - yield lit.Test.Test(testSuite, path_in_suite + (filename,), - localConfig) - def execute(self, test, litConfig): - """Interprets and runs the given test file, and returns the result.""" return lit.TestRunner.executeShTest(test, litConfig, self.execute_external) diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py index 1290c142c834..3e39bdb92203 100644 --- a/utils/lit/lit/run.py +++ b/utils/lit/lit/run.py @@ -44,6 +44,12 @@ class Run(object): def __init__(self, lit_config, tests): self.lit_config = lit_config self.tests = tests + # Set up semaphores to limit parallelism of certain classes of tests. + # For example, some ASan tests require lots of virtual memory and run + # faster with less parallelism on OS X. + self.parallelism_semaphores = \ + {k: multiprocessing.Semaphore(v) for k, v in + self.lit_config.parallelism_groups.items()} def execute_test(self, test): return _execute_test_impl(test, self.lit_config, @@ -74,13 +80,6 @@ class Run(object): if not self.tests or jobs == 0: return - # Set up semaphores to limit parallelism of certain classes of tests. - # For example, some ASan tests require lots of virtual memory and run - # faster with less parallelism on OS X. - self.parallelism_semaphores = \ - {k: multiprocessing.Semaphore(v) for k, v in - self.lit_config.parallelism_groups.items()} - # Install a console-control signal handler on Windows. if win32api is not None: def console_ctrl_handler(type): diff --git a/utils/lit/tests/Inputs/max-failures/lit.cfg b/utils/lit/tests/Inputs/max-failures/lit.cfg new file mode 100644 index 000000000000..50d07566e1cc --- /dev/null +++ b/utils/lit/tests/Inputs/max-failures/lit.cfg @@ -0,0 +1,6 @@ +import lit.formats +config.name = 'shtest-shell' +config.suffixes = ['.txt'] +config.test_format = lit.formats.ShTest() +config.test_source_root = os.path.dirname(__file__) + '/../shtest-shell' +config.test_exec_root = None diff --git a/utils/lit/tests/max-failures.py b/utils/lit/tests/max-failures.py index 5cc258dd08aa..bc58e9a4e47f 100644 --- a/utils/lit/tests/max-failures.py +++ b/utils/lit/tests/max-failures.py @@ -1,9 +1,9 @@ # Check the behavior of --max-failures option. # -# RUN: not %{lit} -j 1 -v %{inputs}/shtest-shell > %t.out -# RUN: not %{lit} --max-failures=1 -j 1 -v %{inputs}/shtest-shell >> %t.out -# RUN: not %{lit} --max-failures=2 -j 1 -v %{inputs}/shtest-shell >> %t.out -# RUN: not %{lit} --max-failures=0 -j 1 -v %{inputs}/shtest-shell 2>> %t.out +# RUN: not %{lit} -j 1 -v %{inputs}/max-failures > %t.out +# RUN: not %{lit} --max-failures=1 -j 1 -v %{inputs}/max-failures >> %t.out +# RUN: not %{lit} --max-failures=2 -j 1 -v %{inputs}/max-failures >> %t.out +# RUN: not %{lit} --max-failures=0 -j 1 -v %{inputs}/max-failures 2>> %t.out # RUN: FileCheck < %t.out %s # # END. diff --git a/utils/lit/tests/selecting.py b/utils/lit/tests/selecting.py index 19ba240f9b0f..4a0d08b860b8 100644 --- a/utils/lit/tests/selecting.py +++ b/utils/lit/tests/selecting.py @@ -9,7 +9,7 @@ # Check that regex-filtering based on environment variables work. # -# RUN: LIT_FILTER='o[a-z]e' %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER-ENV %s +# RUN: env LIT_FILTER='o[a-z]e' %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER-ENV %s # CHECK-FILTER-ENV: Testing: 2 of 5 tests diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh index 02d8e7925f6e..66a2c578083e 100755 --- a/utils/release/test-release.sh +++ b/utils/release/test-release.sh @@ -403,14 +403,6 @@ function test_llvmCore() { fi if [ $do_test_suite = 'yes' ]; then - SandboxDir="$BuildDir/sandbox" - Lit=$SandboxDir/bin/lit - TestSuiteBuildDir="$BuildDir/test-suite-build" - TestSuiteSrcDir="$BuildDir/test-suite.src" - - virtualenv $SandboxDir - $SandboxDir/bin/python $BuildDir/llvm.src/utils/lit/setup.py install - mkdir -p $TestSuiteBuildDir cd $TestSuiteBuildDir env CC="$c_compiler" CXX="$cxx_compiler" \ cmake $TestSuiteSrcDir -DTEST_SUITE_LIT=$Lit @@ -466,6 +458,19 @@ if [ "$do_checkout" = "yes" ]; then export_sources fi +# Setup the test-suite. Do this early so we can catch failures before +# we do the full 3 stage build. +if [ $do_test_suite = "yes" ]; then + SandboxDir="$BuildDir/sandbox" + Lit=$SandboxDir/bin/lit + TestSuiteBuildDir="$BuildDir/test-suite-build" + TestSuiteSrcDir="$BuildDir/test-suite.src" + + virtualenv $SandboxDir + $SandboxDir/bin/python $BuildDir/llvm.src/utils/lit/setup.py install + mkdir -p $TestSuiteBuildDir +fi + ( Flavors="Release" if [ "$do_debug" = "yes" ]; then |