Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
26 files changed, 671 insertions, 582 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 02c5a210d099..f0f223aa057b 100644 --- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -15,11 +15,9 @@ #define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { -class MCOperand; class MCSubtargetInfo; class NVPTXInstPrinter : public MCInstPrinter { diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index b432e065c2f4..9ac3c8850f75 100644 --- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -22,6 +22,7 @@ class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); + public: explicit NVPTXMCAsmInfo(const Triple &TheTriple); }; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h index fe28214e9588..e5fae85bacf2 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h @@ -41,24 +41,6 @@ enum CondCodes { }; } -inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { - switch (CC) { - case NVPTXCC::NE: - return "ne"; - case NVPTXCC::EQ: - return "eq"; - case NVPTXCC::LT: - return "lt"; - case NVPTXCC::LE: - return "le"; - case NVPTXCC::GT: - return "gt"; - case NVPTXCC::GE: - return "ge"; - } - llvm_unreachable("Unknown condition code"); -} - FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); ModulePass *createNVPTXAssignValidGlobalNamesPass(); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ecb0f0a1d0a1..e8c36089a779 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { if (isABI) { if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { unsigned size = 0; - if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { + if (auto *ITy = dyn_cast<IntegerType>(Ty)) { size = ITy->getBitWidth(); if (size < 32) size = 32; @@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) { return false; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->getName() == "llvm.used") - return false; - return true; + return GV->getName() != "llvm.used"; } for (const User *U : C->users()) @@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { if (!gv->hasInternalLinkage()) return false; - const PointerType *Pty = gv->getType(); + PointerType *Pty = gv->getType(); if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) return false; @@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C, void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { - const Function *F = FI; + const Function *F = &*FI; if (F->isDeclaration()) { if (F->use_empty()) @@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { DenseSet<const GlobalVariable *> GVVisiting; // Visit each 
global variable, in order - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + for (const GlobalVariable &I : M.globals()) + VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); @@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->getName().startswith("nvvm.")) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. - const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); if (GVar->hasExternalLinkage()) { @@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, printScalarConstant(Initializer, O); } } else { - // The frontend adds zero-initializer to variables that don't have an - // initial value, so skip warning for this case. - if (!GVar->getInitializer()->isNullValue()) { + // The frontend adds zero-initializer to device and constant variables + // that don't have an initial value, and UndefValue to shared + // variables, so skip warning for this case. + if (!GVar->getInitializer()->isNullValue() && + !isa<UndefValue>(GVar->getInitializer())) { report_fatal_error("initial value of '" + GVar->getName() + "' is not allowed in addrspace(" + Twine(PTy->getAddressSpace()) + ")"); @@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); // Ptx allows variable initilization only for constant and // global state spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || @@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } std::string -NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { +NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. 
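// A sketch of this function's output (assumed input, not from this change):
// for a global such as
//   @arr = addrspace(1) global [16 x float] zeroinitializer, align 4
// the Array case below emits roughly
//   .global .align 4 .b8 arr[64]
// where 64 is DL.getTypeStoreSize of the element type [16 x float].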
- const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); O << "."; emitPTXAddressSpace(PTy->getAddressSpace(), O); if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); O << " .b8 "; getSymbol(GVar)->print(O, MAI); O << "["; @@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, return; } -static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { if (Ty->isSingleValueType()) - return TD->getPrefTypeAlignment(Ty); + return DL.getPrefTypeAlignment(Ty); - const ArrayType *ATy = dyn_cast<ArrayType>(Ty); + auto *ATy = dyn_cast<ArrayType>(Ty); if (ATy) - return getOpenCLAlignment(TD, ATy->getElementType()); + return getOpenCLAlignment(DL, ATy->getElementType()); - const StructType *STy = dyn_cast<StructType>(Ty); + auto *STy = dyn_cast<StructType>(Ty); if (STy) { unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); - unsigned int align = getOpenCLAlignment(TD, ETy); + unsigned int align = getOpenCLAlignment(DL, ETy); if (align > alignStruct) alignStruct = align; } return alignStruct; } - const FunctionType *FTy = dyn_cast<FunctionType>(Ty); + auto *FTy = dyn_cast<FunctionType>(Ty); if (FTy) - return TD->getPointerPrefAlignment(); - return TD->getPrefTypeAlignment(Ty); + return DL.getPointerPrefAlignment(); + return DL.getPrefTypeAlignment(Ty); } void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, @@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, O << "_param_" << paramIndex; } -void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - CurrentFnSym->print(O, MAI); - O << "_param_" << paramIndex; -} - void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Function::const_arg_iterator I, E; @@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(*TD); + MVT thePointerTy = TLI->getPointerTy(DL); O << "(\n"; @@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(Ty); + align = DL.getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { continue; } // 
Just a scalar - const PointerType *PTy = dyn_cast<PointerType>(Ty); + auto *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { if (PTy) { // Special handling for pointer arguments to kernel @@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << ".ptr .global "; break; } - O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; } printParamName(I, paramIndex, O); continue; @@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } // param has byVal attribute. So should be a pointer - const PointerType *PTy = dyn_cast<PointerType>(Ty); + auto *PTy = dyn_cast<PointerType>(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(ETy); + align = DL.getABITypeAlignment(ETy); - unsigned sz = TD->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); + ComputeValueVTs(*TLI, DL, ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; @@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) { void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); if (isa<UndefValue>(CPV) || CPV->isNullValue()) { - int s = TD->getTypeAllocSize(CPV->getType()); + int s = DL.getTypeAllocSize(CPV->getType()); if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); @@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { - const Type *ETy = CPV->getType(); + Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); ConvertIntToBytes<>(ptr, c); @@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { int int32 = (int)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); @@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { long long int64 = (long long)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); @@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case 
Type::FloatTyID: case Type::DoubleTyID: { const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); - const Type *Ty = CFP->getType(); + Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); ConvertFloatToBytes(ptr, float32); @@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v, Cexpr); } - unsigned int s = TD->getTypeAllocSize(CPV->getType()); + unsigned int s = DL.getTypeAllocSize(CPV->getType()); aggBuffer->addZeros(s); break; } @@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::StructTyID: { if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { - int ElementSize = TD->getTypeAllocSize(CPV->getType()); + int ElementSize = DL.getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); if (Bytes > ElementSize) aggBuffer->addZeros(Bytes - ElementSize); @@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); int Bytes; // Old constants @@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, StructType *ST = cast<StructType>(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { if (i == (e - 1)) - Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(0) + + DL.getTypeAllocSize(ST) - + DL.getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - - TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - + DL.getStructLayout(ST)->getElementOffset(i); bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); } } @@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // -bool NVPTXAsmPrinter::isImageType(const Type *Ty) { - - std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); - - if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) - return true; - - return false; -} - bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { switch (MI.getOpcode()) { @@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. 
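// For example (an assumed case): with the DataLayout knowing i32 is 4 bytes,
//   getelementptr inbounds ([4 x i32], [4 x i32]* @g, i32 0, i32 2)
// folds to the byte address @g+8, which lowerConstantForGV can emit directly
// instead of recursing through the GEP expression.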
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstantForGV(C, ProcessingGeneric); @@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); @@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index f6f7685e76f9..76bf179896a8 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -212,28 +212,21 @@ private: MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {} - void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = nullptr); - void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool = false); - void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, raw_ostream &); - void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); - bool isImageType(const Type *Ty); void printReturnValStr(const Function *, raw_ostream &O); void printReturnValStr(const MachineFunction &MF, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -271,7 +264,7 @@ private: // Build the map between type name and ID based on module's type // symbol table. - std::map<const Type *, std::string> TypeNameMap; + std::map<Type *, std::string> TypeNameMap; // List of variables demoted to a function scope. 
std::map<const Function *, std::vector<const GlobalVariable *> > localDecls; @@ -282,19 +275,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const; void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); - void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); - - static const char *getRegisterName(unsigned RegNo); void emitDemotedVars(const Function *, raw_ostream &); bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 69a229e32f43..95813c8430d1 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -98,7 +98,7 @@ private: /// This reordering exposes to optimizeMemoryInstruction more /// optimization opportunities on loads and stores. /// - /// If this function succesfully hoists an eliminable addrspacecast or V is + /// If this function successfully hoists an eliminable addrspacecast or V is /// already such an addrspacecast, it returns the transformed value (which is /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. 
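// A minimal IR sketch of the hoisting (assumed values, not from this diff):
//   %gen = addrspacecast float addrspace(3)* @buf to float*
//   %p   = getelementptr float, float* %gen, i64 %i
// is rewritten to
//   %p3  = getelementptr float, float addrspace(3)* @buf, i64 %i
//   %p   = addrspacecast float addrspace(3)* %p3 to float*
// so that optimizeMemoryInstruction can strip the cast from the load or
// store that consumes %p.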
Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); @@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { return false; bool Changed = false; - for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { if (isa<LoadInst>(I)) { // V = load P - Changed |= optimizeMemoryInstruction(I, 0); + Changed |= optimizeMemoryInstruction(&I, 0); } else if (isa<StoreInst>(I)) { // store V, P - Changed |= optimizeMemoryInstruction(I, 1); + Changed |= optimizeMemoryInstruction(&I, 1); } } } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 6fd09c405260..62ca5e9f9f62 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;) { - GlobalVariable *GV = I++; + GlobalVariable *GV = &*I++; if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) { @@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) { Value *Operand = II->getOperand(i); if (isa<Constant>(Operand)) { II->setOperand( - i, remapConstant(&M, I, cast<Constant>(Operand), Builder)); + i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder)); } } } @@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) { // Walk through the metadata section and update the debug information // associated with the global variables in the default address space. 
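// For illustration (assumed module, not part of this change), the pass turns
//   @g = internal global i32 0
// into
//   @g = internal addrspace(1) global i32 0
// and rewrites generic-address-space uses to go through
//   addrspacecast (i32 addrspace(1)* @g to i32*)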
- for (Module::named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); - I != E; I++) { - remapNamedMDNode(VM, I); + for (NamedMDNode &I : M.named_metadata()) { + remapNamedMDNode(VM, &I); } // Walk through the global variable initializers, and replace any use of @@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, NewOperands[0], NewOperands[1]); case Instruction::FCmp: // CompareConstantExpr (fcmp) - assert(false && "Address space conversion should have no effect " - "on float point CompareConstantExpr (fcmp)!"); - return C; + llvm_unreachable("Address space conversion should have no effect " + "on float point CompareConstantExpr (fcmp)!"); case Instruction::ExtractElement: // ExtractElementConstantExpr return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]); @@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, return Builder.CreateCast(Instruction::CastOps(C->getOpcode()), NewOperands[0], C->getType()); } - assert(false && "GenericToNVVM encountered an unsupported ConstantExpr"); - return C; + llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr"); } } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 232a611d1760..2d0098b392f4 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "NVPTXISelDAGToDAG.h" +#include "NVPTXUtilities.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" @@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) { if (!Src) return NVPTX::PTXLdStInstCode::GENERIC; - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { + if (auto *PT = dyn_cast<PointerType>(Src->getType())) { switch (PT->getAddressSpace()) { case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; @@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) { return NVPTX::PTXLdStInstCode::GENERIC; } +static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, + unsigned CodeAddrSpace, MachineFunction *F) { + // To use non-coherent caching, the load has to be from global + // memory and we have to prove that the memory area is not written + // to anywhere for the duration of the kernel call, not even after + // the load. + // + // To ensure that there are no writes to the memory, we require the + // underlying pointer to be a noalias (__restrict) kernel parameter + // that is never used for a write. We can only do this for kernel + // functions since from within a device function, we cannot know if + // there were or will be writes to the memory from the caller - or we + // could, but then we would have to do inter-procedural analysis. + if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL || + !isKernelFunction(*F->getFunction())) { + return false; + } + + // We use GetUnderlyingObjects() here instead of + // GetUnderlyingObject() mainly because the former looks through phi + // nodes while the latter does not. We need to look through phi + // nodes to handle pointer induction variables. 
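// A CUDA-level sketch of a load that satisfies these conditions (hypothetical
// kernel, assuming an sm_35+ target so hasLDG() holds):
//
//   __global__ void scale(float *__restrict__ dst,
//                         const float *__restrict__ src, int n) {
//     int i = blockIdx.x * blockDim.x + threadIdx.x;
//     if (i < n)
//       dst[i] = 2.0f * src[i];  // src is noalias and never written, so the
//                                // load may lower to ld.global.nc (LDG)
//   }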
+ SmallVector<Value *, 8> Objs; + GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()), + Objs, F->getDataLayout()); + for (Value *Obj : Objs) { + auto *A = dyn_cast<const Argument>(Obj); + if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; + } + + return true; +} + SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); switch (IID) { @@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(LD); + if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool isVolatile = LD->isVolatile(); @@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); + if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); @@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: 
@@ -5039,7 +5094,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, } if (!Src) return false; - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (auto *PT = dyn_cast<PointerType>(Src->getType())) return (PT->getAddressSpace() == spN); return false; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b75cf4040312..766369631e14 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // condition branches. setJumpIsExpensive(true); + // Wide divides are _very_ slow. Try to reduce the width of the divide if + // possible. + addBypassSlowDiv(64, 32); + // By default, use the Source scheduling if (sched4reg) setSchedulingPreference(Sched::RegPressure); @@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SELECT); // Now deduce the information based on the above mentioned // actions @@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "("; if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { unsigned size = 0; - if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { + if (auto *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); if (size < 32) size = 32; @@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "_"; continue; } - const PointerType *PTy = dyn_cast<PointerType>(Ty); + auto *PTy = dyn_cast<PointerType>(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> Offsets; - const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); + auto *PTy = dyn_cast<PointerType>(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), vtparts, &Offsets, 0); @@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { return Result; } -SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, - int idx, EVT v) const { - std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); - std::stringstream suffix; - suffix << idx; - *name += suffix.str(); - return DAG.getTargetExternalSymbol(name->c_str(), v); -} - SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { std::string ParamSym; @@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); } -SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { - return getExtSymb(DAG, ".HLPPARAM", idx); -} - // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { @@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { "struct._image3d_t", "struct._sampler_t" }; - const Type *Ty = arg->getType(); - const PointerType *PTy = dyn_cast<PointerType>(Ty); + Type *Ty = arg->getType(); + auto *PTy = dyn_cast<PointerType>(Ty); if 
(!PTy) return false; @@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { if (!context) return false; - const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); + auto *STy = dyn_cast<StructType>(PTy->getElementType()); const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; - for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) - if (TypeName == specialTypes[i]) - return true; - - return false; + return std::find(std::begin(specialTypes), std::end(specialTypes), + TypeName) != std::end(specialTypes); } SDValue NVPTXTargetLowering::LowerFormalArguments( @@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { - theArgs.push_back(I); - argTypes.push_back(I->getType()); + for (const Argument &I : F->args()) { + theArgs.push_back(&I); + argTypes.push_back(I.getType()); } // argTypes.size() (or theArgs.size()) and Ins.size() need not match. // Ins.size() will be larger @@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint( TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -// NVPTX suuport vector of legal types of any length in Intrinsics because the -// NVPTX specific type legalizer -// will legalize them to the PTX supported length. -bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { - if (isTypeLegal(VT)) - return true; - if (VT.isVector()) { - MVT eVT = VT.getVectorElementType(); - if (isTypeLegal(eVT)) - return true; - } - return false; -} - static unsigned getOpcForTextureInstr(unsigned Intrinsic) { switch (Intrinsic) { default: @@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, // - [immAddr] if (AM.BaseGV) { - if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) - return false; - return true; + return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; } switch (AM.Scale) { @@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } -/// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { - return 4; -} - //===----------------------------------------------------------------------===// // NVPTX DAG Combining //===----------------------------------------------------------------------===// @@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N, return SDValue(); } +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Currently this detects patterns for integer min and max and + // lowers them to PTX-specific intrinsics that enable hardware + // support. + + const SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() != ISD::SETCC) return SDValue(); + + const SDValue LHS = Cond.getOperand(0); + const SDValue RHS = Cond.getOperand(1); + const SDValue True = N->getOperand(1); + const SDValue False = N->getOperand(2); + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + const EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); + + const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + SDValue Larger; // The larger of LHS and RHS when condition is true. 
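// E.g. (an assumed input pattern): for i32 operands this rewrites
//   %c = setcc slt %a, %b
//   %m = select %c, %a, %b        ; signed minimum of %a and %b
// into INTRINSIC_WO_CHAIN(nvvm_min_i, %a, %b), which instruction selection
// then turns into the PTX min.s32 instruction.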
+ switch (CC) { + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + Larger = RHS; + break; + + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + Larger = LHS; + break; + + default: + return SDValue(); + } + const bool IsMax = (Larger == True); + const bool IsSigned = ISD::isSignedIntSetCC(CC); + + unsigned IntrinsicId; + if (VT == MVT::i32) { + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; + } else { + assert(VT == MVT::i64); + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; + } + + SDLoc DL(N); + return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); +} + enum OperandSignedness { Signed = 0, Unsigned, @@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { APInt Val = CI->getAPIntValue(); if (LHSSign == Unsigned) { - if (Val.isIntN(OptSize)) { - return true; - } - return false; + return Val.isIntN(OptSize); } else { - if (Val.isSignedIntN(OptSize)) { - return true; - } - return false; + return Val.isSignedIntN(OptSize); } } else { OperandSignedness RHSSign; if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) return false; - if (LHSSign != RHSSign) - return false; - - return true; + return LHSSign == RHSSign; } } @@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, return PerformSHLCombine(N, DCI, OptLevel); case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::SELECT: + return PerformSELECTCombine(N, DCI); } return SDValue(); } @@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults( void NVPTXSection::anchor() {} NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { - delete TextSection; - delete DataSection; - delete BSSSection; - delete ReadOnlySection; - - delete StaticCtorSection; - delete StaticDtorSection; - delete LSDASection; - delete EHFrameSection; - delete DwarfAbbrevSection; - delete DwarfInfoSection; - delete DwarfLineSection; - delete DwarfFrameSection; - delete DwarfPubTypesSection; - delete DwarfDebugInlineSection; - delete DwarfStrSection; - delete DwarfLocSection; - delete DwarfARangesSection; - delete DwarfRangesSection; + delete static_cast<NVPTXSection *>(TextSection); + delete static_cast<NVPTXSection *>(DataSection); + delete static_cast<NVPTXSection *>(BSSSection); + delete static_cast<NVPTXSection *>(ReadOnlySection); + + delete static_cast<NVPTXSection *>(StaticCtorSection); + delete static_cast<NVPTXSection *>(StaticDtorSection); + delete static_cast<NVPTXSection *>(LSDASection); + delete static_cast<NVPTXSection *>(EHFrameSection); + delete static_cast<NVPTXSection *>(DwarfAbbrevSection); + delete static_cast<NVPTXSection *>(DwarfInfoSection); + delete static_cast<NVPTXSection *>(DwarfLineSection); + delete static_cast<NVPTXSection *>(DwarfFrameSection); + delete static_cast<NVPTXSection *>(DwarfPubTypesSection); + delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection); + delete static_cast<NVPTXSection *>(DwarfStrSection); + delete static_cast<NVPTXSection *>(DwarfLocSection); + delete static_cast<NVPTXSection *>(DwarfARangesSection); + delete static_cast<NVPTXSection *>(DwarfRangesSection); } MCSection * diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h 
b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index e5c37321a33b..60914c1d09b4 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -441,13 +441,9 @@ public: SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, - SelectionDAG &DAG) const; const char *getTargetNodeName(unsigned Opcode) const override; - bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override; @@ -459,8 +455,13 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; - /// getFunctionAlignment - Return the Log2 alignment of this function. - unsigned getFunctionAlignment(const Function *F) const; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { + // Truncating 64-bit to 32-bit is free in SASS. + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + return SrcTy->getPrimitiveSizeInBits() == 64 && + DstTy->getPrimitiveSizeInBits() == 32; + } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const override { @@ -515,11 +516,7 @@ public: private: const NVPTXSubtarget &STI; // cache the subtarget here - - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, - EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; - SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 76d6597c6e20..9f3cf4551955 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg( const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC != SrcRC) - report_fatal_error("Attempted to created cross-class register copy"); - - if (DestRC == &NVPTX::Int32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int1RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int16RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else { + if (DestRC->getSize() != SrcRC->getSize()) + report_fatal_error("Copy one register into another with a different width"); + + unsigned Op; + if (DestRC == &NVPTX::Int1RegsRegClass) { + Op = NVPTX::IMOV1rr; + } else if (DestRC == &NVPTX::Int16RegsRegClass) { + Op = NVPTX::IMOV16rr; + } else if (DestRC == &NVPTX::Int32RegsRegClass) { + Op = (SrcRC == &NVPTX::Int32RegsRegClass ? 
NVPTX::IMOV32rr + : NVPTX::BITCONVERT_32_F2I); + } else if (DestRC == &NVPTX::Int64RegsRegClass) { + Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr + : NVPTX::BITCONVERT_64_F2I); + } else if (DestRC == &NVPTX::Float32RegsRegClass) { + Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr + : NVPTX::BITCONVERT_32_I2F); + } else if (DestRC == &NVPTX::Float64RegsRegClass) { + Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr + : NVPTX::BITCONVERT_64_I2F); + } else { llvm_unreachable("Bad register copy"); } + BuildMI(MBB, I, DL, get(Op), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, @@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { - switch (MI.getOpcode()) { - default: - return false; - case NVPTX::INT_PTX_SREG_NTID_X: - case NVPTX::INT_PTX_SREG_NTID_Y: - case NVPTX::INT_PTX_SREG_NTID_Z: - case NVPTX::INT_PTX_SREG_TID_X: - case NVPTX::INT_PTX_SREG_TID_Y: - case NVPTX::INT_PTX_SREG_TID_Z: - case NVPTX::INT_PTX_SREG_CTAID_X: - case NVPTX::INT_PTX_SREG_CTAID_Y: - case NVPTX::INT_PTX_SREG_CTAID_Z: - case NVPTX::INT_PTX_SREG_NCTAID_X: - case NVPTX::INT_PTX_SREG_NCTAID_Y: - case NVPTX::INT_PTX_SREG_NCTAID_Z: - case NVPTX::INT_PTX_SREG_WARPSIZE: - return true; - } -} - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index 179c06887198..3e407223f010 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -56,7 +56,6 @@ public: unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; - bool isReadSpecialReg(MachineInstr &MI) const; virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 0bf72febc4a0..f770c2acaab5 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -6,6 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// \file // Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when // the size is large or is not a compile-time constant. 
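// For instance (an assumed case): a large struct assignment such as
//   struct Big { int v[1024]; };
//   *dst = *src;   // clang emits llvm.memcpy of 4096 bytes
// exceeds the MaxAggrCopySize threshold and is rewritten by this pass into
// an explicit byte-copy loop rather than a memcpy libcall, which PTX lacks.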
// @@ -18,19 +20,20 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #define DEBUG_TYPE "nvptx" using namespace llvm; namespace { + // actual analysis class, which is a functionpass struct NVPTXLowerAggrCopies : public FunctionPass { static char ID; @@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass { return "Lower aggregate copies/intrinsics into loops"; } }; -} // namespace char NVPTXLowerAggrCopies::ID = 0; -// Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop( - Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { - Type *indType = len->getType(); +// Lower memcpy to loop. +void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); - // srcAddr and dstAddr are expected to be pointer types, + // SrcAddr and DstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) - srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); - dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS)); + SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); + DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - IRBuilder<> loop(loopBB); - // The loop index (ind) is a phi node. - PHINode *ind = loop.CreatePHI(indType, 0); - // Incoming value for ind is 0 - ind->addIncoming(ConstantInt::get(indType, 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - // load from srcAddr+ind + // load from SrcAddr+LoopIndex // TODO: we can leverage the align parameter of llvm.memcpy for more efficient // word-sized loads and stores. 
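// The generated loop has roughly this shape (a sketch; block names from the
// code above, value names assumed):
//   loadstoreloop:
//     %i      = phi i64 [ 0, %entry ], [ %i.next, %loadstoreloop ]
//     %s      = getelementptr inbounds i8, i8* %src, i64 %i
//     %byte   = load i8, i8* %s
//     %d      = getelementptr inbounds i8, i8* %dst, i64 %i
//     store i8 %byte, i8* %d
//     %i.next = add i64 %i, 1
//     %again  = icmp ult i64 %i.next, %n
//     br i1 %again, label %loadstoreloop, label %split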
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), - srcVolatile); - // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), - dstVolatile); - - // The value for ind coming from backedge is (ind + 1) - Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); - ind->addIncoming(newind, loopBB); - - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + Value *Element = + LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( + LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); + // store at DstAddr+LoopIndex + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), + DstAddr, LoopIndex), + DstIsVolatile); + + // The value for LoopIndex coming from backedge is (LoopIndex + 1) + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } -// Lower MemSetInst to loop -static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, - Value *len, Value *val, LLVMContext &Context, - Function &F) { - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); +// Lower memmove to IR. memmove is required to correctly copy overlapping memory +// regions; therefore, it has to check the relative positions of the source and +// destination pointers and choose the copy direction accordingly. +// +// The code below is an IR rendition of this C function: +// +// void* memmove(void* dst, const void* src, size_t n) { +// unsigned char* d = dst; +// const unsigned char* s = src; +// if (s < d) { +// // copy backwards +// while (n--) { +// d[n] = s[n]; +// } +// } else { +// // copy forward +// for (size_t i = 0; i < n; ++i) { +// d[i] = s[i]; +// } +// } +// return dst; +// } +void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = ConvertedInst->getParent(); + + // Create the a comparison of src and dst, based on which we jump to either + // the forward-copy part of the function (if src >= dst) or the backwards-copy + // part (if src < dst). + // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else + // structure. Its block terminators (unconditional branches) are replaced by + // the appropriate conditional branches when the loop is built. 
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); + TerminatorInst *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm, + &ElseTerm); + + // Each part of the function consists of two blocks: + // copy_backwards: used to skip the loop when n == 0 + // copy_backwards_loop: the actual backwards loop BB + // copy_forward: used to skip the loop when n == 0 + // copy_forward_loop: the actual forward loop BB + BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); + CopyBackwardsBB->setName("copy_backwards"); + BasicBlock *CopyForwardBB = ElseTerm->getParent(); + CopyForwardBB->setName("copy_forward"); + BasicBlock *ExitBB = ConvertedInst->getParent(); + ExitBB->setName("memmove_done"); + + // Initial comparison of n == 0 that lets us skip the loops altogether. Shared + // between both backwards and forward copy clauses. + ICmpInst *CompareN = + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); + + // Copying backwards. + BasicBlock *LoopBB = + BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + Value *IndexPtr = LoopBuilder.CreateSub( + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); + Value *Element = LoopBuilder.CreateLoad( + LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), + ExitBB, LoopBB); + LoopPhi->addIncoming(IndexPtr, LoopBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); + BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); + ThenTerm->eraseFromParent(); + + // Copying forward. + BasicBlock *FwdLoopBB = + BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB); + IRBuilder<> FwdLoopBuilder(FwdLoopBB); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); + Value *FwdElement = FwdLoopBuilder.CreateLoad( + FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); + FwdLoopBuilder.CreateStore( + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); + Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), + ExitBB, FwdLoopBB); + FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); + + BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); + ElseTerm->eraseFromParent(); +} - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); +// Lower memset to loop. 
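// This mirrors the memcpy loop sketched earlier, but there is no load: the
// destination is bitcast to SetValue's type (i8 for llvm.memset) and each
// iteration is just (value names assumed)
//   %p = getelementptr inbounds i8, i8* %dst, i64 %i
//   store i8 %val, i8* %p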
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, + Value *CopyLen, Value *SetValue, LLVMContext &Context, + Function &F) { + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); // Cast pointer to the type of value getting stored - dstAddr = - builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); + unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); - IRBuilder<> loop(loopBB); - PHINode *ind = loop.CreatePHI(len->getType(), 0); - ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); + LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); - loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); + LoopBuilder.CreateStore( + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), + false); - Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); - ind->addIncoming(newind, loopBB); + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { - SmallVector<LoadInst *, 4> aggrLoads; - SmallVector<MemTransferInst *, 4> aggrMemcpys; - SmallVector<MemSetInst *, 4> aggrMemsets; + SmallVector<LoadInst *, 4> AggrLoads; + SmallVector<MemIntrinsic *, 4> MemCalls; const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); - // - // Collect all the aggrLoads, aggrMemcpys and addrMemsets. - // + // Collect all aggregate loads and mem* calls. 
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
          ++II) {
-      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
-        if (!load->hasOneUse())
+      if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+        if (!LI->hasOneUse())
           continue;
 
-        if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+        if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
           continue;
 
-        User *use = load->user_back();
-        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
-          if (store->getOperand(0) != load)
+        if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
+          if (SI->getOperand(0) != LI)
             continue;
-          aggrLoads.push_back(load);
-        }
-      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
-        Value *len = intr->getLength();
-        // If the number of elements being copied is greater
-        // than MaxAggrCopySize, lower it to a loop
-        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
-          if (len_int->getZExtValue() >= MaxAggrCopySize) {
-            aggrMemcpys.push_back(intr);
-          }
-        } else {
-          // turn variable length memcpy/memmov into loop
-          aggrMemcpys.push_back(intr);
+          AggrLoads.push_back(LI);
         }
-      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
-        Value *len = memsetintr->getLength();
-        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
-          if (len_int->getZExtValue() >= MaxAggrCopySize) {
-            aggrMemsets.push_back(memsetintr);
+      } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+        // Convert intrinsic calls with variable size or with constant size
+        // larger than the MaxAggrCopySize threshold.
+        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
+          if (LenCI->getZExtValue() >= MaxAggrCopySize) {
+            MemCalls.push_back(IntrCall);
           }
         } else {
-          // turn variable length memset into loop
-          aggrMemsets.push_back(memsetintr);
+          MemCalls.push_back(IntrCall);
         }
       }
     }
   }
 
-  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
-      (aggrMemsets.size() == 0))
+
+  if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
     return false;
+  }
 
   //
   // Do the transformation of an aggr load/copy/set to a loop
   //
-  for (LoadInst *load : aggrLoads) {
-    StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
-    Value *srcAddr = load->getOperand(0);
-    Value *dstAddr = store->getOperand(1);
-    unsigned numLoads = DL.getTypeStoreSize(load->getType());
-    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
-
-    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
-                          store->isVolatile(), Context, F);
-
-    store->eraseFromParent();
-    load->eraseFromParent();
+  for (LoadInst *LI : AggrLoads) {
+    StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
+    Value *SrcAddr = LI->getOperand(0);
+    Value *DstAddr = SI->getOperand(1);
+    unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
+    Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
+
+    convertMemCpyToLoop(/* ConvertedInst */ SI,
+                        /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+                        /* CopyLen */ CopyLen,
+                        /* SrcIsVolatile */ LI->isVolatile(),
+                        /* DstIsVolatile */ SI->isVolatile(),
+                        /* Context */ Context,
+                        /* Function F */ F);
+
+    SI->eraseFromParent();
+    LI->eraseFromParent();
   }
 
-  for (MemTransferInst *cpy : aggrMemcpys) {
-    convertTransferToLoop(/* splitAt */ cpy,
-                          /* srcAddr */ cpy->getSource(),
-                          /* dstAddr */ cpy->getDest(),
-                          /* len */ cpy->getLength(),
-                          /* srcVolatile */ cpy->isVolatile(),
-                          /* dstVolatile */ cpy->isVolatile(),
+  // Transform mem* intrinsic calls.
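A note on the dispatch below: the llvm.mem* intrinsics form a small class hierarchy in llvm/IR/IntrinsicInst.h, and the dyn_cast chain simply walks it. The relevant slice, for reference:

    // MemIntrinsic            base class for all three mem* intrinsics
    // +-- MemSetInst          llvm.memset
    // +-- MemTransferInst     calls with both a source and a destination
    //     +-- MemCpyInst      llvm.memcpy
    //     +-- MemMoveInst     llvm.memmove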
+  for (MemIntrinsic *MemCall : MemCalls) {
+    if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+      convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
+                          /* SrcAddr */ Memcpy->getRawSource(),
+                          /* DstAddr */ Memcpy->getRawDest(),
+                          /* CopyLen */ Memcpy->getLength(),
+                          /* SrcIsVolatile */ Memcpy->isVolatile(),
+                          /* DstIsVolatile */ Memcpy->isVolatile(),
                           /* Context */ Context,
                           /* Function F */ F);
-    cpy->eraseFromParent();
-  }
-
-  for (MemSetInst *memsetinst : aggrMemsets) {
-    Value *len = memsetinst->getLength();
-    Value *val = memsetinst->getValue();
-    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
-                        F);
-    memsetinst->eraseFromParent();
+    } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+      convertMemMoveToLoop(/* ConvertedInst */ Memmove,
+                           /* SrcAddr */ Memmove->getRawSource(),
+                           /* DstAddr */ Memmove->getRawDest(),
+                           /* CopyLen */ Memmove->getLength(),
+                           /* SrcIsVolatile */ Memmove->isVolatile(),
+                           /* DstIsVolatile */ Memmove->isVolatile(),
+                           /* Context */ Context,
+                           /* Function F */ F);
+
+    } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+      convertMemSetToLoop(/* ConvertedInst */ Memset,
+                          /* DstAddr */ Memset->getRawDest(),
+                          /* CopyLen */ Memset->getLength(),
+                          /* SetValue */ Memset->getValue(),
+                          /* Context */ Context,
+                          /* Function F */ F);
+    }
+    MemCall->eraseFromParent();
   }
 
   return true;
 }
 
+} // namespace
+
+namespace llvm {
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
+                "Lower aggregate copies, and llvm.mem* intrinsics into loops",
+                false, false)
+
 FunctionPass *llvm::createLowerAggrCopies() {
   return new NVPTXLowerAggrCopies();
 }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 93d0025d8f53..624052e9b981 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
     // Check Load, Store, GEP, and BitCast Uses on alloca and make them
     // use the converted generic address, in order to expose non-generic
     // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
-    // of instructions this is unecessary and may introduce redudant
+    // of instructions this is unnecessary and may introduce redundant
    // address cast.
     const auto &AllocaUse = *UI++;
     auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index b533f316d8a9..6656077348a1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -47,6 +47,36 @@
 //   ...
 // }
 //
+// 3. Convert pointers in a byval kernel parameter to pointers in the global
+//    address space. As #2, it allows NVPTX to emit more ld/st.global. E.g.,
+//
+//    struct S {
+//      int *x;
+//      int *y;
+//    };
+//    __global__ void foo(S s) {
+//      int *b = s.y;
+//      // use b
+//    }
+//
+//    "b" points to the global address space. At the IR level,
+//
+//    define void @foo({i32*, i32*}* byval %input) {
+//      %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+//      %b = load i32*, i32** %b_ptr
+//      ; use %b
+//    }
+//
+//    becomes
+//
+//    define void @foo({i32*, i32*}* byval %input) {
+//      %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+//      %b = load i32*, i32** %b_ptr
+//      %b_global = addrspacecast i32* %b to i32 addrspace(1)*
+//      %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32*
+//      ; use %b_generic
+//    }
+//
 // TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
 // don't cancel the addrspacecast pair this pass emits.
 //===----------------------------------------------------------------------===//
 
@@ -54,6 +84,7 @@
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
 #include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
   // handle byval parameters
-  void handleByValParam(Argument *);
-  // handle non-byval pointer parameters
-  void handlePointerParam(Argument *);
+  void handleByValParam(Argument *Arg);
+  // Knowing Ptr must point to the global address space, this function
+  // addrspacecasts Ptr to global and then back to generic. This allows
+  // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
+  // loads/stores that appear later.
+  void markPointerAsGlobal(Value *Ptr);
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
 //
 // The above code allocates some space in the stack and copies the incoming
 // struct from param space to local space.
-// Then replace all occurences of %d by %temp.
+// Then replace all occurrences of %d by %temp.
 // =============================================================================
 void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
   Function *Func = Arg->getParent();
@@ -128,27 +162,33 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
   new StoreInst(LI, AllocA, FirstInst);
 }
 
-void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
-  assert(!Arg->hasByValAttr() &&
-         "byval params should be handled by handleByValParam");
-
-  // Do nothing if the argument already points to the global address space.
-  if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+  if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
     return;
 
-  Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
-  Instruction *ArgInGlobal = new AddrSpaceCastInst(
-      Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+  // Decide where to emit the addrspacecast pair.
+  BasicBlock::iterator InsertPt;
+  if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+    // Insert at the function entry if Ptr is an argument.
+    InsertPt = Arg->getParent()->getEntryBlock().begin();
+  } else {
+    // Insert right after Ptr if Ptr is an instruction.
+    InsertPt = ++cast<Instruction>(Ptr)->getIterator();
+    assert(InsertPt != InsertPt->getParent()->end() &&
+           "We don't call this function with Ptr being a terminator.");
+  }
+
+  Instruction *PtrInGlobal = new AddrSpaceCastInst(
+      Ptr, PointerType::get(Ptr->getType()->getPointerElementType(),
                             ADDRESS_SPACE_GLOBAL),
-      Arg->getName(), FirstInst);
-  Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
-                                              Arg->getName(), FirstInst);
-  // Replace with ArgInGeneric all uses of Args except ArgInGlobal.
-  Arg->replaceAllUsesWith(ArgInGeneric);
-  ArgInGlobal->setOperand(0, Arg);
+      Ptr->getName(), &*InsertPt);
+  Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
+                                              Ptr->getName(), &*InsertPt);
+  // Replace all uses of Ptr with PtrInGeneric, except the use in PtrInGlobal.
+  Ptr->replaceAllUsesWith(PtrInGeneric);
+  PtrInGlobal->setOperand(0, Ptr);
 }
-
 // =============================================================================
 // Main function for this pass.
 // =============================================================================
@@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
   if (!isKernelFunction(F))
     return false;
 
+  if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
+    // Mark pointers in byval structs as global.
+    for (auto &B : F) {
+      for (auto &I : B) {
+        if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+          if (LI->getType()->isPointerTy()) {
+            Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
+                                            F.getParent()->getDataLayout());
+            if (Argument *Arg = dyn_cast<Argument>(UO)) {
+              if (Arg->hasByValAttr()) {
+                // LI is a load from a pointer within a byval kernel parameter.
+                markPointerAsGlobal(LI);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
   for (Argument &Arg : F.args()) {
     if (Arg.getType()->isPointerTy()) {
       if (Arg.hasByValAttr())
         handleByValParam(&Arg);
       else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
-        handlePointerParam(&Arg);
+        markPointerAsGlobal(&Arg);
     }
   }
   return true;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 46b4b33e7e40..81a606d7535c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -68,7 +68,7 @@ public:
     return false;
   }
   void visitUsedExpr(MCStreamer &Streamer) const override {};
-  MCSection *findAssociatedSection() const override { return nullptr; }
+  MCFragment *findAssociatedFragment() const override { return nullptr; }
 
   // There are no TLS NVPTXMCExprs at the moment.
   void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -110,7 +110,7 @@ public:
     return false;
   }
   void visitUsedExpr(MCStreamer &Streamer) const override {};
-  MCSection *findAssociatedSection() const override { return nullptr; }
+  MCFragment *findAssociatedFragment() const override { return nullptr; }
 
   // There are no TLS NVPTXMCExprs at the moment.
 void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5fd69a6815a8..17019d7b364d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     // If last instruction is a return instruction, add an epilogue
-    if (!I->empty() && I->back().isReturn())
+    if (I->isReturnBlock())
       TFI.emitEpilogue(MF, *I);
   }
 
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index 0d2627d62ebd..45a7309479ee 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -19,15 +19,14 @@
 #include <vector>
 
 namespace llvm {
-/// NVPTXSection - Represents a section in PTX
-/// PTX does not have sections. We create this class in order to use
-/// the ASMPrint interface.
+/// Represents a section in PTX. PTX does not have sections. We create this
+/// class in order to use the ASMPrint interface.
 ///
-class NVPTXSection : public MCSection {
+class NVPTXSection final : public MCSection {
   virtual void anchor();
 
 public:
   NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
-  virtual ~NVPTXSection() {}
+  ~NVPTXSection() {}
 
   /// Override this as NVPTX has its own way of printing switching
   /// to a section.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 248f9e117d83..aa931b134da9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
 }
@@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() {
 
   // FIXME: This pass is really intended to be invoked during IR optimization,
   // but it's very NVPTX-specific.
-  initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
-  initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
-  initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
-  initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
-  initializeNVPTXFavorNonGenericAddrSpacesPass(
-      *PassRegistry::getPassRegistry());
-  initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
-  initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
+  PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializeNVVMReflectPass(PR);
+  initializeGenericToNVVMPass(PR);
+  initializeNVPTXAllocaHoistingPass(PR);
+  initializeNVPTXAssignValidGlobalNamesPass(PR);
+  initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+  initializeNVPTXLowerKernelArgsPass(PR);
+  initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXLowerAggrCopiesPass(PR);
 }
 
 static std::string computeDataLayout(bool is64Bit) {
@@ -139,6 +141,10 @@ public:
   FunctionPass *createTargetRegisterAllocator(bool) override;
   void addFastRegAlloc(FunctionPass *RegAllocPass) override;
   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+  void addEarlyCSEOrGVNPass();
 };
 } // end anonymous namespace
 
@@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis([this](Function &F) {
+  return TargetIRAnalysis([this](const Function &F) {
     return TargetTransformInfo(NVPTXTTIImpl(this, F));
   });
 }
 
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+  if (getOptLevel() == CodeGenOpt::Aggressive)
+    addPass(createGVNPass());
+  else
+    addPass(createEarlyCSEPass());
+}
+
 void NVPTXPassConfig::addIRPasses() {
   // The following passes are known to not play well with virtual regs hanging
   // around after register allocation (which in our case, is *all* registers).
@@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() {
   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
   disablePass(&PrologEpilogCodeInserterID);
   disablePass(&MachineCopyPropagationID);
-  disablePass(&BranchFolderPassID);
   disablePass(&TailDuplicateID);
 
+  addPass(createNVVMReflectPass());
   addPass(createNVPTXImageOptimizerPass());
-  TargetPassConfig::addIRPasses();
   addPass(createNVPTXAssignValidGlobalNamesPass());
   addPass(createGenericToNVVMPass());
+
+  // === Propagate special address spaces ===
   addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
   // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
   // be eliminated by SROA.
@@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() {
   // them unused. We could remove dead code in an ad-hoc manner, but that
   // requires manual work and might be error-prone.
   addPass(createDeadCodeEliminationPass());
+
+  // === Straight-line scalar optimizations ===
   addPass(createSeparateConstOffsetFromGEPPass());
+  addPass(createSpeculativeExecutionPass());
   // ReassociateGEPs exposes more opportunities for SLSR. See
   // the example in reassociate-geps-and-slsr.ll.
   addPass(createStraightLineStrengthReducePass());
   // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
   // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
   // for some of our benchmarks.
-  if (getOptLevel() == CodeGenOpt::Aggressive)
-    addPass(createGVNPass());
-  else
-    addPass(createEarlyCSEPass());
+  addEarlyCSEOrGVNPass();
 
   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
   addPass(createNaryReassociatePass());
   // NaryReassociate on GEPs creates redundant common expressions, so run
   // EarlyCSE after it.
   addPass(createEarlyCSEPass());
+
+  // === LSR and other generic IR passes ===
+  TargetPassConfig::addIRPasses();
+  // EarlyCSE is not always strong enough to clean up what LSR produces. For
+  // example, GVN can combine
+  //
+  //   %0 = add %a, %b
+  //   %1 = add %b, %a
+  //
+  // and
+  //
+  //   %0 = shl nsw %a, 2
+  //   %1 = shl %a, 2
+  //
+  // but EarlyCSE can do neither of them.
+  addEarlyCSEOrGVNPass();
 }
 
 bool NVPTXPassConfig::addInstSelector() {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 5ecdc8748830..0f88ddfaa934 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -48,8 +48,7 @@ public:
   void Initialize(MCContext &ctx, const TargetMachine &TM) override {
     TargetLoweringObjectFile::Initialize(ctx, TM);
     TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
-    DataSection =
-        new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+    DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData());
     BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
     ReadOnlySection =
         new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
@@ -84,7 +83,7 @@ public:
       new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
   }
 
-  MCSection *getSectionForConstant(SectionKind Kind,
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
                                    const Constant *C) const override {
     return ReadOnlySection;
   }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e7250cdba5ac..6e679dd0257c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
   return false;
 }
 
-unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+int NVPTXTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo) {
   // Legalize the type.
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
 
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 5bcd1e27a558..0946a3293eec 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -52,7 +52,7 @@ public:
 
   bool isSourceOfDivergence(const Value *V);
 
-  unsigned getArithmeticInstrCost(
+  int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1f178af41670..578b466568ae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
   return false;
 }
 
-bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
-  if ((id == Intrinsic::nvvm_barrier0) ||
-      (id == Intrinsic::nvvm_barrier0_popc) ||
-      (id == Intrinsic::nvvm_barrier0_and) ||
-      (id == Intrinsic::nvvm_barrier0_or) ||
-      (id == Intrinsic::cuda_syncthreads))
-    return true;
-  return false;
-}
-
-// Interface for checking all memory space transfer related intrinsics
-bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
-  if (id == Intrinsic::nvvm_ptr_local_to_gen ||
-      id == Intrinsic::nvvm_ptr_shared_to_gen ||
-      id == Intrinsic::nvvm_ptr_global_to_gen ||
-      id == Intrinsic::nvvm_ptr_constant_to_gen ||
-      id == Intrinsic::nvvm_ptr_gen_to_global ||
-      id == Intrinsic::nvvm_ptr_gen_to_shared ||
-      id == Intrinsic::nvvm_ptr_gen_to_local ||
-      id == Intrinsic::nvvm_ptr_gen_to_constant ||
-      id == Intrinsic::nvvm_ptr_gen_to_param) {
-    return true;
-  }
-
-  return false;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// provide an option to ignore GEP indicies for find out the base address only
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
-  V = V->stripPointerCasts();
-  while (true) {
-    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
-      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
-        V = IS->getArgOperand(0)->stripPointerCasts();
-        continue;
-      }
-    } else if (ignore_GEP_indices)
-      if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-        V = GEP->getPointerOperand()->stripPointerCasts();
-        continue;
-      }
-    break;
-  }
-  return V;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// - ignore GEP indicies for find out the base address only, and
-// - tracking PHINode
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
-  if (processed.find(V) != processed.end())
-    return nullptr;
-  processed.insert(V);
-
-  const Value *V2 = V->stripPointerCasts();
-  if (V2 != V && processed.find(V2) != processed.end())
-    return nullptr;
-  processed.insert(V2);
-
-  V = V2;
-
-  while (true) {
-    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
-      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
-        V = IS->getArgOperand(0)->stripPointerCasts();
-        continue;
-      }
-    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      V = GEP->getPointerOperand()->stripPointerCasts();
-      continue;
-    } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
-      if (V != V2 && processed.find(V) != processed.end())
-        return nullptr;
-      processed.insert(PN);
-      const Value *common = nullptr;
-      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
-        const Value *pv = PN->getIncomingValue(i);
-        const Value *base = skipPointerTransfer(pv, processed);
-        if (base) {
-          if (!common)
-            common = base;
-          else if (common != base)
-            return PN;
-        }
-      }
-      if (!common)
-        return PN;
-      V = common;
-    }
-    break;
-  }
-  return V;
-}
-
-// The following are some useful utilities for debuggung
+// The following are some useful utilities for debugging
 
 BasicBlock *llvm::getParentBlock(Value *v) {
   if (BasicBlock *B = dyn_cast<BasicBlock>(v))
@@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) {
     return;
 
   for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
-    BasicBlock *B = it;
+    BasicBlock *B = &*it;
     if (strcmp(B->getName().data(), blockName) == 0) {
       B->dump();
       return;
@@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
   return nullptr;
 }
 
-// Dump an instruction by nane
+// Dump an instruction by name
 void llvm::dumpInst(Value *base, char *instName) {
   Instruction *I = getInst(base, instName);
   if (I)
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7e2ce73daaa3..a5262cb7412f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &);
 bool getAlign(const llvm::Function &, unsigned index, unsigned &);
 bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
 
-bool isBarrierIntrinsic(llvm::Intrinsic::ID);
-
-/// make_vector - Helper function which is useful for building temporary vectors
-/// to pass into type construction of CallInst ctors. This turns a null
-/// terminated list of pointers (or other value types) into a real live vector.
-///
-template <typename T> inline std::vector<T> make_vector(T A, ...) {
-  va_list Args;
-  va_start(Args, A);
-  std::vector<T> Result;
-  Result.push_back(A);
-  while (T Val = va_arg(Args, T))
-    Result.push_back(Val);
-  va_end(Args);
-  return Result;
-}
-
-bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
-const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *
-skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
 BasicBlock *getParentBlock(Value *v);
 Function *getParentFunction(Value *v);
 void dumpBlock(Value *v, char *blockName);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index a237247e4833..e69bbba9f193 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
 def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
                                 (ins V2I16Regs:$src, i8imm:$c),
                                 "mov.u16 \t$dst, $src${c:vecelem};",
-                                [(set Int16Regs:$dst, (vector_extract
+                                [(set Int16Regs:$dst, (extractelt
                                   (v2i16 V2I16Regs:$src), imm:$c))],
                                 IMOV16rr>;
 
@@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
 def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
                                 (ins V4I16Regs:$src, i8imm:$c),
                                 "mov.u16 \t$dst, $src${c:vecelem};",
-                                [(set Int16Regs:$dst, (vector_extract
+                                [(set Int16Regs:$dst, (extractelt
                                   (v4i16 V4I16Regs:$src), imm:$c))],
                                 IMOV16rr>;
 
@@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
 def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
                               (ins V2I8Regs:$src, i8imm:$c),
                               "mov.u16 \t$dst, $src${c:vecelem};",
-                              [(set Int8Regs:$dst, (vector_extract
+                              [(set Int8Regs:$dst, (extractelt
                                 (v2i8 V2I8Regs:$src), imm:$c))],
                               IMOV8rr>;
 
@@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
 def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
                               (ins V4I8Regs:$src, i8imm:$c),
                               "mov.u16 \t$dst, $src${c:vecelem};",
-                              [(set Int8Regs:$dst, (vector_extract
+                              [(set Int8Regs:$dst, (extractelt
                                 (v4i8 V4I8Regs:$src), imm:$c))],
                               IMOV8rr>;
 
@@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
 def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
                                 (ins V2I32Regs:$src, i8imm:$c),
                                 "mov.u32 \t$dst, $src${c:vecelem};",
-                                [(set Int32Regs:$dst, (vector_extract
+                                [(set Int32Regs:$dst, (extractelt
                                   (v2i32 V2I32Regs:$src), imm:$c))],
                                 IMOV32rr>;
 
@@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
 def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
                                 (ins V2F32Regs:$src, i8imm:$c),
                                 "mov.f32 \t$dst, $src${c:vecelem};",
-                                [(set Float32Regs:$dst, (vector_extract
+                                [(set Float32Regs:$dst, (extractelt
                                   (v2f32 V2F32Regs:$src), imm:$c))],
                                 FMOV32rr>;
 
@@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
 def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
                                 (ins V2I64Regs:$src, i8imm:$c),
                                 "mov.u64 \t$dst, $src${c:vecelem};",
-                                [(set Int64Regs:$dst, (vector_extract
+                                [(set Int64Regs:$dst, (extractelt
                                   (v2i64 V2I64Regs:$src), imm:$c))],
                                 IMOV64rr>;
 
@@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
 def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
                                 (ins V2F64Regs:$src, i8imm:$c),
                                 "mov.f64 \t$dst, $src${c:vecelem};",
-                                [(set Float64Regs:$dst, (vector_extract
+                                [(set Float64Regs:$dst, (extractelt
                                   (v2f64 V2F64Regs:$src), imm:$c))],
                                 FMOV64rr>;
 
@@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
 def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
                                 (ins V4I32Regs:$src, i8imm:$c),
                                 "mov.u32 \t$dst, $src${c:vecelem};",
-                                [(set Int32Regs:$dst, (vector_extract
+                                [(set Int32Regs:$dst, (extractelt
                                   (v4i32 V4I32Regs:$src), imm:$c))],
                                 IMOV32rr>;
 
@@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
 def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
                                 (ins V4F32Regs:$src, i8imm:$c),
                                 "mov.f32 \t$dst, $src${c:vecelem};",
-                                [(set Float32Regs:$dst, (vector_extract
+                                [(set Float32Regs:$dst, (extractelt
                                   (v4f32 V4F32Regs:$src), imm:$c))],
                                 FMOV32rr>;
 }
 
@@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
                             "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                             [(set V2I8Regs:$dst,
-                              (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
-                            IMOV8rr>;
+                              (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
 
 // Insert v4i8
 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
@@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
                             "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                             [(set V4I8Regs:$dst,
-                              (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
-                            IMOV8rr>;
+                              (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
 
 // Insert v2i16
 def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
@@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
                             "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                             [(set V2I16Regs:$dst,
-                              (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
-                            IMOV16rr>;
+                              (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+                              IMOV16rr>;
 
 // Insert v4i16
 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
@@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
                             "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
                             [(set V4I16Regs:$dst,
-                              (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
-                            IMOV16rr>;
+                              (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+                              IMOV16rr>;
 
 // Insert v2i32
 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
@@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
                             "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
                             [(set V2I32Regs:$dst,
-                              (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
-                            IMOV32rr>;
+                              (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+                              IMOV32rr>;
 
 // Insert v2f32
 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
@@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
                             "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
                             [(set V2F32Regs:$dst,
-                              (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
-                            FMOV32rr>;
+                              (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+                              FMOV32rr>;
 
 // Insert v2i64
 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
@@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
                             "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
                             [(set V2I64Regs:$dst,
-                              (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
-                            IMOV64rr>;
+                              (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+                              IMOV64rr>;
 
 // Insert v2f64
 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
@@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
                             "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
                             [(set V2F64Regs:$dst,
-                              (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
-                            FMOV64rr>;
+                              (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+                              FMOV64rr>;
 
 // Insert v4i32
 def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
@@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
                             "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
                             [(set V4I32Regs:$dst,
-                              (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
-                            IMOV32rr>;
+                              (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+                              IMOV32rr>;
 
 // Insert v4f32
 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
@@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
                             "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
                             "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
                             [(set V4F32Regs:$dst,
-                              (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
-                            FMOV32rr>;
+                              (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+                              FMOV32rr>;
 }
 
 class BinOpAsmString<string c> {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5e375b7852e4..20ab5db584d2 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() {
   for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
     DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
     SmallVector<StringRef, 4> NameValList;
-    StringRef(ReflectList[i]).split(NameValList, ",");
+    StringRef(ReflectList[i]).split(NameValList, ',');
     for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
       SmallVector<StringRef, 2> NameValPair;
-      NameValList[j].split(NameValPair, "=");
+      NameValList[j].split(NameValPair, '=');
       assert(NameValPair.size() == 2 && "name=val expected");
       std::stringstream ValStream(NameValPair[1]);
       int Val;
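The NVVMReflect hunk above switches StringRef::split from string separators to the single-character overloads. As a standalone sketch of the same name=val parsing (hypothetical function name; the pass itself reads these strings from its ReflectList option):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // Split "name1=val1,name2=val2" the way NVVMReflect::setVarMap does.
    static void parseReflectString(StringRef Opts) {
      SmallVector<StringRef, 4> NameValList;
      Opts.split(NameValList, ',');      // Split on a char, not a StringRef.
      for (StringRef NameVal : NameValList) {
        auto Pair = NameVal.split('=');  // Pair form: (name, value).
        StringRef Name = Pair.first;
        StringRef Val = Pair.second;
        (void)Name;
        (void)Val; // A real consumer would record the mapping here.
      }
    }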