Diffstat (limited to 'lib/Target/NVPTX')
 lib/Target/NVPTX/CMakeLists.txt                       |   2
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp     |   2
 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp      |   3
 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h        |   4
 lib/Target/NVPTX/NVPTX.h                              |   2
 lib/Target/NVPTX/NVPTXAsmPrinter.cpp                  | 143
 lib/Target/NVPTX/NVPTXAsmPrinter.h                    |   4
 lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp   | 207
 lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp                |   4
 lib/Target/NVPTX/NVPTXISelLowering.cpp                |   3
 lib/Target/NVPTX/NVPTXISelLowering.h                  |   9
 lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp             | 170
 lib/Target/NVPTX/NVPTXLowerStructArgs.cpp             | 136
 lib/Target/NVPTX/NVPTXMCExpr.cpp                      |  13
 lib/Target/NVPTX/NVPTXMCExpr.h                        |  24
 lib/Target/NVPTX/NVPTXTargetMachine.cpp               |  13
 16 files changed, 454 insertions(+), 285 deletions(-)
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index cdd2f1f5944f..d48a7a9b1fcc 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -20,7 +20,7 @@ set(NVPTXCodeGen_sources NVPTXImageOptimizer.cpp NVPTXInstrInfo.cpp NVPTXLowerAggrCopies.cpp - NVPTXLowerStructArgs.cpp + NVPTXLowerKernelArgs.cpp NVPTXMCExpr.cpp NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index ac92df901243..4594c22b8701 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -85,7 +85,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << markup("<imm:") << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "Unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index b9df3d18f941..ef36c13b49f1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -25,8 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile", void NVPTXMCAsmInfo::anchor() {} -NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) { if (TheTriple.getArch() == Triple::nvptx64) { PointerSize = CalleeSaveStackSlotSize = 8; } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index c3242866b177..b432e065c2f4 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -18,12 +18,12 @@ namespace llvm { class Target; -class StringRef; +class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit NVPTXMCAsmInfo(StringRef TT); + explicit NVPTXMCAsmInfo(const Triple &TheTriple); }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 382525d27a25..477b0bac6ca8 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); -FunctionPass *createNVPTXLowerStructArgsPass(); +FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3bbea400e53e..298b992b241f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -266,7 +266,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_ExternalSymbol: @@ -283,11 +283,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, default: report_fatal_error("Unsupported FP type"); break; case Type::FloatTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); + 
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); break; case Type::DoubleTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); + NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); break; } break; @@ -334,7 +334,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { const MCExpr *Expr; - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); return MCOperand::createExpr(Expr); } @@ -418,9 +418,8 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( const MachineBasicBlock &MBB) const { MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - // TODO: isLoopHeader() should take "const MachineBasicBlock *". // We insert .pragma "nounroll" only to the loop header. - if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB))) + if (!LI.isLoopHeader(&MBB)) return false; // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, @@ -468,7 +467,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { printReturnValStr(*MF, O); } - O << *CurrentFnSym; + CurrentFnSym->print(O, MAI); emitFunctionParamList(*MF, O); @@ -625,7 +624,8 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { else O << ".func "; printReturnValStr(F, O); - O << *getSymbol(F) << "\n"; + getSymbol(F)->print(O, MAI); + O << "\n"; emitFunctionParamList(F, O); O << ";\n"; } @@ -1172,7 +1172,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << getPTXFundamentalTypeStr(ETy, false); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); // Ptx allows variable initilization only for constant and global state // spaces. @@ -1189,11 +1189,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds zero-initializer to variables that don't have an // initial value, so skip warning for this case. 
if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = - ("initial value of '" + GVar->getName() + - "' is not allowed in addrspace(" + - Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); - report_fatal_error(warnMsg.c_str()); + report_fatal_error("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(PTy->getAddressSpace()) + ")"); } } } @@ -1220,15 +1218,21 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { - O << " .u64 " << *getSymbol(GVar) << "["; + O << " .u64 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 8; } else { - O << " .u32 " << *getSymbol(GVar) << "["; + O << " .u32 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 4; } O << "]"; } else { - O << " .b8 " << *getSymbol(GVar) << "["; + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize; O << "]"; } @@ -1236,7 +1240,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, aggBuffer.print(); O << "}"; } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1244,7 +1249,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } } } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1351,7 +1357,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); return; } @@ -1366,9 +1372,11 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *getSymbol(GVar) << "["; + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; if (ElementSize) { - O << itostr(ElementSize); + O << ElementSize; } O << "]"; break; @@ -1408,11 +1416,13 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { - O << *getSymbol(I->getParent()) << "_param_" << paramIndex; + getSymbol(I->getParent())->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { @@ -1446,21 +1456,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << "\t.param .u64 .ptr .surfref "; else O << "\t.param .surfref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } else { // Default image is read_only if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .texref "; else O << "\t.param .texref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } } else { if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .samplerref "; else O << "\t.param .samplerref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } continue; } @@ 
-1716,10 +1729,10 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { } if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } @@ -1733,20 +1746,44 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } else { - O << *lowerConstant(CPV); + lowerConstant(CPV)->print(O, MAI); return; } } llvm_unreachable("Not scalar type found in printScalarConstant()"); } +// These utility functions assure we get the right sequence of bytes for a given +// type even for big-endian machines +template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { + int64_t vp = (int64_t)val; + for (unsigned i = 0; i < sizeof(T); ++i) { + p[i] = (unsigned char)vp; + vp >>= 8; + } +} +static void ConvertFloatToBytes(unsigned char *p, float val) { + int32_t *vp = (int32_t *)&val; + for (unsigned i = 0; i < sizeof(int32_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} +static void ConvertDoubleToBytes(unsigned char *p, double val) { + int64_t *vp = (int64_t *)&val; + for (unsigned i = 0; i < sizeof(int64_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} + void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1760,30 +1797,30 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, return; } - unsigned char *ptr; + unsigned char ptr[8]; switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = &c; + ConvertIntToBytes<>(ptr, c); aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = (unsigned char *)&int16; + ConvertIntToBytes<>(ptr, int16); aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1798,14 +1835,14 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { long long int64 = (long 
long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1827,11 +1864,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char *)&float32; + ConvertFloatToBytes(ptr, float32); aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char *)&float64; + ConvertDoubleToBytes(ptr, float64); aggBuffer->addBytes(ptr, 8, Bytes); } else { llvm_unreachable("unsupported fp const type"); @@ -1993,16 +2030,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) MCContext &Ctx = OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) - return MCConstantExpr::Create(0, Ctx); + return MCConstantExpr::create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) - return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + return MCConstantExpr::create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { const MCSymbolRefExpr *Expr = - MCSymbolRefExpr::Create(getSymbol(GV), Ctx); + MCSymbolRefExpr::create(getSymbol(GV), Ctx); if (ProcessingGeneric) { - return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx); + return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); } else { return Expr; } @@ -2059,7 +2096,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return Base; int64_t Offset = OffsetAI.getSExtValue(); - return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -2102,8 +2139,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); - return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); } // The MC library also has a right-shift operator, but it isn't consistently @@ -2113,7 +2150,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } } @@ -2123,7 +2160,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { switch (Expr.getKind()) { case MCExpr::Target: - return cast<MCTargetExpr>(&Expr)->PrintImpl(OS); + return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); case MCExpr::Constant: OS << cast<MCConstantExpr>(Expr).getValue(); return; @@ -2131,7 +2168,7 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); const MCSymbol &Sym = SRE.getSymbol(); - OS << Sym; + Sym.print(OS, MAI); return; } @@ -2256,11 +2293,11 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; default: diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 301c68609a29..f6f7685e76f9 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -165,10 +165,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *Name; + Name->print(O, AP.MAI); O << ")"; } else { - O << *Name; + Name->print(O, AP.MAI); } } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { const MCExpr *Expr = diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index ae63caec1320..cfff0019b8d9 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -10,34 +10,54 @@ // When a load/store accesses the generic address space, checks whether the // address is casted from a non-generic address space. If so, remove this // addrspacecast because accessing non-generic address spaces is typically -// faster. Besides seeking addrspacecasts, this optimization also traces into -// the base pointer of a GEP. +// faster. Besides removing addrspacecasts directly used by loads/stores, this +// optimization also recursively traces into a GEP's pointer operand and a +// bitcast's source to find more eliminable addrspacecasts. // // For instance, the code below loads a float from an array allocated in // addrspace(3). 
// -// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* -// %1 = gep [10 x float]* %0, i64 0, i64 %i -// %2 = load float* %1 ; emits ld.f32 +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = bitcast float* %1 to i32* +// %3 = load i32* %2 ; emits ld.u32 // -// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast -// and the GEP to expose more optimization opportunities to function +// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP, +// and the bitcast to expose more optimization opportunities to function // optimizeMemoryInst. The intermediate code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %1 = addrspacecast float addrspace(3)* %0 to float* -// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %2 = addrspacecast i32 addrspace(3)* %1 to i32* +// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly // // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed // generic pointers, and folds the load and the addrspacecast into a load from // the original address space. The final code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32 // // This pass may remove an addrspacecast in a different BB. Therefore, we // implement it as a FunctionPass. // +// TODO: +// The current implementation doesn't handle PHINodes. Eliminating +// addrspacecasts used by PHINodes is trickier because PHINodes can introduce +// loops in data flow. For example, +// +// %generic.input = addrspacecast float addrspace(3)* %input to float* +// loop: +// %y = phi [ %generic.input, %y2 ] +// %y2 = getelementptr %y, 1 +// %v = load %y2 +// br ..., label %loop, ... +// +// Marking %y2 shared depends on marking %y shared, but %y also data-flow +// depends on %y2. We probably need an iterative fix-point algorithm on handle +// this case. +// //===----------------------------------------------------------------------===// #include "NVPTX.h" @@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { public: static char ID; NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override; +private: /// Optimizes load/store instructions. Idx is the index of the pointer operand /// (0 for load, and 1 for store). Returns true if it changes anything. bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); + /// Recursively traces into a GEP's pointer operand or a bitcast's source to + /// find an eliminable addrspacecast, and hoists that addrspacecast to the + /// outermost level. For example, this function transforms + /// bitcast(gep(gep(addrspacecast(X)))) + /// to + /// addrspacecast(bitcast(gep(gep(X)))). + /// + /// This reordering exposes to optimizeMemoryInstruction more + /// optimization opportunities on loads and stores. + /// + /// Returns true if this function succesfully hoists an eliminable + /// addrspacecast or V is already such an addrspacecast. /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, - /// indices)". 
This reordering exposes to optimizeMemoryInstruction more - /// optimization opportunities on loads and stores. Returns true if it changes - /// the program. - bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); + /// indices)". + bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0); + /// Helper function for GEPs. + bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); + /// Helper function for bitcasts. + bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; } @@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", "Remove unnecessary non-generic-to-generic addrspacecasts", false, false) -// Decides whether removing Cast is valid and beneficial. Cast can be an -// instruction or a constant expression. -static bool IsEliminableAddrSpaceCast(Operator *Cast) { - // Returns false if not even an addrspacecast. - if (Cast->getOpcode() != Instruction::AddrSpaceCast) +// Decides whether V is an addrspacecast and shortcutting V in load/store is +// valid and beneficial. +static bool isEliminableAddrSpaceCast(Value *V) { + // Returns false if V is not even an addrspacecast. + Operator *Cast = dyn_cast<Operator>(V); + if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast) return false; Value *Src = Cast->getOperand(0); @@ -108,67 +143,119 @@ static bool IsEliminableAddrSpaceCast(Operator *Cast) { DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( - GEPOperator *GEP) { - Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); - if (!Cast) +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP, + int Depth) { + if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1)) return false; - if (!IsEliminableAddrSpaceCast(Cast)) - return false; + // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now + // an eliminable addrspacecast. + assert(isEliminableAddrSpaceCast(GEP->getPointerOperand())); + Operator *Cast = cast<Operator>(GEP->getPointerOperand()); SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { - // %1 = gep (addrspacecast X), indices + // GEP = gep (addrspacecast X), indices // => - // %0 = gep X, indices - // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create( + // NewGEP = gep X, indices + // NewASC = addrspacecast NewGEP + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP->getSourceElementType(), Cast->getOperand(0), Indices, - GEP->getName(), GEPI); - NewGEPI->setIsInBounds(GEP->isInBounds()); - GEP->replaceAllUsesWith( - new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); + "", GEPI); + NewGEP->setIsInBounds(GEP->isInBounds()); + Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); + NewASC->takeName(GEP); + GEP->replaceAllUsesWith(NewASC); } else { // GEP is a constant expression. 
- Constant *NewGEPCE = ConstantExpr::getGetElementPtr( + Constant *NewGEP = ConstantExpr::getGetElementPtr( GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( - ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); + ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType())); } return true; } -bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, - unsigned Idx) { - // If the pointer operand is a GEP, hoist the addrspacecast if any from the - // GEP to expose more optimization opportunites. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { - hoistAddrSpaceCastFromGEP(GEP); - } +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( + BitCastOperator *BC, int Depth) { + if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1)) + return false; - // load/store (addrspacecast X) => load/store X if shortcutting the - // addrspacecast is valid and can improve performance. - // - // e.g., - // %1 = addrspacecast float addrspace(3)* %0 to float* - // %2 = load float* %1 - // -> - // %2 = load float addrspace(3)* %0 - // - // Note: the addrspacecast can also be a constant expression. - if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { - if (IsEliminableAddrSpaceCast(Cast)) { - MI->setOperand(Idx, Cast->getOperand(0)); - return true; - } + // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now + // an eliminable addrspacecast. + assert(isEliminableAddrSpaceCast(BC->getOperand(0))); + Operator *Cast = cast<Operator>(BC->getOperand(0)); + + // Cast = addrspacecast Src + // BC = bitcast Cast + // => + // Cast' = bitcast Src + // BC' = addrspacecast Cast' + Value *Src = Cast->getOperand(0); + Type *TypeOfNewCast = + PointerType::get(BC->getType()->getPointerElementType(), + Src->getType()->getPointerAddressSpace()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) { + Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); + Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); + NewBC->takeName(BC); + BC->replaceAllUsesWith(NewBC); + } else { + // BC is a constant expression. + Constant *NewCast = + ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast); + Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); + BC->replaceAllUsesWith(NewBC); } + return true; +} + +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, + int Depth) { + // Returns true if V is already an eliminable addrspacecast. + if (isEliminableAddrSpaceCast(V)) + return true; + + // Limit the depth to prevent this recursive function from running too long. + const int MaxDepth = 20; + if (Depth >= MaxDepth) + return false; + + // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer + // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts + // that are not directly used by the load/store. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return hoistAddrSpaceCastFromGEP(GEP, Depth); + + if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) + return hoistAddrSpaceCastFromBitCast(BC, Depth); return false; } +bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, + unsigned Idx) { + if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) { + // load/store (addrspacecast X) => load/store X if shortcutting the + // addrspacecast is valid and can improve performance. 
+ // + // e.g., + // %1 = addrspacecast float addrspace(3)* %0 to float* + // %2 = load float* %1 + // -> + // %2 = load float addrspace(3)* %0 + // + // Note: the addrspacecast can also be a constant expression. + assert(isEliminableAddrSpaceCast(MI->getOperand(Idx))); + Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx)); + MI->setOperand(Idx, ASC->getOperand(0)); + return true; + } + return false; +} + bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { if (DisableFavorNonGeneric) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index fa38a686fcbf..232a611d1760 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -613,6 +613,10 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; break; + case ADDRESS_SPACE_PARAM: + Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 + : NVPTX::nvvm_ptr_gen_to_param; + break; } return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 805847a581fa..b5af72ab855a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3725,7 +3725,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 5142ae3cd88f..ed94775b3002 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,7 +456,8 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; @@ -497,12 +498,6 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. - return InlineAsm::Constraint_m; - } - const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp new file mode 100644 index 000000000000..24dcb122b94e --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -0,0 +1,170 @@ +//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pointer arguments to kernel functions need to be lowered specially. +// +// 1. Copy byval struct args to local memory. 
This is a preparation for handling +// cases like +// +// kernel void foo(struct A arg, ...) +// { +// struct A *p = &arg; +// ... +// ... = p->filed1 ... (this is no generic address for .param) +// p->filed2 = ... (this is no write access to .param) +// } +// +// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the +// global address space. This allows later optimizations to emit +// ld.global.*/st.global.* for accessing these pointer arguments. For +// example, +// +// define void @foo(float* %input) { +// %v = load float, float* %input, align 4 +// ... +// } +// +// becomes +// +// define void @foo(float* %input) { +// %input2 = addrspacecast float* %input to float addrspace(1)* +// %input3 = addrspacecast float addrspace(1)* %input2 to float* +// %v = load float, float* %input3, align 4 +// ... +// } +// +// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to +// +// define void @foo(float* %input) { +// %input2 = addrspacecast float* %input to float addrspace(1)* +// %v = load float, float addrspace(1)* %input2, align 4 +// ... +// } +// +// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes +// don't cancel the addrspacecast pair this pass emits. +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "NVPTXTargetMachine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); +} + +namespace { +class NVPTXLowerKernelArgs : public FunctionPass { + bool runOnFunction(Function &F) override; + + // handle byval parameters + void handleByValParam(Argument *); + // handle non-byval pointer parameters + void handlePointerParam(Argument *); + +public: + static char ID; // Pass identification, replacement for typeid + NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) {} + const char *getPassName() const override { + return "Lower pointer arguments of CUDA kernels"; + } + +private: + const NVPTXTargetMachine *TM; +}; +} // namespace + +char NVPTXLowerKernelArgs::ID = 1; + +INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args", + "Lower kernel arguments (NVPTX)", false, false) + +// ============================================================================= +// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d), +// then add the following instructions to the first basic block: +// +// %temp = alloca %struct.x, align 8 +// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)* +// %tv = load %struct.x addrspace(101)* %tempd +// store %struct.x %tv, %struct.x* %temp, align 8 +// +// The above code allocates some space in the stack and copies the incoming +// struct from param space to local space. +// Then replace all occurences of %d by %temp. 
+// ============================================================================= +void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { + Function *Func = Arg->getParent(); + Instruction *FirstInst = &(Func->getEntryBlock().front()); + PointerType *PType = dyn_cast<PointerType>(Arg->getType()); + + assert(PType && "Expecting pointer type in handleByValParam"); + + Type *StructType = PType->getElementType(); + AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); + // Set the alignment to alignment of the byval parameter. This is because, + // later load/stores assume that alignment, and we are going to replace + // the use of the byval parameter with this alloca instruction. + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + Arg->replaceAllUsesWith(AllocA); + + Value *ArgInParam = new AddrSpaceCastInst( + Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(), + FirstInst); + LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst); + new StoreInst(LI, AllocA, FirstInst); +} + +void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) { + assert(!Arg->hasByValAttr() && + "byval params should be handled by handleByValParam"); + + Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin(); + Instruction *ArgInGlobal = new AddrSpaceCastInst( + Arg, PointerType::get(Arg->getType()->getPointerElementType(), + ADDRESS_SPACE_GLOBAL), + Arg->getName(), FirstInst); + Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(), + Arg->getName(), FirstInst); + // Replace with ArgInGeneric all uses of Args except ArgInGlobal. + Arg->replaceAllUsesWith(ArgInGeneric); + ArgInGlobal->setOperand(0, Arg); +} + + +// ============================================================================= +// Main function for this pass. +// ============================================================================= +bool NVPTXLowerKernelArgs::runOnFunction(Function &F) { + // Skip non-kernels. See the comments at the top of this file. + if (!isKernelFunction(F)) + return false; + + for (Argument &Arg : F.args()) { + if (Arg.getType()->isPointerTy()) { + if (Arg.hasByValAttr()) + handleByValParam(&Arg); + else if (TM && TM->getDrvInterface() == NVPTX::CUDA) + handlePointerParam(&Arg); + } + } + return true; +} + +FunctionPass * +llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) { + return new NVPTXLowerKernelArgs(TM); +} diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp deleted file mode 100644 index 68dfbb716139..000000000000 --- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Copy struct args to local memory. This is needed for kernel functions only. -// This is a preparation for handling cases like -// -// kernel void foo(struct A arg, ...) -// { -// struct A *p = &arg; -// ... -// ... = p->filed1 ... (this is no generic address for .param) -// p->filed2 = ... 
(this is no write access to .param) -// } -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "NVPTXUtilities.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" - -using namespace llvm; - -namespace llvm { -void initializeNVPTXLowerStructArgsPass(PassRegistry &); -} - -namespace { -class NVPTXLowerStructArgs : public FunctionPass { - bool runOnFunction(Function &F) override; - - void handleStructPtrArgs(Function &); - void handleParam(Argument *); - -public: - static char ID; // Pass identification, replacement for typeid - NVPTXLowerStructArgs() : FunctionPass(ID) {} - const char *getPassName() const override { - return "Copy structure (byval *) arguments to stack"; - } -}; -} // namespace - -char NVPTXLowerStructArgs::ID = 1; - -INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args", - "Lower structure arguments (NVPTX)", false, false) - -void NVPTXLowerStructArgs::handleParam(Argument *Arg) { - Function *Func = Arg->getParent(); - Instruction *FirstInst = &(Func->getEntryBlock().front()); - PointerType *PType = dyn_cast<PointerType>(Arg->getType()); - - assert(PType && "Expecting pointer type in handleParam"); - - Type *StructType = PType->getElementType(); - AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); - - /* Set the alignment to alignment of the byval parameter. This is because, - * later load/stores assume that alignment, and we are going to replace - * the use of the byval parameter with this alloca instruction. - */ - AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); - - Arg->replaceAllUsesWith(AllocA); - - // Get the cvt.gen.to.param intrinsic - Type *CvtTypes[] = { - Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM), - Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC)}; - Function *CvtFunc = Intrinsic::getDeclaration( - Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes); - - Value *BitcastArgs[] = { - new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC), - Arg->getName(), FirstInst)}; - CallInst *CallCVT = - CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst); - - BitCastInst *BitCast = new BitCastInst( - CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM), - Arg->getName(), FirstInst); - LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst); - new StoreInst(LI, AllocA, FirstInst); -} - -// ============================================================================= -// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then -// add the following instructions to the first basic block : -// -// %temp = alloca %struct.x, align 8 -// %tt1 = bitcast %struct.x * %d to i8 * -// %tt2 = llvm.nvvm.cvt.gen.to.param %tt2 -// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) * -// %tv = load %struct.x addrspace(101) * %tempd -// store %struct.x %tv, %struct.x * %temp, align 8 -// -// The above code allocates some space in the stack and copies the incoming -// struct from param space to local space. -// Then replace all occurences of %d by %temp. 
-// ============================================================================= -void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) { - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { - handleParam(&Arg); - } - } -} - -// ============================================================================= -// Main function for this pass. -// ============================================================================= -bool NVPTXLowerStructArgs::runOnFunction(Function &F) { - // Skip non-kernels. See the comments at the top of this file. - if (!isKernelFunction(F)) - return false; - - handleStructPtrArgs(F); - return true; -} - -FunctionPass *llvm::createNVPTXLowerStructArgsPass() { - return new NVPTXLowerStructArgs(); -} diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index 779b65ecc39f..3c98b9febf85 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -16,11 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "nvptx-mcexpr" const NVPTXFloatMCExpr* -NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { +NVPTXFloatMCExpr::create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { return new (Ctx) NVPTXFloatMCExpr(Kind, Flt); } -void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { +void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool Ignored; unsigned NumHex; APFloat APF = getAPFloat(); @@ -47,11 +47,14 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { } const NVPTXGenericMCSymbolRefExpr* -NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr, +NVPTXGenericMCSymbolRefExpr::create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx) { return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr); } -void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { - OS << "generic(" << *SymExpr << ")"; +void NVPTXGenericMCSymbolRefExpr::printImpl(raw_ostream &OS, + const MCAsmInfo *MAI) const { + OS << "generic("; + SymExpr->print(OS, MAI); + OS << ")"; } diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 8c6b219abd13..46b4b33e7e40 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -36,17 +36,17 @@ public: /// @name Construction /// @{ - static const NVPTXFloatMCExpr *Create(VariantKind Kind, APFloat Flt, + static const NVPTXFloatMCExpr *create(VariantKind Kind, APFloat Flt, MCContext &Ctx); - static const NVPTXFloatMCExpr *CreateConstantFPSingle(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPSingle(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); } - static const NVPTXFloatMCExpr *CreateConstantFPDouble(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPDouble(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); } /// @} @@ -61,14 +61,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return 
nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -92,7 +92,7 @@ public: /// @{ static const NVPTXGenericMCSymbolRefExpr - *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); + *create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); /// @} /// @name Accessors @@ -103,14 +103,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ac27c30aabab..a6466687bc7b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); -void initializeNVPTXLowerStructArgsPass(PassRegistry &); +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); - initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); + initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); } static std::string computeDataLayout(bool is64Bit) { @@ -163,7 +163,13 @@ void NVPTXPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs + // because we plan to merge NVPTXLowerKernelArgs and + // NVPTXFavorNonGenericAddrSpaces into one pass. + addPass(createSROAPass()); // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. @@ -181,6 +187,9 @@ void NVPTXPassConfig::addIRPasses() { addPass(createEarlyCSEPass()); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(createEarlyCSEPass()); } bool NVPTXPassConfig::addInstSelector() { |