Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h        2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h         1
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.h                              18
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp                 132
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h                    15
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp   10
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp               18
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp                59
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp               183
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h                  17
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp                   66
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h                      1
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp            353
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp                  2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp            102
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h                         4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp             2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSection.h                        9
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp               60
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h               5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp          4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h            2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp                  105
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h                     21
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXVector.td                       58
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp                       4
26 files changed, 671 insertions(+), 582 deletions(-)
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a210d099..f0f223aa057b 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -15,11 +15,9 @@
#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
-class MCOperand;
class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index b432e065c2f4..9ac3c8850f75 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -22,6 +22,7 @@ class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
+
public:
explicit NVPTXMCAsmInfo(const Triple &TheTriple);
};
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index fe28214e9588..e5fae85bacf2 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -41,24 +41,6 @@ enum CondCodes {
};
}
-inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
- switch (CC) {
- case NVPTXCC::NE:
- return "ne";
- case NVPTXCC::EQ:
- return "eq";
- case NVPTXCC::LT:
- return "lt";
- case NVPTXCC::LE:
- return "le";
- case NVPTXCC::GT:
- return "gt";
- case NVPTXCC::GE:
- return "ge";
- }
- llvm_unreachable("Unknown condition code");
-}
-
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ecb0f0a1d0a1..e8c36089a779 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
if (isABI) {
if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName() == "llvm.used")
- return false;
- return true;
+ return GV->getName() != "llvm.used";
}
for (const User *U : C->users())
@@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
if (!gv->hasInternalLinkage())
return false;
- const PointerType *Pty = gv->getType();
+ PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
return false;
@@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C,
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- const Function *F = FI;
+ const Function *F = &*FI;
if (F->isDeclaration()) {
if (F->use_empty())
@@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+ for (const GlobalVariable &I : M.globals())
+ VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
@@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
if (GVar->hasExternalLinkage()) {
@@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
printScalarConstant(Initializer, O);
}
} else {
- // The frontend adds zero-initializer to variables that don't have an
- // initial value, so skip warning for this case.
- if (!GVar->getInitializer()->isNullValue()) {
+ // The frontend adds zero-initializer to device and constant variables
+ // that don't have an initial value, and UndefValue to shared
+ // variables, so skip warning for this case.
+ if (!GVar->getInitializer()->isNullValue() &&
+ !isa<UndefValue>(GVar->getInitializer())) {
report_fatal_error("initial value of '" + GVar->getName() +
"' is not allowed in addrspace(" +
Twine(PTy->getAddressSpace()) + ")");
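As a side note on the two cases the comment in this hunk distinguishes, a minimal CUDA-level sketch (illustrative names, not taken from this change) of globals that reach the backend with those initializers:

__device__ int counter;          // written without an initializer: the frontend
                                 // adds a zero initializer for this device global
__global__ void k() {
  __shared__ float tile[128];    // statically-sized shared memory: emitted as an
                                 // addrspace(3) global whose initializer is an
                                 // UndefValue, so no error should be reported
}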
@@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
// Ptx allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
@@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
}
std::string
-NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
O << ".";
emitPTXAddressSpace(PTy->getAddressSpace(), O);
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
O << "[";
@@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
return;
}
-static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPrefTypeAlignment(Ty);
- const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ auto *ATy = dyn_cast<ArrayType>(Ty);
if (ATy)
- return getOpenCLAlignment(TD, ATy->getElementType());
+ return getOpenCLAlignment(DL, ATy->getElementType());
- const StructType *STy = dyn_cast<StructType>(Ty);
+ auto *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(TD, ETy);
+ unsigned int align = getOpenCLAlignment(DL, ETy);
if (align > alignStruct)
alignStruct = align;
}
return alignStruct;
}
- const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ auto *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return TD->getPointerPrefAlignment();
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPointerPrefAlignment();
+ return DL.getPrefTypeAlignment(Ty);
}
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
@@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
O << "_param_" << paramIndex;
}
-void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
-}
-
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
@@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
- MVT thePointerTy = TLI->getPointerTy(*TD);
+ MVT thePointerTy = TLI->getPointerTy(DL);
O << "(\n";
@@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(Ty);
+ align = DL.getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
continue;
}
// Just a scalar
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
if (PTy) {
// Special handling for pointer arguments to kernel
@@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
@@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// param has byVal attribute. So should be a pointer
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(ETy);
+ align = DL.getABITypeAlignment(ETy);
- unsigned sz = TD->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Further, if a part is vector, print the above for
// each vector element.
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts);
+ ComputeValueVTs(*TLI, DL, ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
@@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- int s = TD->getTypeAllocSize(CPV->getType());
+ int s = DL.getTypeAllocSize(CPV->getType());
if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
@@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
switch (CPV->getType()->getTypeID()) {
case Type::IntegerTyID: {
- const Type *ETy = CPV->getType();
+ Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ConvertIntToBytes<>(ptr, c);
@@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type *Ty = CFP->getType();
+ Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
ConvertFloatToBytes(ptr, float32);
@@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v, Cexpr);
}
- unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ unsigned int s = DL.getTypeAllocSize(CPV->getType());
aggBuffer->addZeros(s);
break;
}
@@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::StructTyID: {
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
- int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ int ElementSize = DL.getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
if (Bytes > ElementSize)
aggBuffer->addZeros(Bytes - ElementSize);
@@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
int Bytes;
// Old constants
@@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
if (i == (e - 1))
- Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
+ DL.getTypeAllocSize(ST) -
+ DL.getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
+ DL.getStructLayout(ST)->getElementOffset(i);
bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
@@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
-
- std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
-
- if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
- return true;
-
- return false;
-}
-
bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
if (C != CE)
return lowerConstantForGV(C, ProcessingGeneric);
@@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
@@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
@@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index f6f7685e76f9..76bf179896a8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -212,28 +212,21 @@ private:
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}
-
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
- void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
- void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
- void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
- bool isImageType(const Type *Ty);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -271,7 +264,7 @@ private:
// Build the map between type name and ID based on module's type
// symbol table.
- std::map<const Type *, std::string> TypeNameMap;
+ std::map<Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
@@ -282,19 +275,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
- void printOperandProper(const MachineOperand &MO);
-
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
-
- static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 69a229e32f43..95813c8430d1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -98,7 +98,7 @@ private:
/// This reordering exposes to optimizeMemoryInstruction more
/// optimization opportunities on loads and stores.
///
- /// If this function succesfully hoists an eliminable addrspacecast or V is
+ /// If this function successfully hoists an eliminable addrspacecast or V is
/// already such an addrspacecast, it returns the transformed value (which is
/// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
@@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
return false;
bool Changed = false;
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
+ for (BasicBlock &B : F) {
+ for (Instruction &I : B) {
if (isa<LoadInst>(I)) {
// V = load P
- Changed |= optimizeMemoryInstruction(I, 0);
+ Changed |= optimizeMemoryInstruction(&I, 0);
} else if (isa<StoreInst>(I)) {
// store V, P
- Changed |= optimizeMemoryInstruction(I, 1);
+ Changed |= optimizeMemoryInstruction(&I, 1);
}
}
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 6fd09c405260..62ca5e9f9f62 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E;) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
!llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
@@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
Value *Operand = II->getOperand(i);
if (isa<Constant>(Operand)) {
II->setOperand(
- i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder));
}
}
}
@@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) {
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; I++) {
- remapNamedMDNode(VM, I);
+ for (NamedMDNode &I : M.named_metadata()) {
+ remapNamedMDNode(VM, &I);
}
// Walk through the global variable initializers, and replace any use of
@@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0], NewOperands[1]);
case Instruction::FCmp:
// CompareConstantExpr (fcmp)
- assert(false && "Address space conversion should have no effect "
- "on float point CompareConstantExpr (fcmp)!");
- return C;
+ llvm_unreachable("Address space conversion should have no effect "
+ "on float point CompareConstantExpr (fcmp)!");
case Instruction::ExtractElement:
// ExtractElementConstantExpr
return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
@@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
NewOperands[0], C->getType());
}
- assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
- return C;
+ llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 232a611d1760..2d0098b392f4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
@@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
+ SmallVector<Value *, 8> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ Objs, F->getDataLayout());
+ for (Value *Obj : Objs) {
+ auto *A = dyn_cast<const Argument>(Obj);
+ if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
+ }
+
+ return true;
+}
+
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
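To illustrate what canLowerToLDG() above checks for, a hypothetical CUDA kernel of the following shape (not part of this change) is the intended case: the load's pointer traces back to a kernel argument that is noalias (__restrict__) and never written, so the load may go through the non-coherent (ld.global.nc) path.

__global__ void scale(const float *__restrict__ in,   // noalias + read-only
                      float *__restrict__ out, float k) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  out[i] = k * in[i];   // the load of in[i] satisfies the conditions above
}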
@@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD);
+ if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only available for .global and .shared
bool isVolatile = LD->isVolatile();
@@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
+ if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only available for .global and .shared
bool IsVolatile = MemSD->isVolatile();
@@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -5039,7 +5094,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
}
if (!Src)
return false;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (auto *PT = dyn_cast<PointerType>(Src->getType()))
return (PT->getAddressSpace() == spN);
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b75cf4040312..766369631e14 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// condition branches.
setJumpIsExpensive(true);
+ // Wide divides are _very_ slow. Try to reduce the width of the divide if
+ // possible.
+ addBypassSlowDiv(64, 32);
+
// By default, use the Source scheduling
if (sched4reg)
setSchedulingPreference(Sched::RegPressure);
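For context, addBypassSlowDiv(64, 32) enables the generic slow-division bypass, which guards a 64-bit divide with a cheap runtime width check and performs it in 32 bits when both operands fit. A source-level sketch of the effect (illustrative only, not the actual IR transformation):

#include <cstdint>

uint64_t div64(uint64_t a, uint64_t b) {
  if (((a | b) >> 32) == 0)             // both operands fit in 32 bits
    return (uint32_t)a / (uint32_t)b;   // fast, narrow divide
  return a / b;                         // slow 64-bit divide on the rare path
}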
@@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SELECT);
// Now deduce the information based on the above mentioned
// actions
@@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "(";
if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "_";
continue;
}
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
vtparts, &Offsets, 0);
@@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
-SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
- int idx, EVT v) const {
- std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
- std::stringstream suffix;
- suffix << idx;
- *name += suffix.str();
- return DAG.getTargetExternalSymbol(name->c_str(), v);
-}
-
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
std::string ParamSym;
@@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}
-SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
- return getExtSymb(DAG, ".HLPPARAM", idx);
-}
-
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
@@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
"struct._image3d_t",
"struct._sampler_t" };
- const Type *Ty = arg->getType();
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ Type *Ty = arg->getType();
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return false;
@@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!context)
return false;
- const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ auto *STy = dyn_cast<StructType>(PTy->getElementType());
const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
- for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
- if (TypeName == specialTypes[i])
- return true;
-
- return false;
+ return std::find(std::begin(specialTypes), std::end(specialTypes),
+ TypeName) != std::end(specialTypes);
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
- theArgs.push_back(I);
- argTypes.push_back(I->getType());
+ for (const Argument &I : F->args()) {
+ theArgs.push_back(&I);
+ argTypes.push_back(I.getType());
}
// argTypes.size() (or theArgs.size()) and Ins.size() need not match.
// Ins.size() will be larger
@@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint(
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-// NVPTX suuport vector of legal types of any length in Intrinsics because the
-// NVPTX specific type legalizer
-// will legalize them to the PTX supported length.
-bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
- if (isTypeLegal(VT))
- return true;
- if (VT.isVector()) {
- MVT eVT = VT.getVectorElementType();
- if (isTypeLegal(eVT))
- return true;
- }
- return false;
-}
-
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
switch (Intrinsic) {
default:
@@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// - [immAddr]
if (AM.BaseGV) {
- if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
- return false;
- return true;
+ return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
}
switch (AM.Scale) {
@@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
- return 4;
-}
-
//===----------------------------------------------------------------------===//
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
@@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Currently this detects patterns for integer min and max and
+ // lowers them to PTX-specific intrinsics that enable hardware
+ // support.
+
+ const SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC) return SDValue();
+
+ const SDValue LHS = Cond.getOperand(0);
+ const SDValue RHS = Cond.getOperand(1);
+ const SDValue True = N->getOperand(1);
+ const SDValue False = N->getOperand(2);
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ const EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
+
+ const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue Larger; // The larger of LHS and RHS when condition is true.
+ switch (CC) {
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Larger = RHS;
+ break;
+
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ Larger = LHS;
+ break;
+
+ default:
+ return SDValue();
+ }
+ const bool IsMax = (Larger == True);
+ const bool IsSigned = ISD::isSignedIntSetCC(CC);
+
+ unsigned IntrinsicId;
+ if (VT == MVT::i32) {
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
+ } else {
+ assert(VT == MVT::i64);
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
+}
+
enum OperandSignedness {
Signed = 0,
Unsigned,
@@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
APInt Val = CI->getAPIntValue();
if (LHSSign == Unsigned) {
- if (Val.isIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isIntN(OptSize);
} else {
- if (Val.isSignedIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isSignedIntN(OptSize);
}
} else {
OperandSignedness RHSSign;
if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
return false;
- if (LHSSign != RHSSign)
- return false;
-
- return true;
+ return LHSSign == RHSSign;
}
}
@@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DCI);
}
return SDValue();
}
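The newly wired-in SELECT combine (PerformSELECTCombine above) matches a select whose condition compares the same two values being selected, i.e. an integer min or max. A sketch of source patterns it targets and, per the code above, the intrinsics they would map to (examples are illustrative, not from this change):

int       smax(int a, int b)             { return a > b ? a : b; }  // llvm.nvvm.max.i
unsigned  umin(unsigned a, unsigned b)   { return a < b ? a : b; }  // llvm.nvvm.min.ui
long long smin(long long a, long long b) { return a < b ? a : b; }  // llvm.nvvm.min.ll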
@@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults(
void NVPTXSection::anchor() {}
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
- delete TextSection;
- delete DataSection;
- delete BSSSection;
- delete ReadOnlySection;
-
- delete StaticCtorSection;
- delete StaticDtorSection;
- delete LSDASection;
- delete EHFrameSection;
- delete DwarfAbbrevSection;
- delete DwarfInfoSection;
- delete DwarfLineSection;
- delete DwarfFrameSection;
- delete DwarfPubTypesSection;
- delete DwarfDebugInlineSection;
- delete DwarfStrSection;
- delete DwarfLocSection;
- delete DwarfARangesSection;
- delete DwarfRangesSection;
+ delete static_cast<NVPTXSection *>(TextSection);
+ delete static_cast<NVPTXSection *>(DataSection);
+ delete static_cast<NVPTXSection *>(BSSSection);
+ delete static_cast<NVPTXSection *>(ReadOnlySection);
+
+ delete static_cast<NVPTXSection *>(StaticCtorSection);
+ delete static_cast<NVPTXSection *>(StaticDtorSection);
+ delete static_cast<NVPTXSection *>(LSDASection);
+ delete static_cast<NVPTXSection *>(EHFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
+ delete static_cast<NVPTXSection *>(DwarfInfoSection);
+ delete static_cast<NVPTXSection *>(DwarfLineSection);
+ delete static_cast<NVPTXSection *>(DwarfFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
+ delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
+ delete static_cast<NVPTXSection *>(DwarfStrSection);
+ delete static_cast<NVPTXSection *>(DwarfLocSection);
+ delete static_cast<NVPTXSection *>(DwarfARangesSection);
+ delete static_cast<NVPTXSection *>(DwarfRangesSection);
}
MCSection *
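A note on the static_casts in the destructor above: they are presumably needed because the section members are held as pointers to a base class whose destructor is not publicly accessible, so deletion has to go through the concrete type. A minimal C++ sketch of the idiom (types are illustrative, not LLVM's):

struct Section {
protected:
  ~Section() = default;                 // not deletable through a Section *
};
struct MySection : Section {};          // concrete type with a public destructor

void destroy(Section *S) {
  // delete S;                          // ill-formed: ~Section() is protected
  delete static_cast<MySection *>(S);   // fine when S is really a MySection
}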
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index e5c37321a33b..60914c1d09b4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -441,13 +441,9 @@ public:
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
- SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;
- bool isTypeSupportedInIntrinsic(MVT VT) const;
-
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const override;
@@ -459,8 +455,13 @@ public:
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
+ bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
+ // Truncating 64-bit to 32-bit is free in SASS.
+ if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
+ return false;
+ return SrcTy->getPrimitiveSizeInBits() == 64 &&
+ DstTy->getPrimitiveSizeInBits() == 32;
+ }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const override {
@@ -515,11 +516,7 @@ public:
private:
const NVPTXSubtarget &STI; // cache the subtarget here
-
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
- EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
- SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 76d6597c6e20..9f3cf4551955 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg(
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
- if (DestRC != SrcRC)
- report_fatal_error("Attempted to created cross-class register copy");
-
- if (DestRC == &NVPTX::Int32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int1RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int16RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else {
+ if (DestRC->getSize() != SrcRC->getSize())
+ report_fatal_error("Copy one register into another with a different width");
+
+ unsigned Op;
+ if (DestRC == &NVPTX::Int1RegsRegClass) {
+ Op = NVPTX::IMOV1rr;
+ } else if (DestRC == &NVPTX::Int16RegsRegClass) {
+ Op = NVPTX::IMOV16rr;
+ } else if (DestRC == &NVPTX::Int32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
+ : NVPTX::BITCONVERT_32_F2I);
+ } else if (DestRC == &NVPTX::Int64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
+ : NVPTX::BITCONVERT_64_F2I);
+ } else if (DestRC == &NVPTX::Float32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
+ : NVPTX::BITCONVERT_32_I2F);
+ } else if (DestRC == &NVPTX::Float64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
+ : NVPTX::BITCONVERT_64_I2F);
+ } else {
llvm_unreachable("Bad register copy");
}
+ BuildMI(MBB, I, DL, get(Op), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
return false;
}
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case NVPTX::INT_PTX_SREG_NTID_X:
- case NVPTX::INT_PTX_SREG_NTID_Y:
- case NVPTX::INT_PTX_SREG_NTID_Z:
- case NVPTX::INT_PTX_SREG_TID_X:
- case NVPTX::INT_PTX_SREG_TID_Y:
- case NVPTX::INT_PTX_SREG_TID_Z:
- case NVPTX::INT_PTX_SREG_CTAID_X:
- case NVPTX::INT_PTX_SREG_CTAID_Y:
- case NVPTX::INT_PTX_SREG_CTAID_Z:
- case NVPTX::INT_PTX_SREG_NCTAID_X:
- case NVPTX::INT_PTX_SREG_NCTAID_Y:
- case NVPTX::INT_PTX_SREG_NCTAID_Z:
- case NVPTX::INT_PTX_SREG_WARPSIZE:
- return true;
- }
-}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 179c06887198..3e407223f010 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -56,7 +56,6 @@ public:
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
- bool isReadSpecialReg(MachineInstr &MI) const;
virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 0bf72febc4a0..f770c2acaab5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -6,6 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
+// \file
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
// the size is large or is not a compile-time constant.
//
@@ -18,19 +20,20 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "nvptx"
using namespace llvm;
namespace {
+
// actual analysis class, which is a functionpass
struct NVPTXLowerAggrCopies : public FunctionPass {
static char ID;
@@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
return "Lower aggregate copies/intrinsics into loops";
}
};
-} // namespace
char NVPTXLowerAggrCopies::ID = 0;
-// Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(
- Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
- Type *indType = len->getType();
+// Lower memcpy to loop.
+void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
- // srcAddr and dstAddr are expected to be pointer types,
+ // SrcAddr and DstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
- srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
- dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
- IRBuilder<> loop(loopBB);
- // The loop index (ind) is a phi node.
- PHINode *ind = loop.CreatePHI(indType, 0);
- // Incoming value for ind is 0
- ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
- // load from srcAddr+ind
+ // load from SrcAddr+LoopIndex
// TODO: we can leverage the align parameter of llvm.memcpy for more efficient
// word-sized loads and stores.
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
- srcVolatile);
- // store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
- dstVolatile);
-
- // The value for ind coming from backedge is (ind + 1)
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
- ind->addIncoming(newind, loopBB);
-
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from backedge is (LoopIndex + 1)
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
-// Lower MemSetInst to loop
-static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
- Value *len, Value *val, LLVMContext &Context,
- Function &F) {
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+
+  // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = ConvertedInst->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
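
As a quick, standalone sanity check of the direction choice above (illustrative C++ only, not part of this patch), the following contrasts a naive forward byte copy with the backward copy the lowering uses when the destination overlaps the source at a higher address:

#include <cstddef>
#include <cstdio>

// Forward byte copy: clobbers the source when dst overlaps src at a higher address.
static void copyForward(char *d, const char *s, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    d[i] = s[i];
}

// Backward byte copy: the order the lowering picks for the src < dst case.
static void copyBackward(char *d, const char *s, std::size_t n) {
  while (n--)
    d[n] = s[n];
}

int main() {
  char a[] = "abcdefg";
  char b[] = "abcdefg";
  copyForward(a + 2, a, 4);   // overlapping, dst > src: yields "abababg"
  copyBackward(b + 2, b, 4);  // overlapping, dst > src: yields "ababcdg"
  std::printf("%s\n%s\n", a, b);
  return 0;
}

A real memmove(a + 2, a, 4) must produce "ababcdg", which only the backward loop delivers here; that is why the lowering branches on compare_src_dst before selecting a loop.
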
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+// Lower memset to loop.
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr,
+ Value *CopyLen, Value *SetValue, LLVMContext &Context,
+ Function &F) {
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
// Cast pointer to the type of value getting stored
- dstAddr =
- builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
- IRBuilder<> loop(loopBB);
- PHINode *ind = loop.CreatePHI(len->getType(), 0);
- ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
+ LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
- loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ false);
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
- ind->addIncoming(newind, loopBB);
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
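
A rough source-level mirror (illustrative only) of the loop convertMemSetToLoop emits; note the do-while shape, with the store placed before the first index comparison:

#include <cstddef>

// Mirrors the emitted CFG: store, increment index, compare-ult, conditional branch.
static void memsetLoop(unsigned char *dst, unsigned char value, std::size_t n) {
  std::size_t i = 0;
  do {
    dst[i] = value;  // the store happens before the first check of the index
    ++i;
  } while (i < n);
}

The loop block is reached unconditionally from the split predecessor, so the first store is not guarded by a length check.
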
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
- SmallVector<LoadInst *, 4> aggrLoads;
- SmallVector<MemTransferInst *, 4> aggrMemcpys;
- SmallVector<MemSetInst *, 4> aggrMemsets;
+ SmallVector<LoadInst *, 4> AggrLoads;
+ SmallVector<MemIntrinsic *, 4> MemCalls;
const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
- //
- // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
- //
+ // Collect all aggregate loads and mem* calls.
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
++II) {
- if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (!load->hasOneUse())
+ if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+ if (!LI->hasOneUse())
continue;
- if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
continue;
- User *use = load->user_back();
- if (StoreInst *store = dyn_cast<StoreInst>(use)) {
- if (store->getOperand(0) != load)
+ if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
+ if (SI->getOperand(0) != LI)
continue;
- aggrLoads.push_back(load);
- }
- } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
- Value *len = intr->getLength();
- // If the number of elements being copied is greater
- // than MaxAggrCopySize, lower it to a loop
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemcpys.push_back(intr);
- }
- } else {
- // turn variable length memcpy/memmov into loop
- aggrMemcpys.push_back(intr);
+ AggrLoads.push_back(LI);
}
- } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
- Value *len = memsetintr->getLength();
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemsets.push_back(memsetintr);
+ } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+ // Convert intrinsic calls with variable size or with constant size
+ // of at least the MaxAggrCopySize threshold.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
+ if (LenCI->getZExtValue() >= MaxAggrCopySize) {
+ MemCalls.push_back(IntrCall);
}
} else {
- // turn variable length memset into loop
- aggrMemsets.push_back(memsetintr);
+ MemCalls.push_back(IntrCall);
}
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
- (aggrMemsets.size() == 0))
+
+ if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
return false;
+ }
//
// Do the transformation of an aggr load/copy/set to a loop
//
- for (LoadInst *load : aggrLoads) {
- StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
- Value *srcAddr = load->getOperand(0);
- Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL.getTypeStoreSize(load->getType());
- Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
-
- convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
- store->isVolatile(), Context, F);
-
- store->eraseFromParent();
- load->eraseFromParent();
+ for (LoadInst *LI : AggrLoads) {
+ StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
+ Value *SrcAddr = LI->getOperand(0);
+ Value *DstAddr = SI->getOperand(1);
+ unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
+ Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
+
+ convertMemCpyToLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ SI->eraseFromParent();
+ LI->eraseFromParent();
}
- for (MemTransferInst *cpy : aggrMemcpys) {
- convertTransferToLoop(/* splitAt */ cpy,
- /* srcAddr */ cpy->getSource(),
- /* dstAddr */ cpy->getDest(),
- /* len */ cpy->getLength(),
- /* srcVolatile */ cpy->isVolatile(),
- /* dstVolatile */ cpy->isVolatile(),
+ // Transform mem* intrinsic calls.
+ for (MemIntrinsic *MemCall : MemCalls) {
+ if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+ convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
/* Context */ Context,
/* Function F */ F);
- cpy->eraseFromParent();
- }
-
- for (MemSetInst *memsetinst : aggrMemsets) {
- Value *len = memsetinst->getLength();
- Value *val = memsetinst->getValue();
- convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
- F);
- memsetinst->eraseFromParent();
+ } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+ convertMemMoveToLoop(/* ConvertedInst */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+ convertMemSetToLoop(/* ConvertedInst */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Context */ Context,
+ /* Function F */ F);
+ }
+ MemCall->eraseFromParent();
}
return true;
}
+} // namespace
+
+namespace llvm {
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
+ "Lower aggregate copies, and llvm.mem* intrinsics into loops",
+ false, false)
+
FunctionPass *llvm::createLowerAggrCopies() {
return new NVPTXLowerAggrCopies();
}
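
With the INITIALIZE_PASS registration added above, the pass can be driven through the usual factory. A minimal sketch with the legacy pass manager (assuming an in-tree consumer that can see the target-internal NVPTX.h header; the helper name is illustrative):

#include "NVPTX.h"                        // target-internal; declares createLowerAggrCopies()
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Runs the aggregate-copy / llvm.mem* lowering over every function in M.
static void lowerAggrCopiesIn(Module &M) {
  legacy::FunctionPassManager FPM(&M);
  FPM.add(createLowerAggrCopies());
  FPM.doInitialization();
  for (Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F);
  FPM.doFinalization();
}

The backend schedules this pass through its own pass setup; the snippet is only standalone plumbing for illustration.
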
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 93d0025d8f53..624052e9b981 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
- // of instructions this is unecessary and may introduce redudant
+ // of instructions this is unnecessary and may introduce redundant
// address cast.
const auto &AllocaUse = *UI++;
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index b533f316d8a9..6656077348a1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -47,6 +47,36 @@
// ...
// }
//
+// 3. Convert pointers in a byval kernel parameter to pointers in the global
+// address space. Like #2, this allows NVPTX to emit more ld/st.global. E.g.,
+//
+// struct S {
+// int *x;
+// int *y;
+// };
+// __global__ void foo(S s) {
+// int *b = s.y;
+// // use b
+// }
+//
+// "b" points to the global address space. In the IR level,
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// ; use %b
+// }
+//
+// becomes
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// %b_global = addrspacecast i32* %b to i32 addrspace(1)*
+// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32*
+// ; use %b_generic
+// }
+//
// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
// don't cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//
@@ -54,6 +84,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
// handle byval parameters
- void handleByValParam(Argument *);
- // handle non-byval pointer parameters
- void handlePointerParam(Argument *);
+ void handleByValParam(Argument *Arg);
+ // Knowing Ptr must point to the global address space, this function
+ // addrspacecasts Ptr to global and then back to generic. This allows
+ // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
+ // loads/stores that appear later.
+ void markPointerAsGlobal(Value *Ptr);
public:
static char ID; // Pass identification, replacement for typeid
@@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
//
// The above code allocates some space in the stack and copies the incoming
// struct from param space to local space.
-// Then replace all occurences of %d by %temp.
+// Then replace all occurrences of %d by %temp.
// =============================================================================
void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
Function *Func = Arg->getParent();
@@ -128,27 +162,33 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
new StoreInst(LI, AllocA, FirstInst);
}
-void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
- assert(!Arg->hasByValAttr() &&
- "byval params should be handled by handleByValParam");
-
- // Do nothing if the argument already points to the global address space.
- if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+ if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
return;
- Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
- Instruction *ArgInGlobal = new AddrSpaceCastInst(
- Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+ // Decide where to emit the addrspacecast pair.
+ BasicBlock::iterator InsertPt;
+ if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ // Insert at the function entry if Ptr is an argument.
+ InsertPt = Arg->getParent()->getEntryBlock().begin();
+ } else {
+ // Insert right after Ptr if Ptr is an instruction.
+ InsertPt = ++cast<Instruction>(Ptr)->getIterator();
+ assert(InsertPt != InsertPt->getParent()->end() &&
+ "We don't call this function with Ptr being a terminator.");
+ }
+
+ Instruction *PtrInGlobal = new AddrSpaceCastInst(
+ Ptr, PointerType::get(Ptr->getType()->getPointerElementType(),
ADDRESS_SPACE_GLOBAL),
- Arg->getName(), FirstInst);
- Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
- Arg->getName(), FirstInst);
- // Replace with ArgInGeneric all uses of Args except ArgInGlobal.
- Arg->replaceAllUsesWith(ArgInGeneric);
- ArgInGlobal->setOperand(0, Arg);
+ Ptr->getName(), &*InsertPt);
+ Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
+ Ptr->getName(), &*InsertPt);
+ // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal.
+ Ptr->replaceAllUsesWith(PtrInGeneric);
+ PtrInGlobal->setOperand(0, Ptr);
}
-
// =============================================================================
// Main function for this pass.
// =============================================================================
@@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
if (!isKernelFunction(F))
return false;
+ if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
+ // Mark pointers in byval structs as global.
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->getType()->isPointerTy()) {
+ Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
+ F.getParent()->getDataLayout());
+ if (Argument *Arg = dyn_cast<Argument>(UO)) {
+ if (Arg->hasByValAttr()) {
+ // LI is a load from a pointer within a byval kernel parameter.
+ markPointerAsGlobal(LI);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
for (Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
if (Arg.hasByValAttr())
handleByValParam(&Arg);
else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
- handlePointerParam(&Arg);
+ markPointerAsGlobal(&Arg);
}
}
return true;
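
The new scan in runOnFunction uses GetUnderlyingObject to spot loads whose pointer operand traces back to a byval kernel parameter. A self-contained restatement of that check (the helper name isLoadFromByValParam is illustrative, not part of the patch):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

// True if LI produces a pointer and its address is ultimately a byval
// argument, i.e. the case that markPointerAsGlobal handles above.
static bool isLoadFromByValParam(llvm::LoadInst *LI, const llvm::DataLayout &DL) {
  if (!LI->getType()->isPointerTy())
    return false;
  llvm::Value *UO = llvm::GetUnderlyingObject(LI->getPointerOperand(), DL);
  if (auto *Arg = llvm::dyn_cast<llvm::Argument>(UO))
    return Arg->hasByValAttr();
  return false;
}
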
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 46b4b33e7e40..81a606d7535c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -68,7 +68,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -110,7 +110,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5fd69a6815a8..17019d7b364d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn())
+ if (I->isReturnBlock())
TFI.emitEpilogue(MF, *I);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index 0d2627d62ebd..45a7309479ee 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -19,15 +19,14 @@
#include <vector>
namespace llvm {
-/// NVPTXSection - Represents a section in PTX
-/// PTX does not have sections. We create this class in order to use
-/// the ASMPrint interface.
+/// Represents a section in PTX. PTX does not have sections. We create this class
+/// in order to use the ASMPrint interface.
///
-class NVPTXSection : public MCSection {
+class NVPTXSection final : public MCSection {
virtual void anchor();
public:
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
- virtual ~NVPTXSection() {}
+ ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
/// to a section.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 248f9e117d83..aa931b134da9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
@@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
- initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
- initializeNVPTXFavorNonGenericAddrSpacesPass(
- *PassRegistry::getPassRegistry());
- initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
- initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeNVVMReflectPass(PR);
+ initializeGenericToNVVMPass(PR);
+ initializeNVPTXAllocaHoistingPass(PR);
+ initializeNVPTXAssignValidGlobalNamesPass(PR);
+ initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerAggrCopiesPass(PR);
}
static std::string computeDataLayout(bool is64Bit) {
@@ -139,6 +141,10 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+ // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ void addEarlyCSEOrGVNPass();
};
} // end anonymous namespace
@@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
});
}
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
- TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+
+ // === Propagate special address spaces ===
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
+
+ // === Straight-line scalar optimizations ===
addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
// EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
// for some of our benchmarks.
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(createGVNPass());
- else
- addPass(createEarlyCSEPass());
+ addEarlyCSEOrGVNPass();
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
addPass(createNaryReassociatePass());
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+
+ // === LSR and other generic IR passes ===
+ TargetPassConfig::addIRPasses();
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
+ //
+ // %0 = add %a, %b
+ // %1 = add %b, %a
+ //
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 5ecdc8748830..0f88ddfaa934 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -48,8 +48,7 @@ public:
void Initialize(MCContext &ctx, const TargetMachine &TM) override {
TargetLoweringObjectFile::Initialize(ctx, TM);
TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
- DataSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData());
BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
ReadOnlySection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
@@ -84,7 +83,7 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override {
return ReadOnlySection;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e7250cdba5ac..6e679dd0257c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}
-unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 5bcd1e27a558..0946a3293eec 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -52,7 +52,7 @@ public:
bool isSourceOfDivergence(const Value *V);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1f178af41670..578b466568ae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
return false;
}
-bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
- if ((id == Intrinsic::nvvm_barrier0) ||
- (id == Intrinsic::nvvm_barrier0_popc) ||
- (id == Intrinsic::nvvm_barrier0_and) ||
- (id == Intrinsic::nvvm_barrier0_or) ||
- (id == Intrinsic::cuda_syncthreads))
- return true;
- return false;
-}
-
-// Interface for checking all memory space transfer related intrinsics
-bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
- if (id == Intrinsic::nvvm_ptr_local_to_gen ||
- id == Intrinsic::nvvm_ptr_shared_to_gen ||
- id == Intrinsic::nvvm_ptr_global_to_gen ||
- id == Intrinsic::nvvm_ptr_constant_to_gen ||
- id == Intrinsic::nvvm_ptr_gen_to_global ||
- id == Intrinsic::nvvm_ptr_gen_to_shared ||
- id == Intrinsic::nvvm_ptr_gen_to_local ||
- id == Intrinsic::nvvm_ptr_gen_to_constant ||
- id == Intrinsic::nvvm_ptr_gen_to_param) {
- return true;
- }
-
- return false;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// provide an option to ignore GEP indicies for find out the base address only
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
- V = V->stripPointerCasts();
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (ignore_GEP_indices)
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- }
- break;
- }
- return V;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// - ignore GEP indicies for find out the base address only, and
-// - tracking PHINode
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
- if (processed.find(V) != processed.end())
- return nullptr;
- processed.insert(V);
-
- const Value *V2 = V->stripPointerCasts();
- if (V2 != V && processed.find(V2) != processed.end())
- return nullptr;
- processed.insert(V2);
-
- V = V2;
-
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
- if (V != V2 && processed.find(V) != processed.end())
- return nullptr;
- processed.insert(PN);
- const Value *common = nullptr;
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- const Value *pv = PN->getIncomingValue(i);
- const Value *base = skipPointerTransfer(pv, processed);
- if (base) {
- if (!common)
- common = base;
- else if (common != base)
- return PN;
- }
- }
- if (!common)
- return PN;
- V = common;
- }
- break;
- }
- return V;
-}
-
-// The following are some useful utilities for debuggung
+// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
@@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) {
return;
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
- BasicBlock *B = it;
+ BasicBlock *B = &*it;
if (strcmp(B->getName().data(), blockName) == 0) {
B->dump();
return;
@@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
return nullptr;
}
-// Dump an instruction by nane
+// Dump an instruction by name
void llvm::dumpInst(Value *base, char *instName) {
Instruction *I = getInst(base, instName);
if (I)
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7e2ce73daaa3..a5262cb7412f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &);
bool getAlign(const llvm::Function &, unsigned index, unsigned &);
bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
-bool isBarrierIntrinsic(llvm::Intrinsic::ID);
-
-/// make_vector - Helper function which is useful for building temporary vectors
-/// to pass into type construction of CallInst ctors. This turns a null
-/// terminated list of pointers (or other value types) into a real live vector.
-///
-template <typename T> inline std::vector<T> make_vector(T A, ...) {
- va_list Args;
- va_start(Args, A);
- std::vector<T> Result;
- Result.push_back(A);
- while (T Val = va_arg(Args, T))
- Result.push_back(Val);
- va_end(Args);
- return Result;
-}
-
-bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
-const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *
-skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index a237247e4833..e69bbba9f193 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V2I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v2i16 V2I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V4I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v4i16 V4I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V2I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v2i8 V2I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V4I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v4i8 V4I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V2I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v2i32 V2I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V2F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v2f32 V2F32Regs:$src), imm:$c))],
FMOV32rr>;
@@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
(ins V2I64Regs:$src, i8imm:$c),
"mov.u64 \t$dst, $src${c:vecelem};",
- [(set Int64Regs:$dst, (vector_extract
+ [(set Int64Regs:$dst, (extractelt
(v2i64 V2I64Regs:$src), imm:$c))],
IMOV64rr>;
@@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
(ins V2F64Regs:$src, i8imm:$c),
"mov.f64 \t$dst, $src${c:vecelem};",
- [(set Float64Regs:$dst, (vector_extract
+ [(set Float64Regs:$dst, (extractelt
(v2f64 V2F64Regs:$src), imm:$c))],
FMOV64rr>;
@@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V4I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v4i32 V4I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V4F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v4f32 V4F32Regs:$src), imm:$c))],
FMOV32rr>;
}
@@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I8Regs:$dst,
- (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v4i8
def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
@@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I8Regs:$dst,
- (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v2i16
def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
@@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I16Regs:$dst,
- (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v4i16
def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
@@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I16Regs:$dst,
- (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v2i32
def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
@@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
"mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V2I32Regs:$dst,
- (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v2f32
def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
@@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
"mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V2F32Regs:$dst,
- (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
// Insert v2i64
def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
@@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
"mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u64 \t$dst${c:vecelem}, $val;",
[(set V2I64Regs:$dst,
- (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
- IMOV64rr>;
+ (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+ IMOV64rr>;
// Insert v2f64
def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
@@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
"mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f64 \t$dst${c:vecelem}, $val;",
[(set V2F64Regs:$dst,
- (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
- FMOV64rr>;
+ (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+ FMOV64rr>;
// Insert v4i32
def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
@@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
"mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V4I32Regs:$dst,
- (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v4f32
def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
@@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
"mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V4F32Regs:$dst,
- (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
}
class BinOpAsmString<string c> {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5e375b7852e4..20ab5db584d2 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() {
for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ",");
+ StringRef(ReflectList[i]).split(NameValList, ',');
for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, "=");
+ NameValList[j].split(NameValPair, '=');
assert(NameValPair.size() == 2 && "name=val expected");
std::stringstream ValStream(NameValPair[1]);
int Val;