aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/R600
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600')
-rw-r--r--lib/Target/R600/AMDGPU.h7
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp58
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h4
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp14
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h2
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td10
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp8
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h2
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp2
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp8
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h4
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp9
-rw-r--r--lib/Target/R600/R600Instructions.td8
-rw-r--r--lib/Target/R600/SIISelLowering.cpp159
-rw-r--r--lib/Target/R600/SIISelLowering.h2
-rw-r--r--lib/Target/R600/SIInstructions.td8
18 files changed, 184 insertions, 125 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 9b3606372035..0a05d25189b0 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -105,7 +105,7 @@ namespace ShaderType {
/// a separate piece of memory that is unique from other
/// memory locations.
namespace AMDGPUAS {
-enum AddressSpaces {
+enum AddressSpaces : unsigned {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
@@ -137,7 +137,10 @@ enum AddressSpaces {
CONSTANT_BUFFER_14 = 22,
CONSTANT_BUFFER_15 = 23,
ADDRESS_NONE = 24, ///< Address space for unknown memory.
- LAST_ADDRESS = ADDRESS_NONE
+ LAST_ADDRESS = ADDRESS_NONE,
+
+ // Some places use this if the address space can't be determined.
+ UNKNOWN_ADDRESS_SPACE = ~0u
};
} // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index d00ae78c99b0..d56838ec2019 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -257,9 +257,22 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
+ setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
+
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
@@ -301,6 +314,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SMIN, MVT::i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::i32, Legal);
+ setOperationAction(ISD::SMAX, MVT::i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::i32, Legal);
+
if (!Subtarget->hasFFBH())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
@@ -962,17 +980,17 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imax:
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
- return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
- return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umin:
- return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+ return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umul24:
return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
@@ -1050,7 +1068,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
- return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+ return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1));
}
/// Linear Interpolation
@@ -1149,7 +1167,7 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
return SDValue();
}
-/// \brief Generate Min/Max node
+// FIXME: Remove this when combines added to DAGCombiner.
SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
@@ -1165,22 +1183,22 @@ SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
switch (CCOpcode) {
case ISD::SETULE:
case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
+ unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
+ unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
+ unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETUGE:
case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
+ unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
default:
@@ -2644,11 +2662,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(COS_HW)
NODE_NAME_CASE(SIN_HW)
NODE_NAME_CASE(FMAX_LEGACY)
- NODE_NAME_CASE(SMAX)
- NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN_LEGACY)
- NODE_NAME_CASE(SMIN)
- NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(FMAX3)
NODE_NAME_CASE(SMAX3)
NODE_NAME_CASE(UMAX3)
@@ -2794,14 +2808,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
break;
}
- case AMDGPUISD::SMAX:
- case AMDGPUISD::UMAX:
- case AMDGPUISD::SMIN:
- case AMDGPUISD::UMIN:
- computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
- KnownZero, KnownOne, DAG, Depth);
- break;
-
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW: {
KnownZero = APInt::getHighBitsSet(32, 31);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index c9f198129efc..fbb7d3c88437 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -228,11 +228,7 @@ enum NodeType : unsigned {
COS_HW,
SIN_HW,
FMAX_LEGACY,
- SMAX,
- UMAX,
FMIN_LEGACY,
- SMIN,
- UMIN,
FMAX3,
SMAX3,
UMAX3,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index f0f10ca59723..64e295f1144c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -152,17 +152,15 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 07042b59be7b..8fd27a17638b 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -87,9 +87,11 @@ public:
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
int FrameIndex) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
public:
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 790f34cea8cd..b413897d9d23 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -94,16 +94,6 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
[]
>;
-// out = min(a, b) a and b are signed ints
-def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are unsigned ints
-def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.
// out = max(a, b, c) a, b and c are floats
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index 9565e3fd5fa6..20831460b933 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -64,25 +64,25 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
break;
}
case MachineOperand::MO_TargetIndex: {
assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
MCOp = MCOperand::createExpr(Expr);
break;
}
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
MCOp = MCOperand::createExpr(Expr);
break;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index b262cdf57712..a5a901c739d4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -272,7 +272,7 @@ public:
}
bool enableSubRegLiveness() const override {
- return false;
+ return true;
}
};
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 19bffd575117..95025a6e29f1 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -1084,7 +1084,7 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
case AsmToken::Identifier:
Operands.push_back(AMDGPUOperand::CreateExpr(
- MCSymbolRefExpr::Create(getContext().getOrCreateSymbol(
+ MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
Parser.getTok().getString()), getContext()), S));
Parser.Lex();
return MatchOperand_Success;
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 279c3eb1912f..f70676943bb3 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -337,7 +337,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
} else if (Op.isExpr()) {
const MCExpr *Exp = Op.getExpr();
- Exp->print(O);
+ Exp->print(O, &MAI);
} else {
llvm_unreachable("unknown operand type in printOperand");
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 2605ca52dfde..3713223697ed 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -25,18 +25,18 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
- void ExecutePostLayoutBinding(MCAssembler &Asm,
+ void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
- void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override {
assert(!"Not implemented");
}
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
@@ -64,7 +64,7 @@ public:
} //End anonymous namespace
-void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
Asm.writeSectionData(&*I, Layout);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 19d89fb27caa..028a86dfc7ad 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -11,7 +11,7 @@
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() {
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MaxInstLength = 16;
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
index 8f75c76c4257..a5bac51e356f 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -17,7 +17,7 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
-class StringRef;
+class Triple;
// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
// you will need to make sure your new class sets PrivateGlobalPrefix to
@@ -26,7 +26,7 @@ class StringRef;
// with 'L' as a local symbol.
class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
- explicit AMDGPUMCAsmInfo(StringRef &TT);
+ explicit AMDGPUMCAsmInfo(const Triple &TT);
};
} // namespace llvm
#endif
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index a809564e3be0..e683498d52a5 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -148,15 +149,11 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
}
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 4; i++) {
- OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
- }
+ support::endian::Writer<support::little>(OS).write(Value);
}
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
- for (unsigned i = 0; i < 8; i++) {
- EmitByte((Value >> (8 * i)) & 0xff, OS);
- }
+ support::endian::Writer<support::little>(OS).write(Value);
}
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 7126c82c0331..7beed092b3f7 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -781,10 +781,10 @@ def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
-def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
-def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
-def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
-def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>;
def SETE_INT : R600_2OP <
0x3A, "SETE_INT",
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 52bf2aeb87de..12d08cf4c7f5 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -155,7 +155,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
@@ -211,6 +210,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
+ setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine(ISD::SMAX);
+ setTargetDAGCombine(ISD::UMIN);
+ setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
@@ -251,47 +254,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
return false;
}
-// FIXME: This really needs an address space argument. The immediate offset
-// size is different for different sets of memory instruction sets.
-
-// The single offset DS instructions have a 16-bit unsigned byte offset.
-//
-// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
-// r + i with addr64. 32-bit has more addressing mode options. Depending on the
-// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
-//
-// SMRD instructions have an 8-bit, dword offset.
-//
bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty, unsigned AS) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
- // Allow a 16-bit unsigned immediate field, since this is what DS instructions
- // use.
- if (!isUInt<16>(AM.BaseOffs))
- return false;
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // SMRD instructions have an 8-bit, dword offset.
+ //
+ // Assume nonunifom access, since the address space isn't enough to know
+ // what instruction we will use, and since we don't know if this is a load
+ // or store and scalar stores are only available on VI.
+ //
+ // We also know if we are doing an extload, we can't do a scalar load.
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
- // Only support r+r,
- switch (AM.Scale) {
- case 0: // "r+i" or just "i", depending on HasBaseReg.
- break;
- case 1:
- if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ // FIXME: Since we can split immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
return false;
- // Otherwise we have r+r or r+i.
- break;
- case 2:
- if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ }
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // Basic, single offset DS instructions allow a 16-bit unsigned immediate
+ // field.
+ // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
+ // an 8-bit dword offset but we don't know the alignment here.
+ if (!isUInt<16>(AM.BaseOffs))
return false;
- // Allow 2*r as r+r.
- break;
- default: // Don't allow n * r
+
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
+ return true;
+
+ if (AM.Scale == 1 && AM.HasBaseReg)
+ return true;
+
return false;
}
-
- return true;
+ case AMDGPUAS::FLAT_ADDRESS: {
+ // Flat instructions do not have offsets, and only have the register
+ // address.
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+ }
+ default:
+ llvm_unreachable("unhandled address space");
+ }
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
@@ -368,6 +407,12 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return TII->isInlineConstant(Imm);
}
+static EVT toIntegerVT(EVT VT) {
+ if (VT.isVector())
+ return VT.changeVectorElementTypeToInteger();
+ return MVT::getIntegerVT(VT.getSizeInBits());
+}
+
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
@@ -380,20 +425,42 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
- MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, SL, MVT::i64));
+ SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
+ MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Offset, SL, PtrVT));
SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD,
+ unsigned Align = DL->getABITypeAlignment(Ty);
+
+ if (VT != MemVT && VT.isFloatingPoint()) {
+ // Do an integer load and convert.
+ // FIXME: This is mostly because load legalization after type legalization
+ // doesn't handle FP extloads.
+ assert(VT.getScalarType() == MVT::f32 &&
+ MemVT.getScalarType() == MVT::f16);
+
+ EVT IVT = toIntegerVT(VT);
+ EVT MemIVT = toIntegerVT(MemVT);
+ SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
+ IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ Align); // Alignment
+ return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ }
+
+ ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
true, // isNonTemporal
true, // isInvariant
- DL->getABITypeAlignment(Ty)); // Alignment
+ Align); // Alignment
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -1570,15 +1637,15 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
return AMDGPUISD::FMAX3;
- case AMDGPUISD::SMAX:
+ case ISD::SMAX:
return AMDGPUISD::SMAX3;
- case AMDGPUISD::UMAX:
+ case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
return AMDGPUISD::FMIN3;
- case AMDGPUISD::SMIN:
+ case ISD::SMIN:
return AMDGPUISD::SMIN3;
- case AMDGPUISD::UMIN:
+ case ISD::UMIN:
return AMDGPUISD::UMIN3;
default:
llvm_unreachable("Not a min/max opcode");
@@ -1664,10 +1731,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM: // TODO: What about fmax_legacy?
case ISD::FMINNUM:
- case AMDGPUISD::SMAX:
- case AMDGPUISD::SMIN:
- case AMDGPUISD::UMAX:
- case AMDGPUISD::UMIN: {
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index a95354c38816..a956b013bdb1 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -63,7 +63,7 @@ public:
EVT /*VT*/) const override;
bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const override;
+ Type *Ty, unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 839c2e9ecdd2..2f39074802b7 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -224,16 +224,16 @@ defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
} // End Uses = [SCC]
defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+ [(set i32:$dst, (smin i32:$src0, i32:$src1))]
>;
defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+ [(set i32:$dst, (umin i32:$src0, i32:$src1))]
>;
defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+ [(set i32:$dst, (smax i32:$src0, i32:$src1))]
>;
defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+ [(set i32:$dst, (umax i32:$src0, i32:$src1))]
>;
} // End Defs = [SCC]