diff options
Diffstat (limited to 'lib/Target/R600')
-rw-r--r-- | lib/Target/R600/AMDGPU.h | 7 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 58 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.cpp | 14 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUInstrInfo.td | 10 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUMCInstLower.cpp | 8 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUSubtarget.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 8 | ||||
-rw-r--r-- | lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 9 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 8 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 159 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/R600/SIInstructions.td | 8 |
18 files changed, 184 insertions, 125 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 9b3606372035..0a05d25189b0 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -105,7 +105,7 @@ namespace ShaderType { /// a separate piece of memory that is unique from other /// memory locations. namespace AMDGPUAS { -enum AddressSpaces { +enum AddressSpaces : unsigned { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory @@ -137,7 +137,10 @@ enum AddressSpaces { CONSTANT_BUFFER_14 = 22, CONSTANT_BUFFER_15 = 23, ADDRESS_NONE = 24, ///< Address space for unknown memory. - LAST_ADDRESS = ADDRESS_NONE + LAST_ADDRESS = ADDRESS_NONE, + + // Some places use this if the address space can't be determined. + UNKNOWN_ADDRESS_SPACE = ~0u }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d00ae78c99b0..d56838ec2019 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -257,9 +257,22 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); + setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { @@ -301,6 +314,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SMIN, MVT::i32, Legal); + setOperationAction(ISD::UMIN, MVT::i32, Legal); + setOperationAction(ISD::SMAX, MVT::i32, Legal); + setOperationAction(ISD::UMAX, MVT::i32, Legal); + if (!Subtarget->hasFFBH()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); @@ -962,17 +980,17 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imax: - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umax: - return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imin: - return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umin: - return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umul24: return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, @@ -1050,7 +1068,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(1)); - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); + return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1)); } /// Linear Interpolation @@ -1149,7 +1167,7 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } -/// \brief Generate Min/Max node +// FIXME: Remove this when combines added to DAGCombiner. SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, @@ -1165,22 +1183,22 @@ SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, switch (CCOpcode) { case ISD::SETULE: case ISD::SETULT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX; + unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX; + unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN; + unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETUGE: case ISD::SETUGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN; + unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } default: @@ -2644,11 +2662,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(COS_HW) NODE_NAME_CASE(SIN_HW) NODE_NAME_CASE(FMAX_LEGACY) - NODE_NAME_CASE(SMAX) - NODE_NAME_CASE(UMAX) NODE_NAME_CASE(FMIN_LEGACY) - NODE_NAME_CASE(SMIN) - NODE_NAME_CASE(UMIN) NODE_NAME_CASE(FMAX3) NODE_NAME_CASE(SMAX3) NODE_NAME_CASE(UMAX3) @@ -2794,14 +2808,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; } - case AMDGPUISD::SMAX: - case AMDGPUISD::UMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMIN: - computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), - KnownZero, KnownOne, DAG, Depth); - break; - case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { KnownZero = APInt::getHighBitsSet(32, 31); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c9f198129efc..fbb7d3c88437 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -228,11 +228,7 @@ enum NodeType : unsigned { COS_HW, SIN_HW, FMAX_LEGACY, - SMAX, - UMAX, FMIN_LEGACY, - SMIN, - UMIN, FMAX3, SMAX3, UMAX3, diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index f0f10ca59723..64e295f1144c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -152,17 +152,15 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const return true; } -MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // TODO: Implement this function return nullptr; } -MachineInstr * -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { // TODO: Implement this function return nullptr; } diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 07042b59be7b..8fd27a17638b 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -87,9 +87,11 @@ public: protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; public: diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 790f34cea8cd..b413897d9d23 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -94,16 +94,6 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, [] >; -// out = min(a, b) a and b are signed ints -def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a and b are unsigned ints -def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - // FIXME: TableGen doesn't like commutative instructions with more // than 2 operands. // out = max(a, b, c) a, b and c are floats diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 9565e3fd5fa6..20831460b933 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -64,25 +64,25 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName())); - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } case MachineOperand::MO_TargetIndex: { assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index b262cdf57712..a5a901c739d4 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -272,7 +272,7 @@ public: } bool enableSubRegLiveness() const override { - return false; + return true; } }; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 19bffd575117..95025a6e29f1 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -1084,7 +1084,7 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { case AsmToken::Identifier: Operands.push_back(AMDGPUOperand::CreateExpr( - MCSymbolRefExpr::Create(getContext().getOrCreateSymbol( + MCSymbolRefExpr::create(getContext().getOrCreateSymbol( Parser.getTok().getString()), getContext()), S)); Parser.Lex(); return MatchOperand_Success; diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 279c3eb1912f..f70676943bb3 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -337,7 +337,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } else if (Op.isExpr()) { const MCExpr *Exp = Op.getExpr(); - Exp->print(O); + Exp->print(O, &MAI); } else { llvm_unreachable("unknown operand type in printOperand"); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 2605ca52dfde..3713223697ed 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -25,18 +25,18 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} - void ExecutePostLayoutBinding(MCAssembler &Asm, + void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. } - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override { assert(!"Not implemented"); } - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; @@ -64,7 +64,7 @@ public: } //End anonymous namespace -void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, +void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { Asm.writeSectionData(&*I, Layout); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 19d89fb27caa..028a86dfc7ad 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -11,7 +11,7 @@ #include "AMDGPUMCAsmInfo.h" using namespace llvm; -AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() { +AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MaxInstLength = 16; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h index 8f75c76c4257..a5bac51e356f 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -17,7 +17,7 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class StringRef; +class Triple; // If you need to create another MCAsmInfo class, which inherits from MCAsmInfo, // you will need to make sure your new class sets PrivateGlobalPrefix to @@ -26,7 +26,7 @@ class StringRef; // with 'L' as a local symbol. class AMDGPUMCAsmInfo : public MCAsmInfoELF { public: - explicit AMDGPUMCAsmInfo(StringRef &TT); + explicit AMDGPUMCAsmInfo(const Triple &TT); }; } // namespace llvm #endif diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index a809564e3be0..e683498d52a5 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -148,15 +149,11 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { } void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 4; i++) { - OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); - } + support::endian::Writer<support::little>(OS).write(Value); } void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 8; i++) { - EmitByte((Value >> (8 * i)) & 0xff, OS); - } + support::endian::Writer<support::little>(OS).write(Value); } unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7126c82c0331..7beed092b3f7 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -781,10 +781,10 @@ def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; -def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; -def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; -def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; -def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; +def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>; +def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>; +def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>; +def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>; def SETE_INT : R600_2OP < 0x3A, "SETE_INT", diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 52bf2aeb87de..12d08cf4c7f5 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -155,7 +155,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -211,6 +210,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); setTargetDAGCombine(ISD::FMAXNUM); + setTargetDAGCombine(ISD::SMIN); + setTargetDAGCombine(ISD::SMAX); + setTargetDAGCombine(ISD::UMIN); + setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); @@ -251,47 +254,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, return false; } -// FIXME: This really needs an address space argument. The immediate offset -// size is different for different sets of memory instruction sets. - -// The single offset DS instructions have a 16-bit unsigned byte offset. -// -// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r + -// r + i with addr64. 32-bit has more addressing mode options. Depending on the -// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i). -// -// SMRD instructions have an 8-bit, dword offset. -// bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; - // Allow a 16-bit unsigned immediate field, since this is what DS instructions - // use. - if (!isUInt<16>(AM.BaseOffs)) - return false; + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? + case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { + // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and + // additionally can do r + r + i with addr64. 32-bit has more addressing + // mode options. Depending on the resource constant, it can also do + // (i64 r0) + (i32 r1) * (i14 i). + // + // SMRD instructions have an 8-bit, dword offset. + // + // Assume nonunifom access, since the address space isn't enough to know + // what instruction we will use, and since we don't know if this is a load + // or store and scalar stores are only available on VI. + // + // We also know if we are doing an extload, we can't do a scalar load. + // + // Private arrays end up using a scratch buffer most of the time, so also + // assume those use MUBUF instructions. Scratch loads / stores are currently + // implemented as mubuf instructions with offen bit set, so slightly + // different than the normal addr64. + if (!isUInt<12>(AM.BaseOffs)) + return false; - // Only support r+r, - switch (AM.Scale) { - case 0: // "r+i" or just "i", depending on HasBaseReg. - break; - case 1: - if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + // FIXME: Since we can split immediate into soffset and immediate offset, + // would it make sense to allow any immediate? + + switch (AM.Scale) { + case 0: // r + i or just i, depending on HasBaseReg. + return true; + case 1: + return true; // We have r + r or r + i. + case 2: + if (AM.HasBaseReg) { + // Reject 2 * r + r. + return false; + } + + // Allow 2 * r as r + r + // Or 2 * r + i is allowed as r + r + i. + return true; + default: // Don't allow n * r return false; - // Otherwise we have r+r or r+i. - break; - case 2: - if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + } + } + case AMDGPUAS::LOCAL_ADDRESS: + case AMDGPUAS::REGION_ADDRESS: { + // Basic, single offset DS instructions allow a 16-bit unsigned immediate + // field. + // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have + // an 8-bit dword offset but we don't know the alignment here. + if (!isUInt<16>(AM.BaseOffs)) return false; - // Allow 2*r as r+r. - break; - default: // Don't allow n * r + + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. + return true; + + if (AM.Scale == 1 && AM.HasBaseReg) + return true; + return false; } - - return true; + case AMDGPUAS::FLAT_ADDRESS: { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + } + default: + llvm_unreachable("unhandled address space"); + } } bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, @@ -368,6 +407,12 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } +static EVT toIntegerVT(EVT VT) { + if (VT.isVector()) + return VT.changeVectorElementTypeToInteger(); + return MVT::getIntegerVT(VT.getSizeInBits()); +} + SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -380,20 +425,42 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); - SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, - MRI.getLiveInVirtReg(InputPtrReg), MVT::i64); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr, - DAG.getConstant(Offset, SL, MVT::i64)); + SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, + MRI.getLiveInVirtReg(InputPtrReg), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Offset, SL, PtrVT)); SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, + unsigned Align = DL->getABITypeAlignment(Ty); + + if (VT != MemVT && VT.isFloatingPoint()) { + // Do an integer load and convert. + // FIXME: This is mostly because load legalization after type legalization + // doesn't handle FP extloads. + assert(VT.getScalarType() == MVT::f32 && + MemVT.getScalarType() == MVT::f16); + + EVT IVT = toIntegerVT(VT); + EVT MemIVT = toIntegerVT(MemVT); + SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, + IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, + false, // isVolatile + true, // isNonTemporal + true, // isInvariant + Align); // Alignment + return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + } + + ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile true, // isNonTemporal true, // isInvariant - DL->getABITypeAlignment(Ty)); // Alignment + Align); // Alignment } SDValue SITargetLowering::LowerFormalArguments( @@ -1570,15 +1637,15 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { switch (Opc) { case ISD::FMAXNUM: return AMDGPUISD::FMAX3; - case AMDGPUISD::SMAX: + case ISD::SMAX: return AMDGPUISD::SMAX3; - case AMDGPUISD::UMAX: + case ISD::UMAX: return AMDGPUISD::UMAX3; case ISD::FMINNUM: return AMDGPUISD::FMIN3; - case AMDGPUISD::SMIN: + case ISD::SMIN: return AMDGPUISD::SMIN3; - case AMDGPUISD::UMIN: + case ISD::UMIN: return AMDGPUISD::UMIN3; default: llvm_unreachable("Not a min/max opcode"); @@ -1664,10 +1731,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performSetCCCombine(N, DCI); case ISD::FMAXNUM: // TODO: What about fmax_legacy? case ISD::FMINNUM: - case AMDGPUISD::SMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMAX: - case AMDGPUISD::UMIN: { + case ISD::SMAX: + case ISD::SMIN: + case ISD::UMAX: + case ISD::UMIN: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index a95354c38816..a956b013bdb1 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -63,7 +63,7 @@ public: EVT /*VT*/) const override; bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const override; + Type *Ty, unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 839c2e9ecdd2..2f39074802b7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -224,16 +224,16 @@ defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32", } // End Uses = [SCC] defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32", - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] + [(set i32:$dst, (smin i32:$src0, i32:$src1))] >; defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32", - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] + [(set i32:$dst, (umin i32:$src0, i32:$src1))] >; defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32", - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] + [(set i32:$dst, (smax i32:$src0, i32:$src1))] >; defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] + [(set i32:$dst, (umax i32:$src0, i32:$src1))] >; } // End Defs = [SCC] |