Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 377
1 file changed, 203 insertions, 174 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 66291d0be4e6..113d6249fa60 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -8,18 +8,18 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// \brief Custom DAG lowering for R600
+/// Custom DAG lowering for R600
 //
 //===----------------------------------------------------------------------===//

 #include "R600ISelLowering.h"
 #include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600FrameLowering.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
@@ -35,13 +35,13 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -50,17 +50,19 @@

 using namespace llvm;

+#include "R600GenCallingConv.inc"
+
 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                        const R600Subtarget &STI)
-    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
-  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
-  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
-  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
-  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
-  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
-  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
+  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
+  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
+  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
+  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
+  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
+  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

-  computeRegisterProperties(STI.getRegisterInfo());
+  computeRegisterProperties(Subtarget->getRegisterInfo());

   // Legalize loads and stores to the private address space.
   setOperationAction(ISD::LOAD, MVT::i32, Custom);
@@ -147,6 +149,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,

   setOperationAction(ISD::FSUB, MVT::f32, Expand);

+  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+  setOperationAction(ISD::FRINT, MVT::f64, Custom);
+  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

@@ -216,6 +223,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMA, MVT::f64, Expand);
   }

+  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+  // need it for R600.
+  if (!Subtarget->hasFP32Denormals())
+    setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+  if (!Subtarget->hasBFI()) {
+    // fcopysign can be done in a single instruction with BFI.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  }
+
+  if (!Subtarget->hasBCNT(32))
+    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+  if (!Subtarget->hasBCNT(64))
+    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+  if (Subtarget->hasFFBH())
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  if (Subtarget->hasFFBL())
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+  // need it for R600.
+  if (Subtarget->hasBFE())
+    setHasExtractBitsInsn(true);
+
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
@@ -245,14 +280,10 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::LOAD);
 }

-const R600Subtarget *R600TargetLowering::getSubtarget() const {
-  return static_cast<const R600Subtarget *>(Subtarget);
-}
-
 static inline bool isEOP(MachineBasicBlock::iterator I) {
   if (std::next(I) == I->getParent()->end())
     return false;
-  return std::next(I)->getOpcode() == AMDGPU::RETURN;
+  return std::next(I)->getOpcode() == R600::RETURN;
 }

 MachineBasicBlock *
@@ -261,24 +292,24 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineBasicBlock::iterator I = MI;
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();

   switch (MI.getOpcode()) {
   default:
     // Replace LDS_*_RET instruction that don't have any uses with the
     // equivalent LDS_*_NORET instruction.
     if (TII->isLDSRetInstr(MI.getOpcode())) {
-      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
       assert(DstIdx != -1);
       MachineInstrBuilder NewMI;
       // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
       // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
-          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
+          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
-                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
+                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
@@ -286,31 +317,24 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
-  case AMDGPU::CLAMP_R600: {
-    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
-        MI.getOperand(1).getReg());
-    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
-    break;
-  }
-  case AMDGPU::FABS_R600: {
+  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }
-  case AMDGPU::FNEG_R600: {
+  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }
-  case AMDGPU::MASK_WRITE: {
+  case R600::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
@@ -318,7 +342,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
    break;
  }

-  case AMDGPU::MOV_IMM_F32:
+  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
@@ -326,39 +350,39 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                            .getZExtValue());
    break;

-  case AMDGPU::MOV_IMM_I32:
+  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

-  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
+  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
-        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
-    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
+        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
+    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

-  case AMDGPU::CONST_COPY: {
+  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
+        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
+    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

-  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
-  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+  case R600::RAT_WRITE_CACHELESS_32_eg:
+  case R600::RAT_WRITE_CACHELESS_64_eg:
+  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

-  case AMDGPU::RAT_STORE_TYPED_eg:
+  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
@@ -366,49 +390,49 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
        .addImm(isEOP(I)); // Set End of program bit
    break;

-  case AMDGPU::BRANCH:
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+  case R600::BRANCH:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

-  case AMDGPU::BRANCH_COND_f32: {
+  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
-        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-                AMDGPU::PREDICATE_BIT)
+        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
-            .addImm(AMDGPU::PRED_SETNE)
+            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
-        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

-  case AMDGPU::BRANCH_COND_i32: {
+  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
-        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-                AMDGPU::PREDICATE_BIT)
+        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
-            .addImm(AMDGPU::PRED_SETNE_INT)
+            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
-        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

-  case AMDGPU::EG_ExportSwz:
-  case AMDGPU::R600_ExportSwz: {
+  case R600::EG_ExportSwz:
+  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if its not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
-      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
-          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
+          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
@@ -420,7 +444,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
-    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
+    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
@@ -433,7 +457,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
        .addImm(EOP);
    break;
  }
-  case AMDGPU::RETURN: {
+  case R600::RETURN: {
    return BB;
  }
  }
@@ -478,7 +502,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
-    case AMDGPUIntrinsic::r600_store_swizzle: {
+    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
@@ -505,14 +529,14 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
-    case AMDGPUIntrinsic::r600_tex:
-    case AMDGPUIntrinsic::r600_texc: {
+    case Intrinsic::r600_tex:
+    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
-      case AMDGPUIntrinsic::r600_tex:
+      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
-      case AMDGPUIntrinsic::r600_texc:
+      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
@@ -542,7 +566,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
-    case AMDGPUIntrinsic::r600_dot4: {
+    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, DL, MVT::i32)),
@@ -566,7 +590,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
-      uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
@@ -589,23 +613,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T1_X, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T1_Y, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T1_Z, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T0_X, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T0_Y, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
-      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                     AMDGPU::T0_Z, VT);
+      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
@@ -755,7 +779,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
-  if (Gen >= R600Subtarget::R700)
+  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
@@ -1527,7 +1551,7 @@ SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
-  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
+  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

@@ -1539,6 +1563,28 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                         Op.getValueType());
 }

+CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+                                                  bool IsVarArg) const {
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  case CallingConv::Cold:
+    llvm_unreachable("kernels should not be handled here");
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_LS:
+    return CC_R600;
+  default:
+    report_fatal_error("Unsupported calling convention.");
+  }
+}
+
 /// XXX Only kernel functions are supported, so we can assume for now that
 /// every function is a kernel function, but in the future we should use
 /// separate calling conventions for kernel and non-kernel functions.
@@ -1550,8 +1596,6 @@ SDValue R600TargetLowering::LowerFormalArguments(
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
-  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
@@ -1571,7 +1615,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
    }

    if (AMDGPU::isShader(CallConv)) {
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
@@ -1602,19 +1646,18 @@ SDValue R600TargetLowering::LowerFormalArguments(
    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
-    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
-        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
+        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
+        PtrInfo,
        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
                                        MachineMemOperand::MODereferenceable |
                                        MachineMemOperand::MOInvariant);
    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
-    MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
  }
  return Chain;
 }
@@ -1989,26 +2032,26 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
                                     SDValue &Src, SDValue &Neg, SDValue &Abs,
                                     SDValue &Sel, SDValue &Imm,
                                     SelectionDAG &DAG) const {
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Src.isMachineOpcode())
    return false;

  switch (Src.getMachineOpcode()) {
-  case AMDGPU::FNEG_R600:
+  case R600::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
-  case AMDGPU::FABS_R600:
+  case R600::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
-  case AMDGPU::CONST_COPY: {
+  case R600::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
-    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
    if (!Sel.getNode())
      return false;

@@ -2019,17 +2062,17 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
    // Gather constants values
    int SrcIndices[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0),
+      TII->getOperandIdx(Opcode, R600::OpName::src1),
+      TII->getOperandIdx(Opcode, R600::OpName::src2),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
@@ -2042,7 +2085,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
-        if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        if (Reg->getReg() == R600::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
@@ -2057,30 +2100,30 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
    }

    Sel = CstOffset;
-    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
    return true;
  }
-  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
+  case R600::MOV_IMM_GLOBAL_ADDR:
    // Check if the Imm slot is used. Taken from below.
    if (cast<ConstantSDNode>(Imm)->getZExtValue())
      return false;
    Imm = Src.getOperand(0);
-    Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
+    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
    return true;
-  case AMDGPU::MOV_IMM_I32:
-  case AMDGPU::MOV_IMM_F32: {
-    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+  case R600::MOV_IMM_I32:
+  case R600::MOV_IMM_F32: {
+    unsigned ImmReg = R600::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

-    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
-        ImmReg = AMDGPU::ZERO;
+        ImmReg = R600::ZERO;
      } else if (FloatValue == 0.5) {
-        ImmReg = AMDGPU::HALF;
+        ImmReg = R600::HALF;
      } else if (FloatValue == 1.0) {
-        ImmReg = AMDGPU::ONE;
+        ImmReg = R600::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
@@ -2088,9 +2131,9 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
-        ImmReg = AMDGPU::ZERO;
+        ImmReg = R600::ZERO;
      } else if (Value == 1) {
-        ImmReg = AMDGPU::ONE_INT;
+        ImmReg = R600::ONE_INT;
      } else {
        ImmValue = Value;
      }
@@ -2099,7 +2142,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
-    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+    if (ImmReg == R600::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
@@ -2116,10 +2159,10 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
  }
 }

-/// \brief Fold the instructions after selecting them
+/// Fold the instructions after selecting them
 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

@@ -2128,36 +2171,36 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

-  if (Opcode == AMDGPU::DOT_4) {
+  if (Opcode == R600::DOT_4) {
    int OperandIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
@@ -2165,7 +2208,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
-      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
@@ -2173,42 +2216,28 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
-  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+  } else if (Opcode == R600::REG_SEQUENCE) {
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
-  } else if (Opcode == AMDGPU::CLAMP_R600) {
-    SDValue Src = Node->getOperand(0);
-    if (!Src.isMachineOpcode() ||
-        !TII->hasInstrModifiers(Src.getMachineOpcode()))
-      return Node;
-    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
-        AMDGPU::OpName::clamp);
-    if (ClampIdx < 0)
-      return Node;
-    SDLoc DL(Node);
-    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
-    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
-    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
-                              Node->getVTList(), Ops);
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+      TII->getOperandIdx(Opcode, R600::OpName::src0),
+      TII->getOperandIdx(Opcode, R600::OpName::src1),
+      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
+      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    int AbsIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
@@ -2218,9 +2247,9 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
-      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
-      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
+      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
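Editor's note (not part of the upstream diff): the CCAssignFnForCall hook added in this patch routes the graphics calling conventions to the TableGen-generated CC_R600 assignment function pulled in via R600GenCallingConv.inc, while kernel and C-family conventions are rejected because kernel arguments are lowered along a separate path. The sketch below is a standalone, simplified analog of that dispatch; the names CC_R600_Sketch and ccAssignFnForCall are hypothetical and no real LLVM APIs are used, so it only illustrates the pattern, not the actual implementation.

    #include <cstdio>
    #include <cstdlib>

    // Simplified stand-ins for LLVM's CallingConv IDs.
    enum class CallingConv { C, Fast, Cold, AMDGPU_KERNEL, AMDGPU_VS, AMDGPU_PS, AMDGPU_CS };

    // Stand-in for the TableGen-generated assignment function (CC_R600 in the diff).
    bool CC_R600_Sketch() { return true; }

    using CCAssignFn = bool (*)();

    // Shader conventions resolve to the generated table; kernel and C-family
    // conventions abort here, mirroring the llvm_unreachable / report_fatal_error
    // calls in the patch.
    CCAssignFn ccAssignFnForCall(CallingConv CC) {
      switch (CC) {
      case CallingConv::AMDGPU_KERNEL:
      case CallingConv::C:
      case CallingConv::Fast:
      case CallingConv::Cold:
        std::fprintf(stderr, "kernels should not be handled here\n");
        std::abort();
      case CallingConv::AMDGPU_VS:
      case CallingConv::AMDGPU_PS:
      case CallingConv::AMDGPU_CS:
        return CC_R600_Sketch;
      }
      std::fprintf(stderr, "Unsupported calling convention.\n");
      std::abort();
    }

    int main() {
      // A pixel-shader convention resolves to the generated assignment function.
      CCAssignFn Fn = ccAssignFnForCall(CallingConv::AMDGPU_PS);
      std::printf("assigned: %d\n", Fn());
      return 0;
    }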