diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2375 |
1 files changed, 2375 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp new file mode 100644 index 000000000000..bc9957194f6d --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -0,0 +1,2375 @@ +//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC-specific support for the FastISel class. Some +// of the target-specific code is generated by tablegen in the file +// PPCGenFastISel.inc, which is #included here. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" +#include "PPCCCState.h" +#include "PPCCallingConv.h" +#include "PPCISelLowering.h" +#include "PPCMachineFunctionInfo.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" + +//===----------------------------------------------------------------------===// +// +// TBD: +// fastLowerArguments: Handle simple cases. +// PPCMaterializeGV: Handle TLS. 
+// SelectCall: Handle function pointers. +// SelectCall: Handle multi-register return values. +// SelectCall: Optimize away nops for local calls. +// processCallArgs: Handle bit-converted arguments. +// finishCall: Handle multi-register return values. +// PPCComputeAddress: Handle parameter references as FrameIndex's. +// PPCEmitCmp: Handle immediate as operand 1. +// SelectCall: Handle small byval arguments. +// SelectIntrinsicCall: Implement. +// SelectSelect: Implement. +// Consider factoring isTypeLegal into the base class. +// Implement switches and jump tables. +// +//===----------------------------------------------------------------------===// +using namespace llvm; + +#define DEBUG_TYPE "ppcfastisel" + +namespace { + +typedef struct Address { + enum { + RegBase, + FrameIndexBase + } BaseType; + + union { + unsigned Reg; + int FI; + } Base; + + long Offset; + + // Innocuous defaults for our address. + Address() + : BaseType(RegBase), Offset(0) { + Base.Reg = 0; + } +} Address; + +class PPCFastISel final : public FastISel { + + const TargetMachine &TM; + const PPCSubtarget *PPCSubTarget; + PPCFunctionInfo *PPCFuncInfo; + const TargetInstrInfo &TII; + const TargetLowering &TLI; + LLVMContext *Context; + + public: + explicit PPCFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) + : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()), + PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()), + PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()), + TII(*PPCSubTarget->getInstrInfo()), + TLI(*PPCSubTarget->getTargetLowering()), + Context(&FuncInfo.Fn->getContext()) {} + + // Backend specific FastISel code. 
+ private: + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool fastLowerArguments() override; + unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + + bool fastLowerCall(CallLoweringInfo &CLI) override; + + // Instruction selection routines. + private: + bool SelectLoad(const Instruction *I); + bool SelectStore(const Instruction *I); + bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectIToFP(const Instruction *I, bool IsSigned); + bool SelectFPToI(const Instruction *I, bool IsSigned); + bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectRet(const Instruction *I); + bool SelectTrunc(const Instruction *I); + bool SelectIntExt(const Instruction *I); + + // Utility routines. 
+ private: + bool isTypeLegal(Type *Ty, MVT &VT); + bool isLoadTypeLegal(Type *Ty, MVT &VT); + bool isValueAvailable(const Value *V) const; + bool isVSFRCRegClass(const TargetRegisterClass *RC) const { + return RC->getID() == PPC::VSFRCRegClassID; + } + bool isVSSRCRegClass(const TargetRegisterClass *RC) const { + return RC->getID() == PPC::VSSRCRegClassID; + } + bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt, unsigned DestReg); + bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + const TargetRegisterClass *RC, bool IsZExt = true, + unsigned FP64LoadOpc = PPC::LFD); + bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); + bool PPCComputeAddress(const Value *Obj, Address &Addr); + void PPCSimplifyAddress(Address &Addr, bool &UseOffset, + unsigned &IndexReg); + bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt); + unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); + unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt = true); + unsigned PPCMaterialize32BitInt(int64_t Imm, + const TargetRegisterClass *RC); + unsigned PPCMaterialize64BitInt(int64_t Imm, + const TargetRegisterClass *RC); + unsigned PPCMoveToIntReg(const Instruction *I, MVT VT, + unsigned SrcReg, bool IsSigned); + unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned); + + // Call handling routines. 
+ private: + bool processCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg); + bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes); + LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag); + + private: + #include "PPCGenFastISel.inc" + +}; + +} // end anonymous namespace + +#include "PPCGenCallingConv.inc" + +// Function whose sole purpose is to kill compiler warnings +// stemming from unused functions included from PPCGenCallingConv.inc. +CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { + if (Flag == 1) + return CC_PPC32_SVR4; + else if (Flag == 2) + return CC_PPC32_SVR4_ByVal; + else if (Flag == 3) + return CC_PPC32_SVR4_VarArg; + else + return RetCC_PPC; +} + +static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // These are not representable with any single compare. + case CmpInst::FCMP_FALSE: + case CmpInst::FCMP_TRUE: + // Major concern about the following 6 cases is NaN result. The comparison + // result consists of 4 bits, indicating lt, eq, gt and un (unordered), + // only one of which will be set. The result is generated by fcmpu + // instruction. However, bc instruction only inspects one of the first 3 + // bits, so when un is set, bc instruction may jump to to an undesired + // place. + // + // More specifically, if we expect an unordered comparison and un is set, we + // expect to always go to true branch; in such case UEQ, UGT and ULT still + // give false, which are undesired; but UNE, UGE, ULE happen to give true, + // since they are tested by inspecting !eq, !lt, !gt, respectively. + // + // Similarly, for ordered comparison, when un is set, we always expect the + // result to be false. 
In such case OGT, OLT and OEQ is good, since they are + // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE + // and ONE are tested through !lt, !gt and !eq, and these are true. + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ONE: + default: + return Optional<PPC::Predicate>(); + + case CmpInst::FCMP_OEQ: + case CmpInst::ICMP_EQ: + return PPC::PRED_EQ; + + case CmpInst::FCMP_OGT: + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SGT: + return PPC::PRED_GT; + + case CmpInst::FCMP_UGE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + return PPC::PRED_GE; + + case CmpInst::FCMP_OLT: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SLT: + return PPC::PRED_LT; + + case CmpInst::FCMP_ULE: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + return PPC::PRED_LE; + + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + return PPC::PRED_NE; + + case CmpInst::FCMP_ORD: + return PPC::PRED_NU; + + case CmpInst::FCMP_UNO: + return PPC::PRED_UN; + } +} + +// Determine whether the type Ty is simple enough to be handled by +// fast-isel, and return its equivalent machine type in VT. +// FIXME: Copied directly from ARM -- factor into base class? +bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { + EVT Evt = TLI.getValueType(DL, Ty, true); + + // Only handle simple types. + if (Evt == MVT::Other || !Evt.isSimple()) return false; + VT = Evt.getSimpleVT(); + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +// Determine whether the type Ty is simple enough to be handled by +// fast-isel as a load target, and return its equivalent machine type in VT. +bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { + if (isTypeLegal(Ty, VT)) return true; + + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. 
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) { + return true; + } + + return false; +} + +bool PPCFastISel::isValueAvailable(const Value *V) const { + if (!isa<Instruction>(V)) + return true; + + const auto *I = cast<Instruction>(V); + return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; +} + +// Given a value Obj, create an Address object Addr that represents its +// address. Return false if we can't handle it. +bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks unless the object is an alloca from + // another block, otherwise it may not have a virtual register assigned. + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: + // Look through bitcasts. + return PPCComputeAddress(U->getOperand(0), Addr); + case Instruction::IntToPtr: + // Look past no-op inttoptrs. + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) + return PPCComputeAddress(U->getOperand(0), Addr); + break; + case Instruction::PtrToInt: + // Look past no-op ptrtoints. + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) + return PPCComputeAddress(U->getOperand(0), Addr); + break; + case Instruction::GetElementPtr: { + Address SavedAddr = Addr; + long TmpOffset = Addr.Offset; + + // Iterate through the GEP folding the constants into offsets where + // we can. 
+ gep_type_iterator GTI = gep_type_begin(U); + for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end(); + II != IE; ++II, ++GTI) { + const Value *Op = *II; + if (StructType *STy = GTI.getStructTypeOrNull()) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); + TmpOffset += SL->getElementOffset(Idx); + } else { + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + TmpOffset += CI->getSExtValue() * S; + break; + } + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + TmpOffset += CI->getSExtValue() * S; + // Iterate on the other operand. + Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } + } + } + + // Try to grab the base operand now. + Addr.Offset = TmpOffset; + if (PPCComputeAddress(U->getOperand(0), Addr)) return true; + + // We failed, restore everything and try the other options. + Addr = SavedAddr; + + unsupported_gep: + break; + } + case Instruction::Alloca: { + const AllocaInst *AI = cast<AllocaInst>(Obj); + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = SI->second; + return true; + } + break; + } + } + + // FIXME: References to parameters fall through to the behavior + // below. They should be able to reference a frame index since + // they are stored to the stack, so we can get "ld rx, offset(r1)" + // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will + // just contain the parameter. Try to handle this with a FI. + + // Try to get this in a register if nothing else has worked. 
+ if (Addr.Base.Reg == 0) + Addr.Base.Reg = getRegForValue(Obj); + + // Prevent assignment of base register to X0, which is inappropriate + // for loads and stores alike. + if (Addr.Base.Reg != 0) + MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass); + + return Addr.Base.Reg != 0; +} + +// Fix up some addresses that can't be used directly. For example, if +// an offset won't fit in an instruction field, we may need to move it +// into an index register. +void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset, + unsigned &IndexReg) { + + // Check whether the offset fits in the instruction field. + if (!isInt<16>(Addr.Offset)) + UseOffset = false; + + // If this is a stack pointer and the offset needs to be simplified then + // put the alloca address into a register, set the base type back to + // register and continue. This should almost never happen. + if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); + Addr.Base.Reg = ResultReg; + Addr.BaseType = Address::RegBase; + } + + if (!UseOffset) { + IntegerType *OffsetTy = Type::getInt64Ty(*Context); + const ConstantInt *Offset = + ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset)); + IndexReg = PPCMaterializeInt(Offset, MVT::i64); + assert(IndexReg && "Unexpected error in PPCMaterializeInt!"); + } +} + +// Emit a load instruction if possible, returning true if we succeeded, +// otherwise false. See commentary below for how the register class of +// the load is determined. +bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + const TargetRegisterClass *RC, + bool IsZExt, unsigned FP64LoadOpc) { + unsigned Opc; + bool UseOffset = true; + + // If ResultReg is given, it determines the register class of the load. + // Otherwise, RC is the register class to use. 
If the result of the + // load isn't anticipated in this block, both may be zero, in which + // case we must make a conservative guess. In particular, don't assign + // R0 or X0 to the result register, as the result may be used in a load, + // store, add-immediate, or isel that won't permit this. (Though + // perhaps the spill and reload of live-exit values would handle this?) + const TargetRegisterClass *UseRC = + (ResultReg ? MRI.getRegClass(ResultReg) : + (RC ? RC : + (VT == MVT::f64 ? &PPC::F8RCRegClass : + (VT == MVT::f32 ? &PPC::F4RCRegClass : + (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass))))); + + bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass); + + switch (VT.SimpleTy) { + default: // e.g., vector types not handled + return false; + case MVT::i8: + Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8; + break; + case MVT::i16: + Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8) + : (Is32BitInt ? PPC::LHA : PPC::LHA8)); + break; + case MVT::i32: + Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8) + : (Is32BitInt ? PPC::LWA_32 : PPC::LWA)); + if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0)) + UseOffset = false; + break; + case MVT::i64: + Opc = PPC::LD; + assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) && + "64-bit load with 32-bit target??"); + UseOffset = ((Addr.Offset & 3) == 0); + break; + case MVT::f32: + Opc = PPC::LFS; + break; + case MVT::f64: + Opc = FP64LoadOpc; + break; + } + + // If necessary, materialize the offset into a register and use + // the indexed form. Also handle stack pointers with special needs. + unsigned IndexReg = 0; + PPCSimplifyAddress(Addr, UseOffset, IndexReg); + + // If this is a potential VSX load with an offset of 0, a VSX indexed load can + // be used. 
+ bool IsVSSRC = isVSSRCRegClass(UseRC); + bool IsVSFRC = isVSFRCRegClass(UseRC); + bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; + bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD; + if ((Is32VSXLoad || Is64VSXLoad) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + + if (ResultReg == 0) + ResultReg = createResultReg(UseRC); + + // Note: If we still have a frame index here, we know the offset is + // in range, as otherwise PPCSimplifyAddress would have converted it + // into a RegBase. + if (Addr.BaseType == Address::FrameIndexBase) { + // VSX only provides an indexed load. + if (Is32VSXLoad || Is64VSXLoad) return false; + + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), + MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), + MFI.getObjectAlignment(Addr.Base.FI)); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + + // Base reg with offset in range. + } else if (UseOffset) { + // VSX only provides an indexed load. + if (Is32VSXLoad || Is64VSXLoad) return false; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addImm(Addr.Offset).addReg(Addr.Base.Reg); + + // Indexed form. + } else { + // Get the RR opcode corresponding to the RI one. FIXME: It would be + // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it + // is hard to get at. 
+ switch (Opc) { + default: llvm_unreachable("Unexpected opcode!"); + case PPC::LBZ: Opc = PPC::LBZX; break; + case PPC::LBZ8: Opc = PPC::LBZX8; break; + case PPC::LHZ: Opc = PPC::LHZX; break; + case PPC::LHZ8: Opc = PPC::LHZX8; break; + case PPC::LHA: Opc = PPC::LHAX; break; + case PPC::LHA8: Opc = PPC::LHAX8; break; + case PPC::LWZ: Opc = PPC::LWZX; break; + case PPC::LWZ8: Opc = PPC::LWZX8; break; + case PPC::LWA: Opc = PPC::LWAX; break; + case PPC::LWA_32: Opc = PPC::LWAX_32; break; + case PPC::LD: Opc = PPC::LDX; break; + case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; + case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; + } + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + // If we have an index register defined we use it in the store inst, + // otherwise we use X0 as base as it makes the vector instructions to + // use zero in the computation of the effective address regardless the + // content of the register. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); + } + + return true; +} + +// Attempt to fast-select a load instruction. +bool PPCFastISel::SelectLoad(const Instruction *I) { + // FIXME: No atomic loads are supported. + if (cast<LoadInst>(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(I->getOperand(0), Addr)) + return false; + + // Look at the currently assigned register for this instruction + // to determine the required register class. This is necessary + // to constrain RA from using R0/X0 when this is not legal. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? 
MRI.getRegClass(AssignedReg) : nullptr; + + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) + return false; + updateValueMap(I, ResultReg); + return true; +} + +// Emit a store instruction to store SrcReg at Addr. +bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { + assert(SrcReg && "Nothing to store!"); + unsigned Opc; + bool UseOffset = true; + + const TargetRegisterClass *RC = MRI.getRegClass(SrcReg); + bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass); + + switch (VT.SimpleTy) { + default: // e.g., vector types not handled + return false; + case MVT::i8: + Opc = Is32BitInt ? PPC::STB : PPC::STB8; + break; + case MVT::i16: + Opc = Is32BitInt ? PPC::STH : PPC::STH8; + break; + case MVT::i32: + assert(Is32BitInt && "Not GPRC for i32??"); + Opc = PPC::STW; + break; + case MVT::i64: + Opc = PPC::STD; + UseOffset = ((Addr.Offset & 3) == 0); + break; + case MVT::f32: + Opc = PPC::STFS; + break; + case MVT::f64: + Opc = PPC::STFD; + break; + } + + // If necessary, materialize the offset into a register and use + // the indexed form. Also handle stack pointers with special needs. + unsigned IndexReg = 0; + PPCSimplifyAddress(Addr, UseOffset, IndexReg); + + // If this is a potential VSX store with an offset of 0, a VSX indexed store + // can be used. + bool IsVSSRC = isVSSRCRegClass(RC); + bool IsVSFRC = isVSFRCRegClass(RC); + bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; + bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; + if ((Is32VSXStore || Is64VSXStore) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + (Addr.Offset == 0)) { + UseOffset = false; + } + + // Note: If we still have a frame index here, we know the offset is + // in range, as otherwise PPCSimplifyAddress would have converted it + // into a RegBase. + if (Addr.BaseType == Address::FrameIndexBase) { + // VSX only provides an indexed store. 
+ if (Is32VSXStore || Is64VSXStore) return false; + + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), + MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), + MFI.getObjectAlignment(Addr.Base.FI)); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg) + .addImm(Addr.Offset) + .addFrameIndex(Addr.Base.FI) + .addMemOperand(MMO); + + // Base reg with offset in range. + } else if (UseOffset) { + // VSX only provides an indexed store. + if (Is32VSXStore || Is64VSXStore) + return false; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); + + // Indexed form. + } else { + // Get the RR opcode corresponding to the RI one. FIXME: It would be + // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it + // is hard to get at. + switch (Opc) { + default: llvm_unreachable("Unexpected opcode!"); + case PPC::STB: Opc = PPC::STBX; break; + case PPC::STH : Opc = PPC::STHX; break; + case PPC::STW : Opc = PPC::STWX; break; + case PPC::STB8: Opc = PPC::STBX8; break; + case PPC::STH8: Opc = PPC::STHX8; break; + case PPC::STW8: Opc = PPC::STWX8; break; + case PPC::STD: Opc = PPC::STDX; break; + case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; + case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; + } + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg); + + // If we have an index register defined we use it in the store inst, + // otherwise we use X0 as base as it makes the vector instructions to + // use zero in the computation of the effective address regardless the + // content of the register. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); + } + + return true; +} + +// Attempt to fast-select a store instruction. 
+bool PPCFastISel::SelectStore(const Instruction *I) { + Value *Op0 = I->getOperand(0); + unsigned SrcReg = 0; + + // FIXME: No atomics loads are supported. + if (cast<StoreInst>(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(Op0->getType(), VT)) + return false; + + // Get the value to be stored into a register. + SrcReg = getRegForValue(Op0); + if (SrcReg == 0) + return false; + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(I->getOperand(1), Addr)) + return false; + + if (!PPCEmitStore(VT, SrcReg, Addr)) + return false; + + return true; +} + +// Attempt to fast-select a branch instruction. +bool PPCFastISel::SelectBranch(const Instruction *I) { + const BranchInst *BI = cast<BranchInst>(I); + MachineBasicBlock *BrBB = FuncInfo.MBB; + MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; + + // For now, just try the simplest case where it's fed by a compare. + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + if (isValueAvailable(CI)) { + Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate()); + if (!OptPPCPred) + return false; + + PPC::Predicate PPCPred = OptPPCPred.getValue(); + + // Take advantage of fall-through opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + PPCPred = PPC::InvertPredicate(PPCPred); + } + + unsigned CondReg = createResultReg(&PPC::CRRCRegClass); + + if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CondReg)) + return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) + .addImm(PPCPred).addReg(CondReg).addMBB(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } + } else if (const ConstantInt *CI = + dyn_cast<ConstantInt>(BI->getCondition())) { + uint64_t Imm = CI->getZExtValue(); + MachineBasicBlock *Target = (Imm == 0) ? 
FBB : TBB; + fastEmitBranch(Target, DbgLoc); + return true; + } + + // FIXME: ARM looks for a case where the block containing the compare + // has been split from the block containing the branch. If this happens, + // there is a vreg available containing the result of the compare. I'm + // not sure we can do much, as we've lost the predicate information with + // the compare instruction -- we have a 4-bit CR but don't know which bit + // to test here. + return false; +} + +// Attempt to emit a compare of the two source values. Signed and unsigned +// comparisons are supported. Return false if we can't handle it. +bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, + bool IsZExt, unsigned DestReg) { + Type *Ty = SrcValue1->getType(); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) + return false; + + // See if operand 2 is an immediate encodeable in the compare. + // FIXME: Operands are not in canonical order at -O0, so an immediate + // operand in position 1 is a lost opportunity for now. We are + // similar to ARM in this regard. + long Imm = 0; + bool UseImm = false; + + // Only 16-bit integer constants can be represented in compares for + // PowerPC. Others will be materialized into a register. + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) { + if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || + SrcVT == MVT::i8 || SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + Imm = (IsZExt) ? 
(long)CIVal.getZExtValue() : (long)CIVal.getSExtValue(); + if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm))) + UseImm = true; + } + } + + unsigned CmpOpc; + bool NeedsExt = false; + switch (SrcVT.SimpleTy) { + default: return false; + case MVT::f32: + CmpOpc = PPC::FCMPUS; + break; + case MVT::f64: + CmpOpc = PPC::FCMPUD; + break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + NeedsExt = true; + // Intentional fall-through. + case MVT::i32: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW; + else + CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI; + break; + case MVT::i64: + if (!UseImm) + CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD; + else + CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI; + break; + } + + unsigned SrcReg1 = getRegForValue(SrcValue1); + if (SrcReg1 == 0) + return false; + + unsigned SrcReg2 = 0; + if (!UseImm) { + SrcReg2 = getRegForValue(SrcValue2); + if (SrcReg2 == 0) + return false; + } + + if (NeedsExt) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg1 = ExtReg; + + if (!UseImm) { + unsigned ExtReg = createResultReg(&PPC::GPRCRegClass); + if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt)) + return false; + SrcReg2 = ExtReg; + } + } + + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addReg(SrcReg2); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) + .addReg(SrcReg1).addImm(Imm); + + return true; +} + +// Attempt to fast-select a floating-point extend instruction. 
// Only the f32 -> f64 extension is handled; any other type pair returns
// false. On success the result is mapped to the source register itself.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // No code is generated for a FP extend.
  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select a floating-point truncate instruction.
// Only the f64 -> f32 truncation is handled; any other type pair returns
// false.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Round the result to single precision.
  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
    .addReg(SrcReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the value, or 0 if any step could not be emitted.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    // For a 32-bit source a word-sized load is used, reading at offset 0 on
    // little-endian subtargets and offset 4 otherwise.
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}

// Attempt to fast-select an integer-to-floating-point conversion.
// Handles i8/i16/i32/i64 sources and f32/f64 destinations; IsSigned selects
// the signed rather than unsigned conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR.
If we want an i32 on big endian, modify the + // address to have a 4-byte offset so we load from the right place. + if (VT == MVT::i32) + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; + + // Look at the currently assigned register for this instruction + // to determine the required register class. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; + + unsigned ResultReg = 0; + if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) + return 0; + + return ResultReg; +} + +// Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. +bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { + MVT DstVT, SrcVT; + Type *DstTy = I->getType(); + if (!isTypeLegal(DstTy, DstVT)) + return false; + + if (DstVT != MVT::i32 && DstVT != MVT::i64) + return false; + + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + if (!isTypeLegal(SrcTy, SrcVT)) + return false; + + if (SrcVT != MVT::f32 && SrcVT != MVT::f64) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) + return false; + + // Convert f32 to f64 if necessary. This is just a meaningless copy + // to get the register class right. + const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); + if (InRC == &PPC::F4RCRegClass) { + unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), TmpReg) + .addReg(SrcReg); + SrcReg = TmpReg; + } + + // Determine the opcode for the conversion, which takes place + // entirely within FPRs. 
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass); + unsigned Opc; + + if (DstVT == MVT::i32) + if (IsSigned) + Opc = PPC::FCTIWZ; + else + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + else + Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; + + // Generate the convert. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Now move the integer value from a float register to an integer register. + unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned); + if (IntReg == 0) + return false; + + updateValueMap(I, IntReg); + return true; +} + +// Attempt to fast-select a binary integer operation that isn't already +// handled automatically. +bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { + EVT DestVT = TLI.getValueType(DL, I->getType(), true); + + // We can get here in the case when we have a binary operation on a non-legal + // type and the target independent selector doesn't know how to handle it. + if (DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + // Look at the currently assigned register for this instruction + // to determine the required register class. If there is no register, + // make a conservative choice (don't assign R0). + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + (AssignedReg ? MRI.getRegClass(AssignedReg) : + &PPC::GPRC_and_GPRC_NOR0RegClass); + bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); + + unsigned Opc; + switch (ISDOpcode) { + default: return false; + case ISD::ADD: + Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8; + break; + case ISD::OR: + Opc = IsGPRC ? PPC::OR : PPC::OR8; + break; + case ISD::SUB: + Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8; + break; + } + + unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass); + unsigned SrcReg1 = getRegForValue(I->getOperand(0)); + if (SrcReg1 == 0) return false; + + // Handle case of small immediate operand. 
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) { + const APInt &CIVal = ConstInt->getValue(); + int Imm = (int)CIVal.getSExtValue(); + bool UseImm = true; + if (isInt<16>(Imm)) { + switch (Opc) { + default: + llvm_unreachable("Missing case!"); + case PPC::ADD4: + Opc = PPC::ADDI; + MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); + break; + case PPC::ADD8: + Opc = PPC::ADDI8; + MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); + break; + case PPC::OR: + Opc = PPC::ORI; + break; + case PPC::OR8: + Opc = PPC::ORI8; + break; + case PPC::SUBF: + if (Imm == -32768) + UseImm = false; + else { + Opc = PPC::ADDI; + MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass); + Imm = -Imm; + } + break; + case PPC::SUBF8: + if (Imm == -32768) + UseImm = false; + else { + Opc = PPC::ADDI8; + MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass); + Imm = -Imm; + } + break; + } + + if (UseImm) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg) + .addReg(SrcReg1) + .addImm(Imm); + updateValueMap(I, ResultReg); + return true; + } + } + } + + // Reg-reg case. + unsigned SrcReg2 = getRegForValue(I->getOperand(1)); + if (SrcReg2 == 0) return false; + + // Reverse operands for subtract-from. + if (ISDOpcode == ISD::SUB) + std::swap(SrcReg1, SrcReg2); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(SrcReg1).addReg(SrcReg2); + updateValueMap(I, ResultReg); + return true; +} + +// Handle arguments to a call that we're attempting to fast-select. +// Return false if the arguments are too complex for us at the moment. 
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes, + bool IsVarArg) { + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. + unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize(); + CCInfo.AllocateStack(LinkageSize, 8); + + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); + + // Bail out if we can't handle any of the arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Skip vector arguments for now, as well as long double and + // uint128_t, and anything that isn't passed in a register. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || + !VA.isRegLoc() || VA.needsCustom()) + return false; + + // Skip bit-converted arguments for now. + if (VA.getLocInfo() == CCValAssign::BCvt) + return false; + } + + // Get a count of how many bytes are to be pushed onto the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. + NumBytes = std::max(NumBytes, LinkageSize + 64); + + // Issue CALLSEQ_START. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TII.getCallFrameSetupOpcode())) + .addImm(NumBytes).addImm(0); + + // Prepare to assign register arguments. 
Every argument uses up a + // GPR protocol register even if it's passed in a floating-point + // register (unless we're using the fast calling convention). + unsigned NextGPR = PPC::X3; + unsigned NextFPR = PPC::F1; + + // Process arguments. + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // Handle argument promotion and bitcasts. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false)) + llvm_unreachable("Failed to emit a sext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::AExt: + case CCValAssign::ZExt: { + MVT DestVT = VA.getLocVT(); + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true)) + llvm_unreachable("Failed to emit a zext!"); + ArgVT = DestVT; + Arg = TmpReg; + break; + } + case CCValAssign::BCvt: { + // FIXME: Not yet handled. + llvm_unreachable("Should have bailed before getting here!"); + break; + } + } + + // Copy this argument to the appropriate register. + unsigned ArgReg; + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) { + ArgReg = NextFPR++; + if (CC != CallingConv::Fast) + ++NextGPR; + } else + ArgReg = NextGPR++; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); + RegArgs.push_back(ArgReg); + } + + return true; +} + +// For a call that we've determined we can fast-select, finish the +// call sequence and generate a copy to obtain the return value (if any). 
+bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) { + CallingConv::ID CC = CLI.CallConv; + + // Issue CallSEQ_END. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TII.getCallFrameDestroyOpcode())) + .addImm(NumBytes).addImm(0); + + // Next, generate a copy to obtain the return value. + // FIXME: No multi-register return values yet, though I don't foresee + // any real difficulties there. + if (RetVT != MVT::isVoid) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCValAssign &VA = RVLocs[0]; + assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); + assert(VA.isRegLoc() && "Can only return in registers!"); + + MVT DestVT = VA.getValVT(); + MVT CopyVT = DestVT; + + // Ints smaller than a register still arrive in a full 64-bit + // register, so make sure we recognize this. + if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) + CopyVT = MVT::i64; + + unsigned SourcePhysReg = VA.getLocReg(); + unsigned ResultReg = 0; + + if (RetVT == CopyVT) { + const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); + ResultReg = createResultReg(CpyRC); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + + // If necessary, round the floating result to single precision. + } else if (CopyVT == MVT::f64) { + ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), + ResultReg).addReg(SourcePhysReg); + + // If only the low half of a general register is needed, generate + // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be + // used along the fast-isel path (not lowered), and downstream logic + // also doesn't like a direct subreg copy on a physical reg.) 
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) { + ResultReg = createResultReg(&PPC::GPRCRegClass); + // Convert physical register from G8RC to GPRC. + SourcePhysReg -= PPC::X0 - PPC::R0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SourcePhysReg); + } + + assert(ResultReg && "ResultReg unset!"); + CLI.InRegs.push_back(SourcePhysReg); + CLI.ResultReg = ResultReg; + CLI.NumResultRegs = 1; + } + + return true; +} + +bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { + CallingConv::ID CC = CLI.CallConv; + bool IsTailCall = CLI.IsTailCall; + bool IsVarArg = CLI.IsVarArg; + const Value *Callee = CLI.Callee; + const MCSymbol *Symbol = CLI.Symbol; + + if (!Callee && !Symbol) + return false; + + // Allow SelectionDAG isel to handle tail calls. + if (IsTailCall) + return false; + + // Let SDISel handle vararg functions. + if (IsVarArg) + return false; + + // Handle simple calls for now, with legal return types and + // those that can be extended. + Type *RetTy = CLI.RetTy; + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8) + return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; + + // FIXME: No multi-register return values yet. + if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && + RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 && + RetVT != MVT::f64) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + if (RVLocs.size() > 1) + return false; + } + + // Bail early if more than 8 arguments, as we only currently + // handle arguments passed in registers. + unsigned NumArgs = CLI.OutVals.size(); + if (NumArgs > 8) + return false; + + // Set up the argument vectors. 
+ SmallVector<Value*, 8> Args; + SmallVector<unsigned, 8> ArgRegs; + SmallVector<MVT, 8> ArgVTs; + SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; + + Args.reserve(NumArgs); + ArgRegs.reserve(NumArgs); + ArgVTs.reserve(NumArgs); + ArgFlags.reserve(NumArgs); + + for (unsigned i = 0, ie = NumArgs; i != ie; ++i) { + // Only handle easy calls for now. It would be reasonably easy + // to handle <= 8-byte structures passed ByVal in registers, but we + // have to ensure they are right-justified in the register. + ISD::ArgFlagsTy Flags = CLI.OutFlags[i]; + if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal()) + return false; + + Value *ArgValue = CLI.OutVals[i]; + Type *ArgTy = ArgValue->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8) + return false; + + if (ArgVT.isVector()) + return false; + + unsigned Arg = getRegForValue(ArgValue); + if (Arg == 0) + return false; + + Args.push_back(ArgValue); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Process the arguments. + SmallVector<unsigned, 8> RegArgs; + unsigned NumBytes; + + if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, IsVarArg)) + return false; + + MachineInstrBuilder MIB; + // FIXME: No handling for function pointers yet. This requires + // implementing the function descriptor (OPD) setup. + const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); + if (!GV) { + // patchpoints are a special case; they always dispatch to a pointer value. + // However, we don't actually want to generate the indirect call sequence + // here (that will be generated, as necessary, during asm printing), and + // the call we generate here will be erased by FastISel::selectPatchpoint, + // so don't try very hard... + if (CLI.IsPatchPoint) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP)); + else + return false; + } else { + // Build direct call with NOP for TOC restore. 
+ // FIXME: We can and should optimize away the NOP for local calls. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BL8_NOP)); + // Add callee. + MIB.addGlobalAddress(GV); + } + + // Add implicit physical register uses to the call. + for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) + MIB.addReg(RegArgs[II], RegState::Implicit); + + // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live + // into the call. + PPCFuncInfo->setUsesTOCBasePtr(); + MIB.addReg(PPC::X2, RegState::Implicit); + + // Add a register mask with the call-preserved registers. Proper + // defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); + + CLI.Call = MIB; + + // Finish off the call including any return values. + return finishCall(RetVT, CLI, NumBytes); +} + +// Attempt to fast-select a return instruction. +bool PPCFastISel::SelectRet(const Instruction *I) { + + if (!FuncInfo.CanLowerReturn) + return false; + + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + // Build a list of return value registers. + SmallVector<unsigned, 4> RetRegs; + CallingConv::ID CC = F.getCallingConv(); + + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context); + CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS); + const Value *RV = Ret->getOperand(0); + + // FIXME: Only one output register for now. + if (ValLocs.size() > 1) + return false; + + // Special case for returning a constant integer of any size - materialize + // the constant as an i64 and copy it to the return register. 
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) { + CCValAssign &VA = ValLocs[0]; + + unsigned RetReg = VA.getLocReg(); + // We still need to worry about properly extending the sign. For example, + // we could have only a single bit or a constant that needs zero + // extension rather than sign extension. Make sure we pass the return + // value extension property to integer materialization. + unsigned SrcReg = + PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); + + RetRegs.push_back(RetReg); + + } else { + unsigned Reg = getRegForValue(RV); + + if (Reg == 0) + return false; + + // Copy the result values into the output registers. + for (unsigned i = 0; i < ValLocs.size(); ++i) { + + CCValAssign &VA = ValLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + RetRegs.push_back(VA.getLocReg()); + unsigned SrcReg = Reg + VA.getValNo(); + + EVT RVEVT = TLI.getValueType(DL, RV->getType()); + if (!RVEVT.isSimple()) + return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getLocVT(); + + if (RVVT != DestVT && RVVT != MVT::i8 && + RVVT != MVT::i16 && RVVT != MVT::i32) + return false; + + if (RVVT != DestVT) { + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + llvm_unreachable("Full value assign but types don't match?"); + case CCValAssign::AExt: + case CCValAssign::ZExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true)) + return false; + SrcReg = TmpReg; + break; + } + case CCValAssign::SExt: { + const TargetRegisterClass *RC = + (DestVT == MVT::i64) ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned TmpReg = createResultReg(RC); + if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false)) + return false; + SrcReg = TmpReg; + break; + } + } + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetRegs[i]) + .addReg(SrcReg); + } + } + } + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::BLR8)); + + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); + + return true; +} + +// Attempt to emit an integer extend of SrcReg into DestReg. Both +// signed and zero extensions are supported. Return false if we +// can't handle it. +bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + unsigned DestReg, bool IsZExt) { + if (DestVT != MVT::i32 && DestVT != MVT::i64) + return false; + if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32) + return false; + + // Signed extensions use EXTSB, EXTSH, EXTSW. + if (!IsZExt) { + unsigned Opc; + if (SrcVT == MVT::i8) + Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64; + else if (SrcVT == MVT::i16) + Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64; + else { + assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); + Opc = PPC::EXTSW_32_64; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(SrcReg); + + // Unsigned 32-bit extensions use RLWINM. + } else if (DestVT == MVT::i32) { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 24; + else { + assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); + MB = 16; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), + DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); + + // Unsigned 64-bit extensions use RLDICL (with a 32-bit source). 
+ } else { + unsigned MB; + if (SrcVT == MVT::i8) + MB = 56; + else if (SrcVT == MVT::i16) + MB = 48; + else + MB = 32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(PPC::RLDICL_32_64), DestReg) + .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); + } + + return true; +} + +// Attempt to fast-select an indirect branch instruction. +bool PPCFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) + return false; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) + .addReg(AddrReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); + + const IndirectBrInst *IB = cast<IndirectBrInst>(I); + for (const BasicBlock *SuccBB : IB->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]); + + return true; +} + +// Attempt to fast-select an integer truncate instruction. +bool PPCFastISel::SelectTrunc(const Instruction *I) { + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); + + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) + return false; + + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + return false; + + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + // The only interesting case is when we need to switch register classes. + if (SrcVT == MVT::i64) { + unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), + ResultReg).addReg(SrcReg, 0, PPC::sub_32); + SrcReg = ResultReg; + } + + updateValueMap(I, SrcReg); + return true; +} + +// Attempt to fast-select an integer extend instruction. 
+bool PPCFastISel::SelectIntExt(const Instruction *I) { + Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + bool IsZExt = isa<ZExtInst>(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) return false; + + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); + if (!SrcEVT.isSimple()) + return false; + if (!DestEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); + + // If we know the register class needed for the result of this + // instruction, use it. Otherwise pick the register class of the + // correct size that does not contain X0/R0, since we don't know + // whether downstream uses permit that assignment. + unsigned AssignedReg = FuncInfo.ValueMap[I]; + const TargetRegisterClass *RC = + (AssignedReg ? MRI.getRegClass(AssignedReg) : + (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : + &PPC::GPRC_and_GPRC_NOR0RegClass)); + unsigned ResultReg = createResultReg(RC); + + if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt)) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +// Attempt to fast-select an instruction that wasn't handled by +// the table-generated machinery. 
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) { + + switch (I->getOpcode()) { + case Instruction::Load: + return SelectLoad(I); + case Instruction::Store: + return SelectStore(I); + case Instruction::Br: + return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectIToFP(I, /*IsSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*IsSigned*/ false); + case Instruction::FPToSI: + return SelectFPToI(I, /*IsSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*IsSigned*/ false); + case Instruction::Add: + return SelectBinaryIntOp(I, ISD::ADD); + case Instruction::Or: + return SelectBinaryIntOp(I, ISD::OR); + case Instruction::Sub: + return SelectBinaryIntOp(I, ISD::SUB); + case Instruction::Call: + return selectCall(I); + case Instruction::Ret: + return SelectRet(I); + case Instruction::Trunc: + return SelectTrunc(I); + case Instruction::ZExt: + case Instruction::SExt: + return SelectIntExt(I); + // Here add other flavors of Instruction::XXX that automated + // cases don't catch. For example, switches are terminators + // that aren't yet handled. + default: + break; + } + return false; +} + +// Materialize a floating-point constant into a register, and return +// the register number (or zero if we failed to handle it). +unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { + // No plans to handle long double here. + if (VT != MVT::f32 && VT != MVT::f64) + return 0; + + // All FP constants are loaded from the constant pool. + unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); + assert(Align > 0 && "Unexpectedly missing alignment information!"); + unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); + const TargetRegisterClass *RC = + (VT == MVT::f32) ? 
&PPC::F4RCRegClass : &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); + CodeModel::Model CModel = TM.getCodeModel(); + + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); + + unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD; + unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + + PPCFuncInfo->setUsesTOCBasePtr(); + // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). + if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), + TmpReg) + .addConstantPoolIndex(Idx).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addImm(0).addReg(TmpReg).addMemOperand(MMO); + } else { + // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); + // But for large code model, we must generate a LDtocL followed + // by the LF[SD]. + if (CModel == CodeModel::Large) { + unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), + TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addImm(0) + .addReg(TmpReg2); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) + .addReg(TmpReg) + .addMemOperand(MMO); + } + + return DestReg; +} + +// Materialize the address of a global value into a register, and return +// the register number (or zero if we failed to handle it). 
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { + assert(VT == MVT::i64 && "Non-address!"); + const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass; + unsigned DestReg = createResultReg(RC); + + // Global values may be plain old object addresses, TLS object + // addresses, constant pool entries, or jump tables. How we generate + // code for these may depend on small, medium, or large code model. + CodeModel::Model CModel = TM.getCodeModel(); + + // FIXME: Jump tables are not yet required because fast-isel doesn't + // handle switches; if that changes, we need them as well. For now, + // what follows assumes everything's a generic (or TLS) global address. + + // FIXME: We don't yet handle the complexity of TLS. + if (GV->isThreadLocal()) + return 0; + + PPCFuncInfo->setUsesTOCBasePtr(); + // For small code model, generate a simple TOC load. + if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), + DestReg) + .addGlobalAddress(GV) + .addReg(PPC::X2); + else { + // If the address is an externally defined symbol, a symbol with common + // or externally available linkage, a non-local function address, or a + // jump table address (not yet needed), or if we are generating code + // for large code model, we generate: + // LDtocL(GV, ADDIStocHA(%X2, GV)) + // Otherwise we generate: + // ADDItocL(ADDIStocHA(%X2, GV), GV) + // Either way, start with the ADDIStocHA: + unsigned HighPartReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); + + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), + DestReg).addGlobalAddress(GV).addReg(HighPartReg); + } else { + // Otherwise generate the ADDItocL. 
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), + DestReg).addReg(HighPartReg).addGlobalAddress(GV); + } + } + + return DestReg; +} + +// Materialize a 32-bit integer constant into a register, and return +// the register number (or zero if we failed to handle it). +unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, + const TargetRegisterClass *RC) { + unsigned Lo = Imm & 0xFFFF; + unsigned Hi = (Imm >> 16) & 0xFFFF; + + unsigned ResultReg = createResultReg(RC); + bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); + + if (isInt<16>(Imm)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg) + .addImm(Imm); + else if (Lo) { + // Both Lo and Hi have nonzero bits. + unsigned TmpReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) + .addImm(Hi); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) + .addReg(TmpReg).addImm(Lo); + } else + // Just Hi bits. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) + .addImm(Hi); + + return ResultReg; +} + +// Materialize a 64-bit integer constant into a register, and return +// the register number (or zero if we failed to handle it). +unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, + const TargetRegisterClass *RC) { + unsigned Remainder = 0; + unsigned Shift = 0; + + // If the value doesn't fit in 32 bits, see if we can shift it + // so that it fits in 32 bits. + if (!isInt<32>(Imm)) { + Shift = countTrailingZeros<uint64_t>(Imm); + int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; + + if (isInt<32>(ImmSh)) + Imm = ImmSh; + else { + Remainder = Imm; + Shift = 32; + Imm >>= 32; + } + } + + // Handle the high-order 32 bits (if shifted) or the whole 32 bits + // (if not shifted). 
+ unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC); + if (!Shift) + return TmpReg1; + + // If upper 32 bits were not zero, we've built them and need to shift + // them into place. + unsigned TmpReg2; + if (Imm) { + TmpReg2 = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), + TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); + } else + TmpReg2 = TmpReg1; + + unsigned TmpReg3, Hi, Lo; + if ((Hi = (Remainder >> 16) & 0xFFFF)) { + TmpReg3 = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), + TmpReg3).addReg(TmpReg2).addImm(Hi); + } else + TmpReg3 = TmpReg2; + + if ((Lo = Remainder & 0xFFFF)) { + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), + ResultReg).addReg(TmpReg3).addImm(Lo); + return ResultReg; + } + + return TmpReg3; +} + +// Materialize an integer constant into a register, and return +// the register number (or zero if we failed to handle it). +unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && + VT != MVT::i1) + return 0; + + const TargetRegisterClass *RC = + ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); + int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue(); + + // If the constant is in range, use a load-immediate. + // Since LI will sign extend the constant we need to make sure that for + // our zeroext constants that the sign extended constant fits into 16-bits - + // a range of 0..0x7fff. 
+ if (isInt<16>(Imm)) { + unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; + unsigned ImmReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) + .addImm(Imm); + return ImmReg; + } + + // Construct the constant piecewise. + if (VT == MVT::i64) + return PPCMaterialize64BitInt(Imm, RC); + else if (VT == MVT::i32) + return PPCMaterialize32BitInt(Imm, RC); + + return 0; +} + +// Materialize a constant into a register, and return the register +// number (or zero if we failed to handle it). +unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { + EVT CEVT = TLI.getValueType(DL, C->getType(), true); + + // Only handle simple types. + if (!CEVT.isSimple()) return 0; + MVT VT = CEVT.getSimpleVT(); + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return PPCMaterializeFP(CFP, VT); + else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return PPCMaterializeGV(GV, VT); + else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) + // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo + // assumes that constant PHI operands will be zero extended, and failure to + // match that assumption will cause problems if we sign extend here but + // some user of a PHI is in a block for which we fall back to full SDAG + // instruction selection. + return PPCMaterializeInt(CI, VT, false); + + return 0; +} + +// Materialize the address created by an alloca into a register, and +// return the register number (or zero if we failed to handle it). +unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) { + // Don't handle dynamic allocas. 
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; + + MVT VT; + if (!isLoadTypeLegal(AI->getType(), VT)) return 0; + + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), + ResultReg).addFrameIndex(SI->second).addImm(0); + return ResultReg; + } + + return 0; +} + +// Fold loads into extends when possible. +// FIXME: We can have multiple redundant extend/trunc instructions +// following a load. The folding only picks up one. Extend this +// to check subsequent instructions for the same pattern and remove +// them. Thus ResultReg should be the def reg for the last redundant +// instruction in a chain, and all intervening instructions can be +// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll +// to add ELF64-NOT: rldicl to the appropriate tests when this works. +bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + bool IsZExt = false; + switch(MI->getOpcode()) { + default: + return false; + + case PPC::RLDICL: + case PPC::RLDICL_32_64: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 56) || + (VT == MVT::i16 && MB <= 48) || + (VT == MVT::i32 && MB <= 32)) + break; + return false; + } + + case PPC::RLWINM: + case PPC::RLWINM8: { + IsZExt = true; + unsigned MB = MI->getOperand(3).getImm(); + if ((VT == MVT::i8 && MB <= 24) || + (VT == MVT::i16 && MB <= 16)) + break; + return false; + } + + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + /* There is no sign-extending load-byte instruction. 
*/ + return false; + + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + + case PPC::EXTSW: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: { + if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) + return false; + break; + } + } + + // See if we can handle this address. + Address Addr; + if (!PPCComputeAddress(LI->getOperand(0), Addr)) + return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) + return false; + + MI->eraseFromParent(); + return true; +} + +// Attempt to lower call arguments in a faster way than done by +// the selection DAG code. +bool PPCFastISel::fastLowerArguments() { + // Defer to normal argument lowering for now. It's reasonably + // efficient. Consider doing something like ARM to handle the + // case where all args fit in registers, no varargs, no float + // or vector args. + return false; +} + +// Handle materializing integer constants into a register. This is not +// automatically generated for PowerPC, so must be explicitly created here. +unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { + + if (Opc != ISD::Constant) + return 0; + + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && + VT != MVT::i1) + return 0; + + const TargetRegisterClass *RC = ((VT == MVT::i64) ? 
&PPC::G8RCRegClass : + &PPC::GPRCRegClass); + if (VT == MVT::i64) + return PPCMaterialize64BitInt(Imm, RC); + else + return PPCMaterialize32BitInt(Imm, RC); +} + +// Override for ADDI and ADDI8 to set the correct register class +// on RHS operand 0. The automatic infrastructure naively assumes +// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost +// for these cases. At the moment, none of the other automatically +// generated RI instructions require special treatment. However, once +// SelectSelect is implemented, "isel" requires similar handling. +// +// Also be conservative about the output register class. Avoid +// assigning R0 or X0 to the output register for GPRC and G8RC +// register classes, as any such result could be used in ADDI, etc., +// where those regs have another meaning. +unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + if (MachineInstOpcode == PPC::ADDI) + MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass); + else if (MachineInstOpcode == PPC::ADDI8) + MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass); + + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, + Op0, Op0IsKill, Imm); +} + +// Override for instructions with one register operand to avoid use of +// R0/X0. The automatic infrastructure isn't aware of the context so +// we must be conservative. +unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? 
&PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill); +} + +// Override for instructions with two register operands to avoid use +// of R0/X0. The automatic infrastructure isn't aware of the context +// so we must be conservative. +unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass* RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + const TargetRegisterClass *UseRC = + (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass : + (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC)); + + return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill, + Op1, Op1IsKill); +} + +namespace llvm { + // Create the fast instruction selector for PowerPC64 ELF. + FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) { + // Only available on 64-bit ELF for now. + const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>(); + if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) + return new PPCFastISel(FuncInfo, LibInfo); + return nullptr; + } +} |