diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 266 |
1 files changed, 167 insertions, 99 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 547b71a6101a..22c662a79d87 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -132,6 +132,10 @@ cl::opt<bool> DisableAutoPairedVecSt( cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden); +static cl::opt<unsigned> PPCMinimumJumpTableEntries( + "ppc-min-jump-table-entries", cl::init(64), cl::Hidden, + cl::desc("Set minimum number of entries to use a jump table on PPC")); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, @@ -144,6 +148,12 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); static const char AIXSSPCanaryWordName[] = "__ssp_canary_word"; +// A faster local-exec TLS access sequence (enabled with the +// -maix-small-local-exec-tls option) can be produced for TLS variables; +// consistent with the IBM XL compiler, we apply a max size of slightly under +// 32KB. +constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751; + // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; @@ -389,7 +399,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // MASS transformation for LLVM intrinsics with replicating fast-math flag // to be consistent to PPCGenScalarMASSEntries pass - if (TM.getOptLevel() == CodeGenOpt::Aggressive) { + if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) { setOperationAction(ISD::FSIN , MVT::f64, Custom); setOperationAction(ISD::FCOS , MVT::f64, Custom); setOperationAction(ISD::FPOW , MVT::f64, Custom); @@ -1419,6 +1429,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); setLibcallName(RTLIB::FMA_F128, "fmaf128"); + setLibcallName(RTLIB::FREXP_F128, "frexpf128"); if (Subtarget.isAIXABI()) { setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove"); @@ -1434,6 +1445,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setJumpIsExpensive(); } + // TODO: The default entry number is set to 64. This stops most jump table + // generation on PPC. But it is good for current PPC HWs because the indirect + // branch instruction mtctr to the jump table may lead to bad branch predict. + // Re-evaluate this value on future HWs that can do better with mtctr. + setMinimumJumpTableEntries(PPCMinimumJumpTableEntries); + setMinFunctionAlignment(Align(4)); switch (Subtarget.getCPUDirective()) { @@ -1627,6 +1644,27 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } +bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore( + Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { + if (!Subtarget.isPPC64() || !Subtarget.hasVSX()) + return false; + + if (auto *VTy = dyn_cast<VectorType>(VectorTy)) { + if (VTy->getScalarType()->isIntegerTy()) { + // ElemSizeInBits 8/16 can fit in immediate field, not needed here. + if (ElemSizeInBits == 32) { + Index = Subtarget.isLittleEndian() ? 2 : 1; + return true; + } + if (ElemSizeInBits == 64) { + Index = Subtarget.isLittleEndian() ? 1 : 0; + return true; + } + } + } + return false; +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; @@ -2936,7 +2974,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, template <typename Ty> static bool isValidPCRelNode(SDValue N) { Ty *PCRelCand = dyn_cast<Ty>(N); - return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG); + return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags())); } /// Returns true if this address is a PC Relative address. @@ -3097,8 +3135,8 @@ static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, // Don't use the pic base if not in PIC relocation model. if (IsPIC) { - HiOpFlags |= PPCII::MO_PIC_FLAG; - LoOpFlags |= PPCII::MO_PIC_FLAG; + HiOpFlags = PPCII::MO_PIC_HA_FLAG; + LoOpFlags = PPCII::MO_PIC_LO_FLAG; } } @@ -3326,36 +3364,60 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool Is64Bit = Subtarget.isPPC64(); + bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); + bool IsTLSLocalExecModel = Model == TLSModel::LocalExec; - if (Model == TLSModel::LocalExec) { + if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) { SDValue VariableOffsetTGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG); SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA); SDValue TLSReg; - if (Is64Bit) - // For local-exec on AIX (64-bit), the sequence that is generated involves - // a load of the variable offset (from the TOC), followed by an add of the - // loaded variable offset to R13 (the thread pointer). + if (Is64Bit) { + // For local-exec and initial-exec on AIX (64-bit), the sequence generated + // involves a load of the variable offset (from the TOC), followed by an + // add of the loaded variable offset to R13 (the thread pointer). // This code sequence looks like: // ld reg1,var[TC](2) // add reg2, reg1, r13 // r13 contains the thread pointer TLSReg = DAG.getRegister(PPC::X13, MVT::i64); - else - // For local-exec on AIX (32-bit), the sequence that is generated involves - // loading the variable offset from the TOC, generating a call to + + // With the -maix-small-local-exec-tls option, produce a faster access + // sequence for local-exec TLS variables where the offset from the TLS + // base is encoded as an immediate operand. + // + // We only utilize the faster local-exec access sequence when the TLS + // variable has a size within the policy limit. We treat types that are + // not sized or are empty as being over the policy size limit. + if (HasAIXSmallLocalExecTLS && IsTLSLocalExecModel) { + Type *GVType = GV->getValueType(); + if (GVType->isSized() && !GVType->isEmptyTy() && + GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <= + AIXSmallTlsPolicySizeLimit) + return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg); + } + } else { + // For local-exec and initial-exec on AIX (32-bit), the sequence generated + // involves loading the variable offset from the TOC, generating a call to // .__get_tpointer to get the thread pointer (which will be in R3), and // adding the two together: // lwz reg1,var[TC](2) // bla .__get_tpointer // add reg2, reg1, r3 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT); + + // We do not implement the 32-bit version of the faster access sequence + // for local-exec that is controlled by -maix-small-local-exec-tls. + if (HasAIXSmallLocalExecTLS) + report_fatal_error("The small-local-exec TLS access sequence is " + "currently only supported on AIX (64-bit mode)."); + } return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset); } - // The Local-Exec and General-Dynamic TLS models are currently the only - // supported access models. If Local-exec is not possible or specified, all - // GlobalTLSAddress nodes are lowered using the general-dynamic model. + // Only Local-Exec, Initial-Exec and General-Dynamic TLS models are currently + // supported models. If Local- or Initial-exec are not possible or specified, + // all GlobalTLSAddress nodes are lowered using the general-dynamic model. // We need to generate two TOC entries, one for the variable offset, one for // the region handle. The global address for the TOC entry of the region // handle is created with the MO_TLSGDM_FLAG flag and the global address @@ -3393,8 +3455,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op, if (Model == TLSModel::LocalExec) { if (Subtarget.isUsingPCRelativeCalls()) { SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); - SDValue TGA = DAG.getTargetGlobalAddress( - GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL_PCREL_FLAG); SDValue MatAddr = DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); @@ -3416,8 +3478,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op, SDValue TGA = DAG.getTargetGlobalAddress( GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0); SDValue TGATLS = DAG.getTargetGlobalAddress( - GV, dl, PtrVT, 0, - IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS); + GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS); SDValue TPOffset; if (IsPCRel) { SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA); @@ -3513,8 +3574,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, EVT Ty = getPointerTy(DAG.getDataLayout()); if (isAccessedAsGotIndirect(Op)) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), - PPCII::MO_PCREL_FLAG | - PPCII::MO_GOT_FLAG); + PPCII::MO_GOT_PCREL_FLAG); SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel, MachinePointerInfo()); @@ -3764,21 +3824,22 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { // Check all operands that may contain the LR. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const InlineAsm::Flag Flags( + cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue()); + unsigned NumVals = Flags.getNumOperandRegisters(); ++i; // Skip the ID value. - switch (InlineAsm::getKind(Flags)) { + switch (Flags.getKind()) { default: llvm_unreachable("Bad flags!"); - case InlineAsm::Kind_RegUse: - case InlineAsm::Kind_Imm: - case InlineAsm::Kind_Mem: + case InlineAsm::Kind::RegUse: + case InlineAsm::Kind::Imm: + case InlineAsm::Kind::Mem: i += NumVals; break; - case InlineAsm::Kind_Clobber: - case InlineAsm::Kind_RegDef: - case InlineAsm::Kind_RegDefEarlyClobber: { + case InlineAsm::Kind::Clobber: + case InlineAsm::Kind::RegDef: + case InlineAsm::Kind::RegDefEarlyClobber: { for (; NumVals; --NumVals, ++i) { Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); if (Reg != PPC::LR && Reg != PPC::LR8) @@ -5278,7 +5339,7 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, // inserted into the DAG as part of call lowering. The restore of the TOC // pointer is modeled by using a pseudo instruction for the call opcode that // represents the 2 instruction sequence of an indirect branch and link, - // immediately followed by a load of the TOC pointer from the the stack save + // immediately followed by a load of the TOC pointer from the stack save // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC // as it is not saved or used. RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC @@ -7193,7 +7254,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( // be future work. SDValue Store = DAG.getStore( CopyFrom.getValue(1), dl, CopyFrom, - DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)), + DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)), MachinePointerInfo::getFixedStack(MF, FI, Offset)); MemOps.push_back(Store); @@ -7373,12 +7434,12 @@ SDValue PPCTargetLowering::LowerCall_AIX( } auto GetLoad = [&](EVT VT, unsigned LoadOffset) { - return DAG.getExtLoad( - ISD::ZEXTLOAD, dl, PtrVT, Chain, - (LoadOffset != 0) - ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) - : Arg, - MachinePointerInfo(), VT); + return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, + (LoadOffset != 0) + ? DAG.getObjectPtrOffset( + dl, Arg, TypeSize::getFixed(LoadOffset)) + : Arg, + MachinePointerInfo(), VT); }; unsigned LoadOffset = 0; @@ -7408,11 +7469,11 @@ SDValue PPCTargetLowering::LowerCall_AIX( // Only memcpy the bytes that don't pass in register. MemcpyFlags.setByValSize(ByValSize - LoadOffset); Chain = CallSeqStart = createMemcpyOutsideCallSeq( - (LoadOffset != 0) - ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) - : Arg, - DAG.getObjectPtrOffset(dl, StackPtr, - TypeSize::Fixed(ByValVA.getLocMemOffset())), + (LoadOffset != 0) ? DAG.getObjectPtrOffset( + dl, Arg, TypeSize::getFixed(LoadOffset)) + : Arg, + DAG.getObjectPtrOffset( + dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())), CallSeqStart, MemcpyFlags, DAG, dl); continue; } @@ -8020,7 +8081,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // For more information, see section F.3 of the 2.06 ISA specification. // With ISA 3.0 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) || - (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs())) + (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) || + ResVT == MVT::f128) return Op; // If the RHS of the comparison is a 0.0, we don't need to do the @@ -10254,11 +10316,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, bool isLittleEndian = Subtarget.isLittleEndian(); bool isPPC64 = Subtarget.isPPC64(); - // Only need to place items backwards in LE, - // the mask will be properly calculated. - if (isLittleEndian) - std::swap(V1, V2); - if (Subtarget.hasVSX() && Subtarget.hasP9Vector() && (V1->hasOneUse() || V2->hasOneUse())) { LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using " @@ -10268,7 +10325,8 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, // The second input to XXPERM is also an output so if the second input has // multiple uses then copying is necessary, as a result we want the // single-use operand to be used as the second input to prevent copying. - if (!V2->hasOneUse() && V1->hasOneUse()) { + if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) || + (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) { std::swap(V1, V2); NeedSwap = !NeedSwap; } @@ -10307,27 +10365,24 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; - if (Opcode == PPCISD::XXPERM) { - if (V1HasXXSWAPD) { - if (SrcElt < 8) - SrcElt += 8; - else if (SrcElt < 16) - SrcElt -= 8; - } - if (V2HasXXSWAPD) { - if (SrcElt > 23) - SrcElt -= 8; - else if (SrcElt > 15) - SrcElt += 8; - } - if (NeedSwap) { - if (SrcElt < 16) - SrcElt += 16; - else - SrcElt -= 16; - } + if (V1HasXXSWAPD) { + if (SrcElt < 8) + SrcElt += 8; + else if (SrcElt < 16) + SrcElt -= 8; + } + if (V2HasXXSWAPD) { + if (SrcElt > 23) + SrcElt -= 8; + else if (SrcElt > 15) + SrcElt += 8; + } + if (NeedSwap) { + if (SrcElt < 16) + SrcElt += 16; + else + SrcElt -= 16; } - for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back( @@ -10337,18 +10392,19 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); } - if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) { - if (V1HasXXSWAPD) { - dl = SDLoc(V1->getOperand(0)); - V1 = V1->getOperand(0)->getOperand(1); - } - if (V2HasXXSWAPD) { - dl = SDLoc(V2->getOperand(0)); - V2 = V2->getOperand(0)->getOperand(1); - } - if (isPPC64 && ValType != MVT::v2f64) + if (V1HasXXSWAPD) { + dl = SDLoc(V1->getOperand(0)); + V1 = V1->getOperand(0)->getOperand(1); + } + if (V2HasXXSWAPD) { + dl = SDLoc(V2->getOperand(0)); + V2 = V2->getOperand(0)->getOperand(1); + } + + if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) { + if (ValType != MVT::v2f64) V1 = DAG.getBitcast(MVT::v2f64, V1); - if (isPPC64 && V2.getValueType() != MVT::v2f64) + if (V2.getValueType() != MVT::v2f64) V2 = DAG.getBitcast(MVT::v2f64, V2); } @@ -10369,6 +10425,11 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, if (Opcode == PPCISD::XXPERM) VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask); + // Only need to place items backwards in LE, + // the mask was properly calculated. + if (isLittleEndian) + std::swap(V1, V2); + SDValue VPERMNode = DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask); @@ -11037,14 +11098,14 @@ SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op, SmallVector<SDValue, 4> Ops{ N->getOperand(0), DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)}; - SDValue Val = N->getOperand(2); + SDValue Val = N->getOperand(1); SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val); SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val, DAG.getConstant(64, dl, MVT::i32)); ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi); Ops.push_back(ValLo); Ops.push_back(ValHi); - Ops.push_back(N->getOperand(1)); + Ops.push_back(N->getOperand(2)); return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT, N->getMemOperand()); } @@ -16659,13 +16720,14 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue>&Ops, + StringRef Constraint, + std::vector<SDValue> &Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints. - if (Constraint.length() > 1) return; + if (Constraint.size() > 1) + return; char Letter = Constraint[0]; switch (Letter) { @@ -17075,13 +17137,23 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType( const MemOp &Op, const AttributeList &FuncAttributes) const { - if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { + if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) { // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. - if (Subtarget.hasAltivec() && Op.size() >= 16 && - (Op.isAligned(Align(16)) || - ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) - return MVT::v4i32; + if (Subtarget.hasAltivec() && Op.size() >= 16) { + if (Op.isMemset() && Subtarget.hasVSX()) { + uint64_t TailSize = Op.size() % 16; + // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant + // element if vector element type matches tail store. For tail size + // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one. + if (TailSize > 2 && TailSize <= 4) { + return MVT::v8i16; + } + return MVT::v4i32; + } + if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector()) + return MVT::v4i32; + } } if (Subtarget.isPPC64()) { @@ -17227,7 +17299,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const { - if (Subtarget.hasSPE()) + if (Subtarget.hasSPE() || Subtarget.useSoftFloat()) return false; switch (Ty->getScalarType()->getTypeID()) { case Type::FloatTyID: @@ -17431,7 +17503,7 @@ bool PPCTargetLowering::useLoadStackGuardNode() const { void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (Subtarget.isAIXABI()) { M.getOrInsertGlobal(AIXSSPCanaryWordName, - Type::getInt8PtrTy(M.getContext())); + PointerType::getUnqual(M.getContext())); return; } if (!Subtarget.isTargetLinux()) @@ -18539,9 +18611,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic( Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo"); Value *IncrHi = Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi"); - Value *Addr = - Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext())); - Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi}); + Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi}); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); @@ -18566,11 +18636,9 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo"); Value *NewHi = Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi"); - Value *Addr = - Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext())); emitLeadingFence(Builder, CI, Ord); Value *LoHi = - Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi}); + Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi}); emitTrailingFence(Builder, CI, Ord); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); |