diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 198 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 41 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 35 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 3 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 |
5 files changed, 226 insertions, 54 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index afba1587a743..32b7c87e61bb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -608,15 +608,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have // a __gnu_ prefix (which is the default). if (Subtarget->isTargetAEABI()) { - setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h"); - setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h"); - setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f"); + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, + { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else addRegisterClass(MVT::i32, &ARM::GPRRegClass); + if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); @@ -976,6 +988,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); + // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || @@ -984,29 +997,49 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; - for (const auto &LC : - {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32}) - setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv" - : "__aeabi_idivmod"); - setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows() - ? "__rt_sdiv64" - : "__aeabi_ldivmod"); - for (const auto &LC : - {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32}) - setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv" - : "__aeabi_uidivmod"); - setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows() - ? "__rt_udiv64" - : "__aeabi_uldivmod"); - - setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); + if (Subtarget->isTargetWindows()) { + const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, + + { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } + } else { + const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, + { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, + + { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, + { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } + } setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); @@ -3305,11 +3338,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. - case Intrinsic::arm_rbit: { - assert(Op.getOperand(1).getValueType() == MVT::i32 && - "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); - } case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -9232,12 +9260,102 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, return SDValue(); } -// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction -// (only after legalization). -static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, +static bool IsVUZPShuffleNode(SDNode *N) { + // VUZP shuffle node. + if (N->getOpcode() == ARMISD::VUZP) + return true; + + // "VUZP" on i32 is an alias for VTRN. + if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32) + return true; + + return false; +} + +static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + // Look for ADD(VUZP.0, VUZP.1). + if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() || + N0 == N1) + return SDValue(); + + // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD. + if (!N->getValueType(0).is64BitVector()) + return SDValue(); + // Generate vpadd. + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(N); + SDNode *Unzip = N0.getNode(); + EVT VT = N->getValueType(0); + + SmallVector<SDValue, 8> Ops; + Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Ops.push_back(Unzip->getOperand(0)); + Ops.push_back(Unzip->getOperand(1)); + + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); +} + +static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Check for two extended operands. + if (!(N0.getOpcode() == ISD::SIGN_EXTEND && + N1.getOpcode() == ISD::SIGN_EXTEND) && + !(N0.getOpcode() == ISD::ZERO_EXTEND && + N1.getOpcode() == ISD::ZERO_EXTEND)) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + + // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1)) + if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() || + N00 == N10) + return SDValue(); + + // We only recognize Q register paddl here; this can't be reached until + // after type legalization. + if (!N00.getValueType().is64BitVector() || + !N0.getValueType().is128BitVector()) + return SDValue(); + + // Generate vpaddl. + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SmallVector<SDValue, 8> Ops; + // Form vpaddl.sN or vpaddl.uN depending on the kind of extension. + unsigned Opcode; + if (N0.getOpcode() == ISD::SIGN_EXTEND) + Opcode = Intrinsic::arm_neon_vpaddls; + else + Opcode = Intrinsic::arm_neon_vpaddlu; + Ops.push_back(DAG.getConstant(Opcode, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + EVT ElemTy = N00.getValueType().getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT, + N00.getOperand(0), N00.getOperand(1)); + Ops.push_back(Concat); + + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); +} + +// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in +// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is +// much easier to match. +static SDValue +AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // Only perform optimization if after legalize, and if NEON is available. We // also expected both operands to be BUILD_VECTORs. if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() @@ -9293,6 +9411,10 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. + if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + return SDValue(); + // Create VPADDL node. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -9564,9 +9686,15 @@ static SDValue PerformADDCCombine(SDNode *N, static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget){ + // Attempt to create vpadd for this add. + if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget)) + return Result; // Attempt to create vpaddl for this add. - if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget)) + if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget)) + return Result; + if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI, + Subtarget)) return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 5255d82d647a..7a7f91f4d3c4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -16,16 +16,28 @@ #define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H #include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetLowering.h" -#include <vector> +#include <utility> namespace llvm { - class ARMConstantPoolValue; - class ARMSubtarget; + +class ARMSubtarget; +class InstrItineraryData; namespace ARMISD { + // ARM Specific DAG Nodes enum NodeType : unsigned { // Start the numbering where the builtin ops and target ops leave off. @@ -217,12 +229,15 @@ namespace llvm { VST3LN_UPD, VST4LN_UPD }; - } + + } // end namespace ARMISD /// Define some predicates that are used for node matching. namespace ARM { + bool isBitFieldInvertedMask(unsigned v); - } + + } // end namespace ARM //===--------------------------------------------------------------------===// // ARMTargetLowering - ARM Implementation of the TargetLowering interface @@ -531,6 +546,7 @@ namespace llvm { std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; + void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, @@ -623,6 +639,7 @@ namespace llvm { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); } + void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( MachineBasicBlock *Entry, @@ -644,9 +661,8 @@ namespace llvm { unsigned ArgOffset, unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; - SDValue - LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; /// HandleByVal - Target-specific cleanup for ByVal support. void HandleByVal(CCState *, unsigned &, unsigned) const override; @@ -712,9 +728,12 @@ namespace llvm { }; namespace ARM { + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); - } -} -#endif // ARMISELLOWERING_H + } // end namespace ARM + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 9bd036a1eace..324087d670b5 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -29,7 +29,33 @@ using namespace llvm; // into an ARMGenRegisterBankInfo.def (similar to AArch64). namespace llvm { namespace ARM { -RegisterBank GPRRegBank; +const uint32_t GPRCoverageData[] = { + // Classes 0-31 + (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) | + (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) | + (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) | + (1u << ARM::GPRnopc_and_hGPRRegClassID) | + (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) | + (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) | + (1u << ARM::hGPR_and_tcGPRRegClassID), + // Classes 32-63 + 0, + // Classes 64-96 + 0, + // FIXME: Some of the entries below this point can be safely removed once + // this is tablegenerated. It's only needed because of the hardcoded + // register class limit. + // Classes 97-128 + 0, + // Classes 129-160 + 0, + // Classes 161-192 + 0, + // Classes 193-224 + 0, +}; + +RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData); RegisterBank *RegBanks[] = {&GPRRegBank}; RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank}; @@ -51,14 +77,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) return; AlreadyInit = true; - // Initialize the GPR bank. - createRegisterBank(ARM::GPRRegBankID, "GPRB"); - - addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRRegClassID, TRI); - addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRwithAPSRRegClassID, TRI); const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID); (void)RBGPR; assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up"); + + // Initialize the GPR bank. assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) && "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) && diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index cc001b596785..2b6b36bc3e68 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -433,7 +433,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, int ARMTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo) { + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 731a5adf3d73..3c83cd92a61a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -114,7 +114,8 @@ public: TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, - TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>()); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace); |