aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp 198
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMISelLowering.h 41
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp 35
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 3
-rw-r--r-- contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h 3
5 files changed, 226 insertions, 54 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index afba1587a743..32b7c87e61bb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -608,15 +608,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
// a __gnu_ prefix (which is the default).
if (Subtarget->isTargetAEABI()) {
- setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
- setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
- setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
+ { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
+
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
@@ -976,6 +988,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
+
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
@@ -984,29 +997,49 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
- for (const auto &LC :
- {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32})
- setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv"
- : "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows()
- ? "__rt_sdiv64"
- : "__aeabi_ldivmod");
- for (const auto &LC :
- {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32})
- setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv"
- : "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows()
- ? "__rt_udiv64"
- : "__aeabi_uldivmod");
-
- setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
+ if (Subtarget->isTargetWindows()) {
+ const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
+
+ { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
+ } else {
+ const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
+
+ { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
+ { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
+ }
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
@@ -3305,11 +3338,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::arm_rbit: {
- assert(Op.getOperand(1).getValueType() == MVT::i32 &&
- "RBIT intrinsic must have i32 type!");
- return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
- }
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -9232,12 +9260,102 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
return SDValue();
}
-// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
-// (only after legalization).
-static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+static bool IsVUZPShuffleNode(SDNode *N) {
+ // VUZP shuffle node.
+ if (N->getOpcode() == ARMISD::VUZP)
+ return true;
+
+ // "VUZP" on i32 is an alias for VTRN.
+ if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
+ return true;
+
+ return false;
+}
+
+static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
+ // Look for ADD(VUZP.0, VUZP.1).
+ if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
+ N0 == N1)
+ return SDValue();
+
+ // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
+ if (!N->getValueType(0).is64BitVector())
+ return SDValue();
+ // Generate vpadd.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ SDNode *Unzip = N0.getNode();
+ EVT VT = N->getValueType(0);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(Unzip->getOperand(0));
+ Ops.push_back(Unzip->getOperand(1));
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Check for two extended operands.
+ if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOpcode() == ISD::SIGN_EXTEND) &&
+ !(N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOpcode() == ISD::ZERO_EXTEND))
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+
+ // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
+ if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
+ N00 == N10)
+ return SDValue();
+
+ // We only recognize Q register paddl here; this can't be reached until
+ // after type legalization.
+ if (!N00.getValueType().is64BitVector() ||
+ !N0.getValueType().is128BitVector())
+ return SDValue();
+
+ // Generate vpaddl.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SmallVector<SDValue, 8> Ops;
+ // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
+ unsigned Opcode;
+ if (N0.getOpcode() == ISD::SIGN_EXTEND)
+ Opcode = Intrinsic::arm_neon_vpaddls;
+ else
+ Opcode = Intrinsic::arm_neon_vpaddlu;
+ Ops.push_back(DAG.getConstant(Opcode, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ EVT ElemTy = N00.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
+ N00.getOperand(0), N00.getOperand(1));
+ Ops.push_back(Concat);
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
+// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
+// much easier to match.
+static SDValue
+AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// Only perform this optimization after legalization, and only if NEON is
// available. We also expect both operands to be BUILD_VECTORs.
if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
@@ -9293,6 +9411,10 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+ // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
+ if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
+ return SDValue();
+
// Create VPADDL node.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9564,9 +9686,15 @@ static SDValue PerformADDCCombine(SDNode *N,
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget){
+ // Attempt to create vpadd for this add.
+ if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
+ return Result;
// Attempt to create vpaddl for this add.
- if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
+ if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
+ return Result;
+ if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
+ Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 5255d82d647a..7a7f91f4d3c4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -16,16 +16,28 @@
#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetLowering.h"
-#include <vector>
+#include <utility>
namespace llvm {
- class ARMConstantPoolValue;
- class ARMSubtarget;
+
+class ARMSubtarget;
+class InstrItineraryData;
namespace ARMISD {
+
// ARM Specific DAG Nodes
enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
@@ -217,12 +229,15 @@ namespace llvm {
VST3LN_UPD,
VST4LN_UPD
};
- }
+
+ } // end namespace ARMISD
/// Define some predicates that are used for node matching.
namespace ARM {
+
bool isBitFieldInvertedMask(unsigned v);
- }
+
+ } // end namespace ARM
//===--------------------------------------------------------------------===//
// ARMTargetLowering - ARM Implementation of the TargetLowering interface
@@ -531,6 +546,7 @@ namespace llvm {
std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+
void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
SDValue &Arg, RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -623,6 +639,7 @@ namespace llvm {
return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
}
+
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
void insertCopiesSplitCSR(
MachineBasicBlock *Entry,
@@ -644,9 +661,8 @@ namespace llvm {
unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
- SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
/// HandleByVal - Target-specific cleanup for ByVal support.
void HandleByVal(CCState *, unsigned &, unsigned) const override;
@@ -712,9 +728,12 @@ namespace llvm {
};
namespace ARM {
+
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
- }
-}
-#endif // ARMISELLOWERING_H
+ } // end namespace ARM
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 9bd036a1eace..324087d670b5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -29,7 +29,33 @@ using namespace llvm;
// into an ARMGenRegisterBankInfo.def (similar to AArch64).
namespace llvm {
namespace ARM {
-RegisterBank GPRRegBank;
+const uint32_t GPRCoverageData[] = {
+ // Classes 0-31
+ (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) |
+ (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) |
+ (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) |
+ (1u << ARM::GPRnopc_and_hGPRRegClassID) |
+ (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) |
+ (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) |
+ (1u << ARM::hGPR_and_tcGPRRegClassID),
+ // Classes 32-63
+ 0,
+ // Classes 64-95
+ 0,
+ // FIXME: Some of the entries below this point can be safely removed once
+ // this table is generated by TableGen. They are only needed because of the
+ // hardcoded register class limit.
+ // Classes 96-127
+ 0,
+ // Classes 128-159
+ 0,
+ // Classes 160-191
+ 0,
+ // Classes 192-223
+ 0,
+};
+
+RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData);
RegisterBank *RegBanks[] = {&GPRRegBank};
RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
@@ -51,14 +77,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
return;
AlreadyInit = true;
- // Initialize the GPR bank.
- createRegisterBank(ARM::GPRRegBankID, "GPRB");
-
- addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRRegClassID, TRI);
- addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRwithAPSRRegClassID, TRI);
const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID);
(void)RBGPR;
assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up");
+
+ // Verify the coverage of the statically initialized GPR bank.
assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) &&
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cc001b596785..2b6b36bc3e68 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -433,7 +433,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 731a5adf3d73..3c83cd92a61a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -114,7 +114,8 @@ public:
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
- TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);