aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp420
1 files changed, 322 insertions, 98 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8d32510e2004..081d4ff033bd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -97,6 +97,8 @@ public:
return SelectImmShifterOperand(N, A, B, false);
}
+ bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
+
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
@@ -118,8 +120,10 @@ public:
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode5(SDValue N, SDValue &Base,
- SDValue &Offset);
+ bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+ int Lwb, int Upb, bool FP16);
+ bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
@@ -199,10 +203,11 @@ private:
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
- /// for loading D registers. (Q registers are not supported.)
- void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes = nullptr);
+ /// for loading D registers.
+ void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
+ unsigned NumVecs, const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0 = nullptr,
+ const uint16_t *QOpcodes1 = nullptr);
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -281,7 +286,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
-/// \brief Check whether a particular node is a constant value representable as
+/// Check whether a particular node is a constant value representable as
/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
@@ -498,7 +503,7 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
- CurDAG->ReplaceAllUsesWith(N, M);
+ ReplaceUses(N, M);
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
@@ -567,6 +572,14 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
return true;
}
+// Determine whether an ISD::OR's operands are suitable to turn the operation
+// into an addition, which often has more compact encodings.
+bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
+ assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
+ Out = N;
+ return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
+}
+
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,
@@ -886,8 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
- SDValue &Base, SDValue &Offset) {
+bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+ int Lwb, int Upb, bool FP16) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
@@ -907,8 +920,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
// If the RHS is +/- imm8, fold into addr mode.
int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
- -256 + 1, 256, RHSC)) {
+ const int Scale = FP16 ? 2 : 4;
+
+ if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -921,17 +935,43 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
AddSub = ARM_AM::sub;
RHSC = -RHSC;
}
- Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
- SDLoc(N), MVT::i32);
+
+ if (FP16)
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
+ SDLoc(N), MVT::i32);
+ else
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ SDLoc(N), MVT::i32);
+
return true;
}
Base = N;
- Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
- SDLoc(N), MVT::i32);
+
+ if (FP16)
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+ else
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ int Lwb = -256 + 1;
+ int Upb = 256;
+ return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ int Lwb = -512 + 1;
+ int Upb = 512;
+ return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
+}
+
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
SDValue &Align) {
Addr = N;
@@ -1467,7 +1507,7 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
return false;
}
-/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
@@ -1478,7 +1518,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form a D register from a pair of S registers.
+/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
@@ -1489,7 +1529,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form a quad register from a pair of D registers.
+/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
@@ -1500,7 +1540,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form 4 consecutive D registers from a pair of Q registers.
+/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
@@ -1511,7 +1551,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form 4 consecutive S registers.
+/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
@@ -1526,7 +1566,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form 4 consecutive D registers.
+/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
@@ -1541,7 +1581,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
-/// \brief Form 4 consecutive Q registers.
+/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
SDLoc dl(V0.getNode());
@@ -1708,7 +1748,9 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -1732,9 +1774,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
case MVT::v2f64:
- case MVT::v2i64: OpcodeIndex = 3;
- assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
- break;
+ case MVT::v2i64: OpcodeIndex = 3; break;
}
EVT ResTy;
@@ -1765,15 +1805,17 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
- // case entirely when the rest are updated to that form, too.
bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
- if ((NumVecs <= 2) && !IsImmUpdate)
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
- // check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
- Ops.push_back(IsImmUpdate ? Reg0 : Inc);
+ if (!IsImmUpdate) {
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for the opcode rather than the number of vector elements.
+ if (isVLDfixed(Opc))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
+ // the operands if not such an opcode.
+ } else if (!isVLDfixed(Opc))
+ Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1844,7 +1886,9 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -1862,19 +1906,19 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
default: llvm_unreachable("unhandled vst type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4f16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
case MVT::v1i64: OpcodeIndex = 3; break;
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
case MVT::v2f64:
- case MVT::v2i64: OpcodeIndex = 3;
- assert(NumVecs == 1 && "v2i64 type only supported for VST1");
- break;
+ case MVT::v2i64: OpcodeIndex = 3; break;
}
std::vector<EVT> ResTys;
@@ -1919,16 +1963,17 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
- // case entirely when the rest are updated to that form, too.
bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
- if (NumVecs <= 2 && !IsImmUpdate)
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
- // check for that explicitly too. Horribly hacky, but temporary.
- if (!IsImmUpdate)
+ if (!IsImmUpdate) {
+ // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
+ // check for the opcode rather than the number of vector elements.
+ if (isVSTfixed(Opc))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
Ops.push_back(Inc);
- else if (NumVecs > 2 && !isVSTfixed(Opc))
+ }
+ // VST1/VST2 fixed increment does not need Reg0 so only include it in
+ // the operands if not such an opcode.
+ else if (!isVSTfixed(Opc))
Ops.push_back(Reg0);
}
Ops.push_back(SrcReg);
@@ -1993,7 +2038,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -2109,21 +2156,22 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
+ bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
- const uint16_t *QOpcodes) {
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
-
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
unsigned Alignment = 0;
if (NumVecs != 3) {
@@ -2140,49 +2188,84 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
}
Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
- unsigned Opc;
+ unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld-dup type");
- case MVT::v8i8: Opc = DOpcodes[0]; break;
- case MVT::v16i8: Opc = QOpcodes[0]; break;
- case MVT::v4i16: Opc = DOpcodes[1]; break;
- case MVT::v8i16: Opc = QOpcodes[1]; break;
+ case MVT::v8i8:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v4i16:
+ case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = DOpcodes[2]; break;
+ case MVT::v2i32:
case MVT::v4f32:
- case MVT::v4i32: Opc = QOpcodes[2]; break;
- }
-
- SDValue Pred = getAL(CurDAG, dl);
- SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(MemAddr);
- Ops.push_back(Align);
- if (isUpdating) {
- // fixed-stride update instructions don't have an explicit writeback
- // operand. It's implicit in the opcode itself.
- SDValue Inc = N->getOperand(2);
- bool IsImmUpdate =
- isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
- if (NumVecs <= 2 && !IsImmUpdate)
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!IsImmUpdate)
- Ops.push_back(Inc);
- // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
- else if (NumVecs > 2)
- Ops.push_back(Reg0);
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v1f64:
+ case MVT::v1i64: OpcodeIndex = 3; break;
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0);
- Ops.push_back(Chain);
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+
std::vector<EVT> ResTys;
- ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ ResTys.push_back(ResTy);
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SDNode *VLdDup;
+ if (is64BitVector || NumVecs == 1) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex];
+ if (isUpdating) {
+ // fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
+ SDValue Inc = N->getOperand(2);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ if (!IsImmUpdate)
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ } else if (NumVecs == 2) {
+ const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
+ dl, ResTys, OpsA);
+
+ Chain = SDValue(VLdA, 1);
+ const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
+ VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
+ } else {
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
+ dl, ResTys, OpsA);
+
+ SDValue SuperReg = SDValue(VLdA, 0);
+ Chain = SDValue(VLdA, 1);
+ const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
+ VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
// Extract the subregisters.
@@ -2191,10 +2274,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
} else {
SDValue SuperReg = SDValue(VLdDup, 0);
static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
- unsigned SubIdx = ARM::dsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ }
}
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
@@ -2253,6 +2337,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return true;
}
+ assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width, dl, MVT::i32),
@@ -2277,6 +2362,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (LSB < 0)
return false;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width, dl, MVT::i32),
@@ -2298,6 +2384,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
// Note: The width operand is encoded as width-1.
unsigned Width = MSB - LSB;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
CurDAG->getTargetConstant(Width, dl, MVT::i32),
@@ -2318,6 +2405,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
@@ -2427,7 +2515,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
SDValue X = And.getOperand(0);
auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
- if (!C || !X->hasOneUse())
+ if (!C)
return;
auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
if (!Range)
@@ -2765,7 +2853,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
}
case ARMISD::SUBE: {
- if (!Subtarget->hasV6Ops())
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
break;
// Look for a pattern to match SMMLS
// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
@@ -3026,14 +3114,14 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1DUPd32 };
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
ARM::VLD1DUPq32 };
- SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
- SelectVLDDup(N, false, 2, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
return;
}
@@ -3041,7 +3129,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
- SelectVLDDup(N, false, 3, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
return;
}
@@ -3049,7 +3137,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
- SelectVLDDup(N, false, 4, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
return;
}
@@ -3060,7 +3148,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
ARM::VLD1DUPq16wb_fixed,
ARM::VLD1DUPq32wb_fixed };
- SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
return;
}
@@ -3068,7 +3156,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd32wb_fixed };
- SelectVLDDup(N, true, 2, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
return;
}
@@ -3076,7 +3164,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
ARM::VLD3DUPd16Pseudo_UPD,
ARM::VLD3DUPd32Pseudo_UPD };
- SelectVLDDup(N, true, 3, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
return;
}
@@ -3084,7 +3172,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
ARM::VLD4DUPd16Pseudo_UPD,
ARM::VLD4DUPd32Pseudo_UPD };
- SelectVLDDup(N, true, 4, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
return;
}
@@ -3407,6 +3495,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case Intrinsic::arm_neon_vld1x2: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
+ ARM::VLD1d16QPseudo,
+ ARM::VLD1d32QPseudo,
+ ARM::VLD1d64QPseudo };
+ SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld1x3: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
+ ARM::VLD1d16TPseudo,
+ ARM::VLD1d32TPseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
+ ARM::VLD1q16LowTPseudo_UPD,
+ ARM::VLD1q32LowTPseudo_UPD,
+ ARM::VLD1q64LowTPseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
+ ARM::VLD1q16HighTPseudo,
+ ARM::VLD1q32HighTPseudo,
+ ARM::VLD1q64HighTPseudo };
+ SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld1x4: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
+ ARM::VLD1d16QPseudo,
+ ARM::VLD1d32QPseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
+ ARM::VLD1q16LowQPseudo_UPD,
+ ARM::VLD1q32LowQPseudo_UPD,
+ ARM::VLD1q64LowQPseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
+ ARM::VLD1q16HighQPseudo,
+ ARM::VLD1q32HighQPseudo,
+ ARM::VLD1q64HighQPseudo };
+ SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
case Intrinsic::arm_neon_vld2: {
static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD1q64 };
@@ -3446,6 +3579,52 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case Intrinsic::arm_neon_vld2dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
+ ARM::VLD2DUPq16EvenPseudo,
+ ARM::VLD2DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
+ ARM::VLD2DUPq16OddPseudo,
+ ARM::VLD2DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld3dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
+ ARM::VLD3DUPq16EvenPseudo,
+ ARM::VLD3DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
+ ARM::VLD3DUPq16OddPseudo,
+ ARM::VLD3DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld4dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
+ ARM::VLD4DUPq16EvenPseudo,
+ ARM::VLD4DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
+ ARM::VLD4DUPq16OddPseudo,
+ ARM::VLD4DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
case Intrinsic::arm_neon_vld2lane: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
ARM::VLD2LNd16Pseudo,
@@ -3485,6 +3664,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case Intrinsic::arm_neon_vst1x2: {
+ static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
+ ARM::VST1d16QPseudo,
+ ARM::VST1d32QPseudo,
+ ARM::VST1d64QPseudo };
+ SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vst1x3: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
+ ARM::VST1d16TPseudo,
+ ARM::VST1d32TPseudo,
+ ARM::VST1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
+ ARM::VST1q16LowTPseudo_UPD,
+ ARM::VST1q32LowTPseudo_UPD,
+ ARM::VST1q64LowTPseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
+ ARM::VST1q16HighTPseudo,
+ ARM::VST1q32HighTPseudo,
+ ARM::VST1q64HighTPseudo };
+ SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vst1x4: {
+ static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
+ ARM::VST1d16QPseudo,
+ ARM::VST1d32QPseudo,
+ ARM::VST1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
+ ARM::VST1q16LowQPseudo_UPD,
+ ARM::VST1q32LowQPseudo_UPD,
+ ARM::VST1q64LowQPseudo_UPD };
+ static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
+ ARM::VST1q16HighQPseudo,
+ ARM::VST1q32HighQPseudo,
+ ARM::VST1q64HighQPseudo };
+ SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
case Intrinsic::arm_neon_vst2: {
static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };