diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 420 |
1 files changed, 322 insertions, 98 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 8d32510e2004..081d4ff033bd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -97,6 +97,8 @@ public: return SelectImmShifterOperand(N, A, B, false); } + bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -118,8 +120,10 @@ public: SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode5(SDValue N, SDValue &Base, - SDValue &Offset); + bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, + int Lwb, int Upb, bool FP16); + bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); @@ -199,10 +203,11 @@ private: /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used - /// for loading D registers. (Q registers are not supported.) - void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *DOpcodes, - const uint16_t *QOpcodes = nullptr); + /// for loading D registers. + void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, + unsigned NumVecs, const uint16_t *DOpcodes, + const uint16_t *QOpcodes0 = nullptr, + const uint16_t *QOpcodes1 = nullptr); /// Try to select SBFX/UBFX instructions for ARM. bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -281,7 +286,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { isInt32Immediate(N->getOperand(1).getNode(), Imm); } -/// \brief Check whether a particular node is a constant value representable as +/// Check whether a particular node is a constant value representable as /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). /// /// \param ScaledConstant [out] - On success, the pre-scaled constant value. @@ -498,7 +503,7 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); - CurDAG->ReplaceAllUsesWith(N, M); + ReplaceUses(N, M); } bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, @@ -567,6 +572,14 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, return true; } +// Determine whether an ISD::OR's operands are suitable to turn the operation +// into an addition, which often has more compact encodings. +bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { + assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); + Out = N; + return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); +} + bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, @@ -886,8 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, - SDValue &Base, SDValue &Offset) { +bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, + int Lwb, int Upb, bool FP16) { if (!CurDAG->isBaseWithConstantOffset(N)) { Base = N; if (N.getOpcode() == ISD::FrameIndex) { @@ -907,8 +920,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, // If the RHS is +/- imm8, fold into addr mode. int RHSC; - if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, - -256 + 1, 256, RHSC)) { + const int Scale = FP16 ? 2 : 4; + + if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); @@ -921,17 +935,43 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, AddSub = ARM_AM::sub; RHSC = -RHSC; } - Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), - SDLoc(N), MVT::i32); + + if (FP16) + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), + SDLoc(N), MVT::i32); + else + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + SDLoc(N), MVT::i32); + return true; } Base = N; - Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), - SDLoc(N), MVT::i32); + + if (FP16) + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), + SDLoc(N), MVT::i32); + else + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + SDLoc(N), MVT::i32); + return true; } +bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, + SDValue &Base, SDValue &Offset) { + int Lwb = -256 + 1; + int Upb = 256; + return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false); +} + +bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, + SDValue &Base, SDValue &Offset) { + int Lwb = -512 + 1; + int Upb = 512; + return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true); +} + bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align) { Addr = N; @@ -1467,7 +1507,7 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { return false; } -/// \brief Form a GPRPair pseudo register from a pair of GPR regs. +/// Form a GPRPair pseudo register from a pair of GPR regs. SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); SDValue RegClass = @@ -1478,7 +1518,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form a D register from a pair of S registers. +/// Form a D register from a pair of S registers. SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); SDValue RegClass = @@ -1489,7 +1529,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form a quad register from a pair of D registers. +/// Form a quad register from a pair of D registers. SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, @@ -1500,7 +1540,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form 4 consecutive D registers from a pair of Q registers. +/// Form 4 consecutive D registers from a pair of Q registers. SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, @@ -1511,7 +1551,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form 4 consecutive S registers. +/// Form 4 consecutive S registers. SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { SDLoc dl(V0.getNode()); @@ -1526,7 +1566,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form 4 consecutive D registers. +/// Form 4 consecutive D registers. SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { SDLoc dl(V0.getNode()); @@ -1541,7 +1581,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } -/// \brief Form 4 consecutive Q registers. +/// Form 4 consecutive Q registers. SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { SDLoc dl(V0.getNode()); @@ -1708,7 +1748,9 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDLoc dl(N); SDValue MemAddr, Align; - unsigned AddrOpIdx = isUpdating ? 1 : 2; + bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating + // nodes are not intrinsics. + unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return; @@ -1732,9 +1774,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; case MVT::v2f64: - case MVT::v2i64: OpcodeIndex = 3; - assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); - break; + case MVT::v2i64: OpcodeIndex = 3; break; } EVT ResTy; @@ -1765,15 +1805,17 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if ((NumVecs <= 2) && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) - Ops.push_back(IsImmUpdate ? Reg0 : Inc); + if (!IsImmUpdate) { + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVLDfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + // VLD1/VLD2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + } else if (!isVLDfixed(Opc)) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -1844,7 +1886,9 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDLoc dl(N); SDValue MemAddr, Align; - unsigned AddrOpIdx = isUpdating ? 1 : 2; + bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating + // nodes are not intrinsics. + unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return; @@ -1862,19 +1906,19 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, default: llvm_unreachable("unhandled vst type"); // Double-register operations: case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4f16: case MVT::v4i16: OpcodeIndex = 1; break; case MVT::v2f32: case MVT::v2i32: OpcodeIndex = 2; break; case MVT::v1i64: OpcodeIndex = 3; break; // Quad-register operations: case MVT::v16i8: OpcodeIndex = 0; break; + case MVT::v8f16: case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; case MVT::v2f64: - case MVT::v2i64: OpcodeIndex = 3; - assert(NumVecs == 1 && "v2i64 type only supported for VST1"); - break; + case MVT::v2i64: OpcodeIndex = 3; break; } std::vector<EVT> ResTys; @@ -1919,16 +1963,17 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if (NumVecs <= 2 && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if (!IsImmUpdate) + if (!IsImmUpdate) { + // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVSTfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); Ops.push_back(Inc); - else if (NumVecs > 2 && !isVSTfixed(Opc)) + } + // VST1/VST2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + else if (!isVSTfixed(Opc)) Ops.push_back(Reg0); } Ops.push_back(SrcReg); @@ -1993,7 +2038,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, SDLoc dl(N); SDValue MemAddr, Align; - unsigned AddrOpIdx = isUpdating ? 1 : 2; + bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating + // nodes are not intrinsics. + unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return; @@ -2109,21 +2156,22 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, +void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, + bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, - const uint16_t *QOpcodes) { + const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); SDLoc dl(N); SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) + unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; + if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); + bool is64BitVector = VT.is64BitVector(); unsigned Alignment = 0; if (NumVecs != 3) { @@ -2140,49 +2188,84 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, } Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); - unsigned Opc; + unsigned OpcodeIndex; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld-dup type"); - case MVT::v8i8: Opc = DOpcodes[0]; break; - case MVT::v16i8: Opc = QOpcodes[0]; break; - case MVT::v4i16: Opc = DOpcodes[1]; break; - case MVT::v8i16: Opc = QOpcodes[1]; break; + case MVT::v8i8: + case MVT::v16i8: OpcodeIndex = 0; break; + case MVT::v4i16: + case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v2f32: - case MVT::v2i32: Opc = DOpcodes[2]; break; + case MVT::v2i32: case MVT::v4f32: - case MVT::v4i32: Opc = QOpcodes[2]; break; - } - - SDValue Pred = getAL(CurDAG, dl); - SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 6> Ops; - Ops.push_back(MemAddr); - Ops.push_back(Align); - if (isUpdating) { - // fixed-stride update instructions don't have an explicit writeback - // operand. It's implicit in the opcode itself. - SDValue Inc = N->getOperand(2); - bool IsImmUpdate = - isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); - if (NumVecs <= 2 && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!IsImmUpdate) - Ops.push_back(Inc); - // FIXME: VLD3 and VLD4 haven't been updated to that form yet. - else if (NumVecs > 2) - Ops.push_back(Reg0); + case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v1f64: + case MVT::v1i64: OpcodeIndex = 3; break; } - Ops.push_back(Pred); - Ops.push_back(Reg0); - Ops.push_back(Chain); unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); + std::vector<EVT> ResTys; - ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); + ResTys.push_back(ResTy); if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + SDValue Pred = getAL(CurDAG, dl); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + SDNode *VLdDup; + if (is64BitVector || NumVecs == 1) { + SmallVector<SDValue, 6> Ops; + Ops.push_back(MemAddr); + Ops.push_back(Align); + unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] : + QOpcodes0[OpcodeIndex]; + if (isUpdating) { + // fixed-stride update instructions don't have an explicit writeback + // operand. It's implicit in the opcode itself. + SDValue Inc = N->getOperand(2); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + if (!IsImmUpdate) + Ops.push_back(Inc); + // FIXME: VLD3 and VLD4 haven't been updated to that form yet. + else if (NumVecs > 2) + Ops.push_back(Reg0); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + } else if (NumVecs == 2) { + const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], + dl, ResTys, OpsA); + + Chain = SDValue(VLdA, 1); + const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; + VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); + } else { + SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); + const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], + dl, ResTys, OpsA); + + SDValue SuperReg = SDValue(VLdA, 0); + Chain = SDValue(VLdA, 1); + const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; + VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); + } + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); // Extract the subregisters. @@ -2191,10 +2274,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, } else { SDValue SuperReg = SDValue(VLdDup, 0); static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); - unsigned SubIdx = ARM::dsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { ReplaceUses(SDValue(N, Vec), CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + } } ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) @@ -2253,6 +2337,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { return true; } + assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width, dl, MVT::i32), @@ -2277,6 +2362,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (LSB < 0) return false; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width, dl, MVT::i32), @@ -2298,6 +2384,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { // Note: The width operand is encoded as width-1. unsigned Width = MSB - LSB; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), CurDAG->getTargetConstant(Width, dl, MVT::i32), @@ -2318,6 +2405,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { return false; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, dl, MVT::i32), CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), @@ -2427,7 +2515,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { SDValue X = And.getOperand(0); auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); - if (!C || !X->hasOneUse()) + if (!C) return; auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); if (!Range) @@ -2765,7 +2853,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } } case ARMISD::SUBE: { - if (!Subtarget->hasV6Ops()) + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) break; // Look for a pattern to match SMMLS // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) @@ -3026,14 +3114,14 @@ void ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1DUPd32 }; static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, ARM::VLD1DUPq32 }; - SelectVLDDup(N, false, 1, DOpcodes, QOpcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); return; } case ARMISD::VLD2DUP: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, ARM::VLD2DUPd32 }; - SelectVLDDup(N, false, 2, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); return; } @@ -3041,7 +3129,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd32Pseudo }; - SelectVLDDup(N, false, 3, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); return; } @@ -3049,7 +3137,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd32Pseudo }; - SelectVLDDup(N, false, 4, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); return; } @@ -3060,7 +3148,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, ARM::VLD1DUPq16wb_fixed, ARM::VLD1DUPq32wb_fixed }; - SelectVLDDup(N, true, 1, DOpcodes, QOpcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); return; } @@ -3068,7 +3156,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, ARM::VLD2DUPd32wb_fixed }; - SelectVLDDup(N, true, 2, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); return; } @@ -3076,7 +3164,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd32Pseudo_UPD }; - SelectVLDDup(N, true, 3, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); return; } @@ -3084,7 +3172,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd32Pseudo_UPD }; - SelectVLDDup(N, true, 4, Opcodes); + SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); return; } @@ -3407,6 +3495,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case Intrinsic::arm_neon_vld1x2: { + static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64 }; + static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, + ARM::VLD1d16QPseudo, + ARM::VLD1d32QPseudo, + ARM::VLD1d64QPseudo }; + SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); + return; + } + + case Intrinsic::arm_neon_vld1x3: { + static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, + ARM::VLD1d16TPseudo, + ARM::VLD1d32TPseudo, + ARM::VLD1d64TPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD, + ARM::VLD1q16LowTPseudo_UPD, + ARM::VLD1q32LowTPseudo_UPD, + ARM::VLD1q64LowTPseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, + ARM::VLD1q16HighTPseudo, + ARM::VLD1q32HighTPseudo, + ARM::VLD1q64HighTPseudo }; + SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; + } + + case Intrinsic::arm_neon_vld1x4: { + static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, + ARM::VLD1d16QPseudo, + ARM::VLD1d32QPseudo, + ARM::VLD1d64QPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, + ARM::VLD1q16LowQPseudo_UPD, + ARM::VLD1q32LowQPseudo_UPD, + ARM::VLD1q64LowQPseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, + ARM::VLD1q16HighQPseudo, + ARM::VLD1q32HighQPseudo, + ARM::VLD1q64HighQPseudo }; + SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; + } + case Intrinsic::arm_neon_vld2: { static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, ARM::VLD2d32, ARM::VLD1q64 }; @@ -3446,6 +3579,52 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case Intrinsic::arm_neon_vld2dup: { + static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, + ARM::VLD2DUPd32, ARM::VLD1q64 }; + static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, + ARM::VLD2DUPq16EvenPseudo, + ARM::VLD2DUPq32EvenPseudo }; + static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, + ARM::VLD2DUPq16OddPseudo, + ARM::VLD2DUPq32OddPseudo }; + SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, + DOpcodes, QOpcodes0, QOpcodes1); + return; + } + + case Intrinsic::arm_neon_vld3dup: { + static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, + ARM::VLD3DUPd16Pseudo, + ARM::VLD3DUPd32Pseudo, + ARM::VLD1d64TPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, + ARM::VLD3DUPq16EvenPseudo, + ARM::VLD3DUPq32EvenPseudo }; + static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, + ARM::VLD3DUPq16OddPseudo, + ARM::VLD3DUPq32OddPseudo }; + SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, + DOpcodes, QOpcodes0, QOpcodes1); + return; + } + + case Intrinsic::arm_neon_vld4dup: { + static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, + ARM::VLD4DUPd16Pseudo, + ARM::VLD4DUPd32Pseudo, + ARM::VLD1d64QPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, + ARM::VLD4DUPq16EvenPseudo, + ARM::VLD4DUPq32EvenPseudo }; + static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, + ARM::VLD4DUPq16OddPseudo, + ARM::VLD4DUPq32OddPseudo }; + SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, + DOpcodes, QOpcodes0, QOpcodes1); + return; + } + case Intrinsic::arm_neon_vld2lane: { static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo, @@ -3485,6 +3664,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case Intrinsic::arm_neon_vst1x2: { + static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; + static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, + ARM::VST1d16QPseudo, + ARM::VST1d32QPseudo, + ARM::VST1d64QPseudo }; + SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); + return; + } + + case Intrinsic::arm_neon_vst1x3: { + static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, + ARM::VST1d16TPseudo, + ARM::VST1d32TPseudo, + ARM::VST1d64TPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, + ARM::VST1q16LowTPseudo_UPD, + ARM::VST1q32LowTPseudo_UPD, + ARM::VST1q64LowTPseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, + ARM::VST1q16HighTPseudo, + ARM::VST1q32HighTPseudo, + ARM::VST1q64HighTPseudo }; + SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return; + } + + case Intrinsic::arm_neon_vst1x4: { + static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, + ARM::VST1d16QPseudo, + ARM::VST1d32QPseudo, + ARM::VST1d64QPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, + ARM::VST1q16LowQPseudo_UPD, + ARM::VST1q32LowQPseudo_UPD, + ARM::VST1q64LowQPseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, + ARM::VST1q16HighQPseudo, + ARM::VST1q32HighQPseudo, + ARM::VST1q64HighQPseudo }; + SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return; + } + case Intrinsic::arm_neon_vst2: { static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, ARM::VST2d32, ARM::VST1q64 }; |