aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp483
1 files changed, 410 insertions, 73 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 76a9ac12062d..2a9a31dab74f 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -239,6 +239,10 @@ private:
void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
+ /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
+ /// vector lanes.
+ void SelectMVE_VSHLC(SDNode *N, bool Predicated);
+
/// Select long MVE vector reductions with two vector operands
/// Stride is the number of vector element widths the instruction can operate
/// on:
@@ -266,7 +270,21 @@ private:
/// pointer points to a set of NumVecs sub-opcodes used for the
/// different stages (e.g. VLD20 versus VLD21) of each load family.
void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
- const uint16_t *const *Opcodes);
+ const uint16_t *const *Opcodes, bool HasWriteback);
+
+ /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
+ /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
+ void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
+ bool Wrapping, bool Predicated);
+
+ /// SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
+ /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
+ /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
+ /// the accumulator and the immediate operand, i.e. 0
+ /// for CX1*, 1 for CX2*, 2 for CX3*
+ /// \arg \c HasAccum whether the instruction has an accumulator operand
+ void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
+ bool HasAccum);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
@@ -1173,8 +1191,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
// Only multiples of 4 are allowed for the offset, so the frame object
// alignment must be at least 4.
MachineFrameInfo &MFI = MF->getFrameInfo();
- if (MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
+ if (MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
@@ -1197,9 +1215,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
if (RHSC * 4 < MFI.getObjectSize(FI)) {
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
- if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
- if (MFI.getObjectAlignment(FI) >= 4) {
+ if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
+ if (MFI.getObjectAlign(FI) >= Align(4)) {
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
@@ -1708,7 +1726,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
EVT LoadedVT;
unsigned Opcode = 0;
bool isSExtLd, isPre;
- unsigned Align;
+ Align Alignment;
ARMVCC::VPTCodes Pred;
SDValue PredReg;
SDValue Chain, Base, Offset;
@@ -1724,7 +1742,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Chain = LD->getChain();
Base = LD->getBasePtr();
Offset = LD->getOffset();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
Pred = ARMVCC::None;
@@ -1740,7 +1758,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Chain = LD->getChain();
Base = LD->getBasePtr();
Offset = LD->getOffset();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
Pred = ARMVCC::Then;
@@ -1754,7 +1772,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
SDValue NewOffset;
- if (Align >= 2 && LoadedVT == MVT::v4i16 &&
+ if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
@@ -1772,12 +1790,12 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
- } else if (Align >= 4 &&
+ } else if (Alignment >= Align(4) &&
(CanChangeType || LoadedVT == MVT::v4i32 ||
LoadedVT == MVT::v4f32) &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
- else if (Align >= 2 &&
+ else if (Alignment >= Align(2) &&
(CanChangeType || LoadedVT == MVT::v8i16 ||
LoadedVT == MVT::v8f16) &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
@@ -1791,8 +1809,8 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
SDValue Ops[] = {Base, NewOffset,
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
Chain};
- SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
- MVT::i32, MVT::Other, Ops);
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ N->getValueType(0), MVT::Other, Ops);
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), SDValue(New, 1));
ReplaceUses(SDValue(N, 1), SDValue(New, 0));
@@ -2038,6 +2056,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
SDLoc dl(N);
@@ -2059,6 +2078,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
@@ -2066,6 +2086,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -2177,6 +2198,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
@@ -2201,6 +2223,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
@@ -2208,6 +2231,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -2328,6 +2352,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
SDLoc dl(N);
@@ -2368,11 +2393,13 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
// Quad-register operations:
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 0; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 1; break;
@@ -2511,7 +2538,16 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
Ops.push_back(N->getOperand(0)); // chain
- CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+ SmallVector<EVT, 8> VTs;
+ VTs.push_back(N->getValueType(1));
+ VTs.push_back(N->getValueType(0));
+ VTs.push_back(N->getValueType(2));
+
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(New, 0));
+ ReplaceUses(SDValue(N, 2), SDValue(New, 2));
+ CurDAG->RemoveDeadNode(N);
}
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
@@ -2581,6 +2617,25 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
+void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+ // Selects N to ARM::MVE_VSHLC, reusing N's own value-type list.
+ // Operand 1 is the vector input, operand 2 the 32-bit word of bits to shift
+ // in, and operand 3 the immediate shift count (read as a ConstantSDNode).
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(2));
+ int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
+ // The predicated form takes its predicate from operand 4.
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc);
+
+ CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
+}
+
static bool SDValueToConstBool(SDValue SDVal) {
assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
@@ -2673,7 +2728,8 @@ void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
}
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
- const uint16_t *const *Opcodes) {
+ const uint16_t *const *Opcodes,
+ bool HasWriteback) {
EVT VT = N->getValueType(0);
SDLoc Loc(N);
@@ -2693,23 +2749,141 @@ void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
}
EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
- EVT ResultTys[] = {DataTy, MVT::Other};
+ SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
+ unsigned PtrOperand = HasWriteback ? 1 : 2;
auto Data = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
SDValue Chain = N->getOperand(0);
- for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
- SDValue Ops[] = {Data, N->getOperand(2), Chain};
+ // Add a MVE_VLDn instruction for each Vec, except the last
+ for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
+ SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
auto LoadInst =
CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
Data = SDValue(LoadInst, 0);
Chain = SDValue(LoadInst, 1);
}
+ // The last may need a writeback on it
+ if (HasWriteback)
+ ResultTys = {DataTy, MVT::i32, MVT::Other};
+ SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
+ auto LoadInst =
+ CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
- for (unsigned i = 0; i < NumVecs; i++)
+ unsigned i;
+ for (i = 0; i < NumVecs; i++)
ReplaceUses(SDValue(N, i),
- CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
+ SDValue(LoadInst, 0)));
+ if (HasWriteback)
+ ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
+ ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
+ bool Wrapping, bool Predicated) { // selects N to one entry of Opcodes
+ EVT VT = N->getValueType(0);
+ SDLoc Loc(N);
+ // Opcodes[0], [1], [2] serve scalar widths of 8, 16 and 32 bits of result 0.
+ uint16_t Opcode;
+ switch (VT.getScalarSizeInBits()) {
+ case 8:
+ Opcode = Opcodes[0];
+ break;
+ case 16:
+ Opcode = Opcodes[1];
+ break;
+ case 32:
+ Opcode = Opcodes[2];
+ break;
+ default:
+ llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned OpIdx = 1; // N's operands are consumed in order from index 1
+ // When Predicated, the first operand is the Inactive value passed on below.
+ SDValue Inactive;
+ if (Predicated)
+ Inactive = N->getOperand(OpIdx++);
+
+ Ops.push_back(N->getOperand(OpIdx++)); // base
+ if (Wrapping)
+ Ops.push_back(N->getOperand(OpIdx++)); // limit
+
+ SDValue ImmOp = N->getOperand(OpIdx++); // step
+ int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc));
+ // For the predicated form, the predicate is the operand right after the step.
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
+ size_t NumExtraOps, bool HasAccum) {
+ bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+ // Emit Opcode for N; N's two i32 results come from the result register pair.
+ unsigned OpIdx = 1; // N's operands are consumed in order from index 1
+
+ // Convert and append the immediate operand designating the coprocessor.
+ SDValue ImmCorpoc = N->getOperand(OpIdx++);
+ uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
+
+ // For accumulating variants copy the low and high order parts of the
+ // accumulator into a register pair and add it to the operand vector.
+ if (HasAccum) {
+ SDValue AccLo = N->getOperand(OpIdx++);
+ SDValue AccHi = N->getOperand(OpIdx++);
+ if (IsBigEndian)
+ std::swap(AccLo, AccHi); // big-endian: pair the halves in reverse order
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
+ }
+
+ // Copy extra operands as-is.
+ for (size_t I = 0; I < NumExtraOps; I++)
+ Ops.push_back(N->getOperand(OpIdx++));
+
+ // Convert and append the immediate operand
+ SDValue Imm = N->getOperand(OpIdx);
+ uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmVal, Loc));
+
+ // Accumulating variants are IT-predicable, add predicate operands.
+ if (HasAccum) {
+ SDValue Pred = getAL(CurDAG, Loc);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
+ }
+
+ // Create the CDE instruction
+ SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
+ SDValue ResultPair = SDValue(InstrNode, 0);
+
+ // The original intrinsic had two outputs, and the output of the dual-register
+ // CDE instruction is a register pair. We need to extract the two subregisters
+ // and replace all uses of the original outputs with the extracted
+ // subregisters.
+ uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
+ if (IsBigEndian)
+ std::swap(SubRegs[0], SubRegs[1]); // same endianness test as the accumulator
+
+ for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
+ if (SDValue(N, ResIdx).use_empty())
+ continue; // this result has no users, so no extract is created for it
+ SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
+ MVT::i32, ResultPair);
+ ReplaceUses(SDValue(N, ResIdx), SubReg);
+ }
+
CurDAG->RemoveDeadNode(N);
}
@@ -2718,6 +2892,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
@@ -2754,6 +2929,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
case MVT::v8i16:
case MVT::v4f16:
case MVT::v8f16:
+ case MVT::v4bf16:
+ case MVT::v8bf16:
OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32:
@@ -3231,7 +3408,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
MachineFunction& MF = CurDAG->getMachineFunction();
MachineMemOperand *MemOp =
MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
@@ -3251,8 +3428,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
MachineFrameInfo &MFI = MF->getFrameInfo();
- if (MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
+ if (MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i32));
return;
@@ -3522,6 +3699,14 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
const SDValue &Chain = N->getOperand(0);
const SDValue &Addr = N->getOperand(1);
SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of LDRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
{MVT::Untyped, MVT::Other}, Ops);
@@ -3529,12 +3714,37 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDValue(New, 0));
SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
SDValue(New, 0));
+ transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), Lo);
ReplaceUses(SDValue(N, 1), Hi);
ReplaceUses(SDValue(N, 2), SDValue(New, 1));
CurDAG->RemoveDeadNode(N);
return;
}
+ case ARMISD::STRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(3);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of STRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
+ SDNode *RegPair =
+ createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
+ SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 0));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
@@ -3877,14 +4087,24 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
- ARM::VLD2d16wb_fixed,
- ARM::VLD2d32wb_fixed,
- ARM::VLD1q64wb_fixed};
- static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
- ARM::VLD2q16PseudoWB_fixed,
- ARM::VLD2q32PseudoWB_fixed };
- SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed};
+ SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ } else {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
+ ARM::MVE_VLD21_8_wb};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
+ ARM::MVE_VLD21_16_wb};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
+ ARM::MVE_VLD21_32_wb};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 2, Opcodes, true);
+ }
return;
}
@@ -3904,17 +4124,30 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD4_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
- ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD,
- ARM::VLD1d64QPseudoWB_fixed};
- static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
- SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD};
+ static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD};
+ SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ } else {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
+ ARM::MVE_VLD42_8,
+ ARM::MVE_VLD43_8_wb};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
+ ARM::MVE_VLD42_16,
+ ARM::MVE_VLD43_16_wb};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
+ ARM::MVE_VLD42_32,
+ ARM::MVE_VLD43_32_wb};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 4, Opcodes, true);
+ }
return;
}
@@ -3962,15 +4195,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
- ARM::VST2d16wb_fixed,
- ARM::VST2d32wb_fixed,
- ARM::VST1q64wb_fixed};
- static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
- ARM::VST2q16PseudoWB_fixed,
- ARM::VST2q32PseudoWB_fixed };
- SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
- return;
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed};
+ SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ return;
+ }
+ break;
}
case ARMISD::VST3_UPD: {
@@ -3989,18 +4224,20 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST4_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
- ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD,
- ARM::VST1d64QPseudoWB_fixed};
- static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
- SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
- return;
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD};
+ static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD};
+ SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+ break;
}
case ARMISD::VST2LN_UPD: {
@@ -4479,7 +4716,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
ARM::MVE_VLD21_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
- SelectMVE_VLD(N, 2, Opcodes);
+ SelectMVE_VLD(N, 2, Opcodes, false);
return;
}
@@ -4493,7 +4730,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ARM::MVE_VLD42_32,
ARM::MVE_VLD43_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
- SelectMVE_VLD(N, 4, Opcodes);
+ SelectMVE_VLD(N, 4, Opcodes, false);
return;
}
}
@@ -4506,6 +4743,29 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
+ // Scalar f32 -> bf16
+ case Intrinsic::arm_neon_vcvtbfp2bf: {
+ SDLoc dl(N);
+ const SDValue &Src = N->getOperand(1);
+ llvm::EVT DestTy = N->getValueType(0);
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { Src, Src, Pred, Reg0 };
+ CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
+ return;
+ }
+
+ // Vector v4f32 -> v4bf16
+ case Intrinsic::arm_neon_vcvtfp2bf: {
+ SDLoc dl(N);
+ const SDValue &Src = N->getOperand(1);
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { Src, Pred, Reg0 };
+ CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
+ return;
+ }
+
case Intrinsic::arm_mve_urshrl:
SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
return;
@@ -4524,18 +4784,21 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_mve_sqrshrl:
SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
return;
- case Intrinsic::arm_mve_lsll:
- SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
- return;
- case Intrinsic::arm_mve_asrl:
- SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
- return;
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
IntNo == Intrinsic::arm_mve_vadc_predicated);
return;
+ case Intrinsic::arm_mve_vsbc:
+ case Intrinsic::arm_mve_vsbc_predicated:
+ SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
+ IntNo == Intrinsic::arm_mve_vsbc_predicated);
+ return;
+ case Intrinsic::arm_mve_vshlc:
+ case Intrinsic::arm_mve_vshlc_predicated:
+ SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
+ return;
case Intrinsic::arm_mve_vmlldava:
case Intrinsic::arm_mve_vmlldava_predicated: {
@@ -4573,6 +4836,80 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
OpcodesS, OpcodesU);
return;
}
+
+ case Intrinsic::arm_mve_vidup:
+ case Intrinsic::arm_mve_vidup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, false,
+ IntNo == Intrinsic::arm_mve_vidup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vddup:
+ case Intrinsic::arm_mve_vddup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, false,
+ IntNo == Intrinsic::arm_mve_vddup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_viwdup:
+ case Intrinsic::arm_mve_viwdup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, true,
+ IntNo == Intrinsic::arm_mve_viwdup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vdwdup:
+ case Intrinsic::arm_mve_vdwdup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, true,
+ IntNo == Intrinsic::arm_mve_vdwdup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_cde_cx1d:
+ case Intrinsic::arm_cde_cx1da:
+ case Intrinsic::arm_cde_cx2d:
+ case Intrinsic::arm_cde_cx2da:
+ case Intrinsic::arm_cde_cx3d:
+ case Intrinsic::arm_cde_cx3da: {
+ bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
+ IntNo == Intrinsic::arm_cde_cx2da ||
+ IntNo == Intrinsic::arm_cde_cx3da;
+ size_t NumExtraOps;
+ uint16_t Opcode;
+ switch (IntNo) {
+ case Intrinsic::arm_cde_cx1d:
+ case Intrinsic::arm_cde_cx1da:
+ NumExtraOps = 0;
+ Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
+ break;
+ case Intrinsic::arm_cde_cx2d:
+ case Intrinsic::arm_cde_cx2da:
+ NumExtraOps = 1;
+ Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
+ break;
+ case Intrinsic::arm_cde_cx3d:
+ case Intrinsic::arm_cde_cx3da:
+ NumExtraOps = 2;
+ Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
+ return;
+ }
}
break;
}