Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 646
1 file changed, 308 insertions, 338 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cfaafc7b53d2..5b823af1e9b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -43,92 +43,95 @@ namespace RISCV {
} // namespace llvm
void RISCVDAGToDAGISel::PreprocessISelDAG() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end();
- I != E;) {
- SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
-
- // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
- // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
- if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty())
+ continue;
+
+ SDValue Result;
+ switch (N->getOpcode()) {
+ case ISD::SPLAT_VECTOR: {
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
MVT VT = N->getSimpleValueType(0);
unsigned Opc =
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
- SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
- N->getOperand(0), VL);
+ Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+ N->getOperand(0), VL);
+ break;
+ }
+ case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
+ // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
+ // load. Done after lowering and combining so that we have a chance to
+ // optimize this to VMV_V_X_VL when the upper bits aren't needed.
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands");
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Lo = N->getOperand(1);
+ SDValue Hi = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+ assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
+ Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
+ "Unexpected VTs!");
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ RISCVMachineFunctionInfo *FuncInfo =
+ MF.getInfo<RISCVMachineFunctionInfo>();
+ SDLoc DL(N);
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
- ++I;
- CurDAG->DeleteNode(N);
- continue;
+ // We use the same frame index we use for moving two i32s into 64-bit FPR.
+ // This is an analogous operation.
+ int FI = FuncInfo->getMoveF64FrameIndex(MF);
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+ const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+ SDValue StackSlot =
+ CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
+
+ SDValue Chain = CurDAG->getEntryNode();
+ Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+
+ SDValue OffsetSlot =
+ CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+ Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
+ Align(8));
+
+ Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+ SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
+ SDValue IntID =
+ CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
+ SDValue Ops[] = {Chain,
+ IntID,
+ Passthru,
+ StackSlot,
+ CurDAG->getRegister(RISCV::X0, MVT::i64),
+ VL};
+
+ Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ MVT::i64, MPI, Align(8),
+ MachineMemOperand::MOLoad);
+ break;
+ }
}
- // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
- // load. Done after lowering and combining so that we have a chance to
- // optimize this to VMV_V_X_VL when the upper bits aren't needed.
- if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
- continue;
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
+ LLVM_DEBUG(N->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\nNew: ");
+ LLVM_DEBUG(Result->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\n");
- assert(N->getNumOperands() == 4 && "Unexpected number of operands");
- MVT VT = N->getSimpleValueType(0);
- SDValue Passthru = N->getOperand(0);
- SDValue Lo = N->getOperand(1);
- SDValue Hi = N->getOperand(2);
- SDValue VL = N->getOperand(3);
- assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
- Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
- "Unexpected VTs!");
- MachineFunction &MF = CurDAG->getMachineFunction();
- RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
- SDLoc DL(N);
-
- // We use the same frame index we use for moving two i32s into 64-bit FPR.
- // This is an analogous operation.
- int FI = FuncInfo->getMoveF64FrameIndex(MF);
- MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
- const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
- SDValue StackSlot =
- CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
-
- SDValue Chain = CurDAG->getEntryNode();
- Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
-
- SDValue OffsetSlot =
- CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
- Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
- Align(8));
-
- Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-
- SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
- SDValue IntID =
- CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
- SDValue Ops[] = {Chain,
- IntID,
- Passthru,
- StackSlot,
- CurDAG->getRegister(RISCV::X0, MVT::i64),
- VL};
-
- SDValue Result = CurDAG->getMemIntrinsicNode(
- ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
- MachineMemOperand::MOLoad);
-
- // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
- // vlse we created. This will cause general havok on the dag because
- // anything below the conversion could be folded into other existing nodes.
- // To avoid invalidating 'I', back it up to the convert node.
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
- // Now that we did that, the node is dead. Increment the iterator to the
- // next node to process, then delete N.
- ++I;
- CurDAG->DeleteNode(N);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ MadeChange = true;
+ }
}
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
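
The stack-slot trick in the hunk above is easier to see outside the DAG. Below is a minimal plain-C++ sketch (hypothetical helper name, little-endian layout as on RISC-V) of what the two i32 stores plus the zero-stride riscv_vlse accomplish: one i64 element is assembled in memory and then broadcast to every lane.

    #include <cstdint>
    #include <cstring>

    // Lo goes to offset 0 and Hi to offset 4 of the 8-byte-aligned slot,
    // mirroring the two getStore calls above; the stride-0 vlse then reads
    // the slot as a single i64 and replicates it across the vector.
    uint64_t splatElement(uint32_t Lo, uint32_t Hi) {
      uint32_t Slot[2] = {Lo, Hi};
      uint64_t Elt;
      std::memcpy(&Elt, Slot, sizeof(Elt));
      return Elt; // the value every lane receives
    }
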
void RISCVDAGToDAGISel::PostprocessISelDAG() {
@@ -143,7 +146,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
continue;
MadeChange |= doPeepholeSExtW(N);
- MadeChange |= doPeepholeLoadStoreADDI(N);
MadeChange |= doPeepholeMaskedRVV(N);
}
@@ -153,40 +155,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
}
-// Returns true if N is a MachineSDNode that has a reg and simm12 memory
-// operand. The indices of the base pointer and offset are returned in BaseOpIdx
-// and OffsetOpIdx.
-static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
- unsigned &OffsetOpIdx) {
- switch (N->getMachineOpcode()) {
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- return true;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- return true;
- }
-
- return false;
-}
-
static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
RISCVMatInt::InstSeq &Seq) {
SDNode *Result = nullptr;
@@ -285,9 +253,7 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDValue Chain = Node->getOperand(0);
SDValue Glue;
- SDValue Base;
- SelectBaseAddr(Node->getOperand(CurOp++), Base);
- Operands.push_back(Base); // Base pointer.
+ Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
if (IsStridedOrIndexed) {
Operands.push_back(Node->getOperand(CurOp++)); // Index.
@@ -651,83 +617,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
- case ISD::ADD: {
- // Try to select ADD + immediate used as memory addresses to
- // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
- // doPeepholeLoadStoreADDI.
-
-    // RHS should be an immediate.
- auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
- if (!N1C)
- break;
-
- int64_t Offset = N1C->getSExtValue();
- int64_t Lo12 = SignExtend64<12>(Offset);
-
- // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
- if (Lo12 == 0 || isInt<12>(Offset))
- break;
-
- // Don't do this if we can use a pair of ADDIs.
- if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
- break;
-
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
-
- Offset -= Lo12;
- // Restore sign bits for RV32.
- if (!Subtarget->is64Bit())
- Offset = SignExtend64<32>(Offset);
-
- // We can fold if the last operation is an ADDI or its an ADDIW that could
- // be treated as an ADDI.
- if (Seq.back().Opc != RISCV::ADDI &&
- !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
- break;
- assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
- // Drop the last operation.
- Seq.pop_back();
- assert(!Seq.empty() && "Expected more instructions in sequence");
-
- bool AllPointerUses = true;
- for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
-
- // Is this user a memory instruction that uses a register and immediate
- // that has this ADD as its pointer.
- unsigned BaseOpIdx, OffsetOpIdx;
- if (!User->isMachineOpcode() ||
- !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
- UI.getOperandNo() != BaseOpIdx) {
- AllPointerUses = false;
- break;
- }
-
- // If the memory instruction already has an offset, make sure the combined
- // offset is foldable.
- int64_t MemOffs =
- cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
- MemOffs += Lo12;
- if (!isInt<12>(MemOffs)) {
- AllPointerUses = false;
- break;
- }
- }
-
- if (!AllPointerUses)
- break;
-
- // Emit (ADDI (ADD X, Hi), Lo)
- SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
- SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
- Node->getOperand(0), SDValue(Imm, 0));
- SDNode *ADDI =
- CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
- CurDAG->getTargetConstant(Lo12, DL, VT));
- ReplaceNode(Node, ADDI);
- return;
- }
case ISD::SHL: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
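
The removed ISD::ADD case and the selectConstantAddr helper added further down rest on the same arithmetic fact: peeling the sign-extended low 12 bits off an offset leaves a remainder whose low 12 bits are zero. A standalone sketch (hypothetical names; assumes an arithmetic right shift, as LLVM's SignExtend64 does):

    #include <cassert>
    #include <cstdint>

    // Equivalent of SignExtend64<12>(V): reinterpret the low 12 bits as signed.
    int64_t signExtend12(int64_t V) {
      return (int64_t)((uint64_t)V << 52) >> 52;
    }

    // Split Offset so Lo12 can fold into a memory operand's simm12 field;
    // Hi is what still needs materializing.
    void splitOffset(int64_t Offset, int64_t &Hi, int64_t &Lo12) {
      Lo12 = signExtend12(Offset);
      Hi = Offset - Lo12;
      assert((Hi & 0xfff) == 0 && Hi + Lo12 == Offset);
    }

    int main() {
      int64_t Hi, Lo12;
      splitOffset(0x12345678, Hi, Lo12);
      assert(Hi == 0x12345000 && Lo12 == 0x678);
      return 0;
    }
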
@@ -856,10 +745,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C)
break;
- uint64_t C2 = C->getZExtValue();
+ unsigned C2 = C->getZExtValue();
unsigned XLen = Subtarget->getXLen();
- if (!C2 || C2 >= XLen)
- break;
+ assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
uint64_t C1 = N1C->getZExtValue();
@@ -885,10 +773,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
    // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
// with c3 leading zeros.
if (!LeftShift && isMask_64(C1)) {
- uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
- if (C2 < C3) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ if (C2 < Leading) {
// If the number of leading zeros is C2+32 this can be SRLIW.
- if (C2 + 32 == C3) {
+ if (C2 + 32 == Leading) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SRLIW);
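
Renaming C3 to Leading makes the invariant explicit: it counts the leading zero bits of c1, and the rewrite needs C2 < Leading. A self-contained check of the identity (illustrative constants, XLen = 64):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t C1 = 0x00ffffff;      // mask with Leading = 40
      const unsigned Leading = 40, C2 = 4; // C2 < Leading holds
      for (uint64_t X : {0x0123456789abcdefULL, ~0ULL, 0ULL}) {
        uint64_t AndSrl = (X >> C2) & C1;                     // original
        uint64_t SlliSrli = (X << (Leading - C2)) >> Leading; // rewritten
        assert(AndSrl == SlliSrli);
      }
      return 0;
    }
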
@@ -900,7 +788,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
//
      // This pattern occurs when (i32 (srl (sra x, 31), c3 - 32)) is type
      // legalized and goes through DAG combine.
- if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+ if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
SDNode *SRAIW =
@@ -908,25 +796,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(31, DL, VT));
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
- CurDAG->getTargetConstant(C3 - 32, DL, VT));
+ CurDAG->getTargetConstant(Leading - 32, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
// (srli (slli x, c3-c2), c3).
// Skip if we could use (zext.w (sraiw X, C2)).
- bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+ bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
      // Also skip if we can use bexti.
- Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+ Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1;
if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, VT));
- SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Leading - C2, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(Leading, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -936,12 +824,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
    // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
// shifted by c2 bits with c3 leading zeros.
if (LeftShift && isShiftedMask_64(C1)) {
- uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
- if (C2 + C3 < XLen &&
- C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+ if (C2 + Leading < XLen &&
+ C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
// Use slli.uw when possible.
- if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+ if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
SDNode *SLLI_UW = CurDAG->getMachineNode(
RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SLLI_UW);
@@ -952,10 +840,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, VT));
- SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(C2 + Leading, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(Leading, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
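
The slli.uw fast path is the case where the mask keeps exactly 32 bits: (x << c2) & (0xffffffff << c2) equals zero-extending the low 32 bits first and shifting afterwards, which is slli.uw's semantics. A quick standalone check (illustrative constants):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C2 = 5;
      const uint64_t C1 = 0xffffffffULL << C2; // 32 ones shifted by c2
      for (uint64_t X : {0x0123456789abcdefULL, ~0ULL}) {
        assert(((X << C2) & C1) == ((X & 0xffffffffULL) << C2));
      }
      return 0;
    }
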
@@ -965,9 +853,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
// shifted mask with c2 leading zeros and c3 trailing zeros.
if (!LeftShift && isShiftedMask_64(C1)) {
- uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
- uint64_t C3 = countTrailingZeros(C1);
- if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ unsigned Trailing = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) {
unsigned SrliOpc = RISCV::SRLI;
// If the input is zexti32 we should use SRLIW.
if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
@@ -976,22 +864,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
X = X.getOperand(0);
}
SDNode *SRLI = CurDAG->getMachineNode(
- SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
+ SrliOpc, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
- if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
+ if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
OneUseOrZExtW && !IsCANDI) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, VT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
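
With the new names, this rewrite is also easy to check in isolation: when c1 has exactly c2 leading zeros and some trailing zeros, the and-of-shift extracts one contiguous bit field, which SRLI+SLLI re-places (illustrative constants, XLen = 64):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t C1 = 0x00fffffffffffff0ULL; // Leading = 8, Trailing = 4
      const unsigned C2 = 8, Trailing = 4;       // requires Leading == C2
      for (uint64_t X : {0x0123456789abcdefULL, ~0ULL}) {
        uint64_t AndSrl = (X >> C2) & C1;                       // original
        uint64_t SrliSlli = (X >> (C2 + Trailing)) << Trailing; // rewritten
        assert(AndSrl == SrliSlli);
      }
      return 0;
    }
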
@@ -1000,25 +889,26 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
// shifted mask with no leading zeros and c3 trailing zeros.
if (LeftShift && isShiftedMask_64(C1)) {
- uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
- uint64_t C3 = countTrailingZeros(C1);
- if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+ unsigned Trailing = countTrailingZeros(C1);
+ if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
+ RISCV::SRLI, DL, VT, X,
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
- if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, VT));
+ if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT));
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, VT));
+ CurDAG->getTargetConstant(Trailing, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -1885,13 +1775,74 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
-bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
- // If this is FrameIndex, select it directly. Otherwise just let it get
- // selected to a register independently.
- if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
- else
- Base = Addr;
+// Fold constant addresses.
+static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
+ const MVT VT, const RISCVSubtarget *Subtarget,
+ SDValue Addr, SDValue &Base, SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr))
+ return false;
+
+ int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
+
+ // If the constant is a simm12, we can fold the whole constant and use X0 as
+ // the base. If the constant can be materialized with LUI+simm12, use LUI as
+ // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
+ int64_t Lo12 = SignExtend64<12>(CVal);
+ int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
+ if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+ if (Hi) {
+ int64_t Hi20 = (Hi >> 12) & 0xfffff;
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::LUI, DL, VT,
+ CurDAG->getTargetConstant(Hi20, DL, VT)),
+ 0);
+ } else {
+ Base = CurDAG->getRegister(RISCV::X0, VT);
+ }
+ Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+ return true;
+ }
+
+ // Ask how constant materialization would handle this constant.
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+
+ // If the last instruction would be an ADDI, we can fold its immediate and
+ // emit the rest of the sequence as the base.
+ if (Seq.back().Opc != RISCV::ADDI)
+ return false;
+ Lo12 = Seq.back().Imm;
+
+ // Drop the last instruction.
+ Seq.pop_back();
+ assert(!Seq.empty() && "Expected more instructions in sequence");
+
+ Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0);
+ Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+ return true;
+}
+
+// Is this ADD instruction only used as the base pointer of scalar loads and
+// stores?
+static bool isWorthFoldingAdd(SDValue Add) {
+ for (auto Use : Add->uses()) {
+ if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+ Use->getOpcode() != ISD::ATOMIC_LOAD &&
+ Use->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
+ if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
+ VT != MVT::f64)
+ return false;
+ // Don't allow stores of the value. It must be used as the address.
+ if (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getValue() == Add)
+ return false;
+ if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+ cast<AtomicSDNode>(Use)->getVal() == Add)
+ return false;
+ }
+
return true;
}
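
Because Lo12 is sign-extended, the LUI immediate in selectConstantAddr has to absorb a borrow whenever the low 12 bits are 0x800 or more. A small sketch of the split performed above (illustrative, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t CVal = 0x12345800;
      int64_t Lo12 = (int64_t)((uint64_t)CVal << 52) >> 52; // SignExtend64<12>
      int64_t Hi = CVal - Lo12;
      int64_t Hi20 = (Hi >> 12) & 0xfffff;                  // LUI immediate
      assert(Lo12 == -2048 && Hi20 == 0x12346); // LUI rounds up one page
      assert((Hi20 << 12) + Lo12 == CVal);      // base + simm12 reproduces CVal
      return 0;
    }
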
@@ -1947,9 +1898,10 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
assert(!isInt<12>(CVal) && "simm12 not already handled?");
+ // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
+ // an ADDI for part of the offset and fold the rest into the load/store.
+ // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
- // We can use an ADDI for part of the offset and fold the rest into the
- // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
int64_t Adj = CVal < 0 ? -2048 : 2047;
Base = SDValue(
CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
@@ -1958,8 +1910,27 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
return true;
}
+
+ // For larger immediates, we might be able to save one instruction from
+ // constant materialization by folding the Lo12 bits of the immediate into
+ // the address. We should only do this if the ADD is only used by loads and
+ // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
+ // separately with the full materialized immediate creating extra
+ // instructions.
+ if (isWorthFoldingAdd(Addr) &&
+ selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+ Offset)) {
+ // Insert an ADD instruction with the materialized Hi52 bits.
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+ 0);
+ return true;
+ }
}
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
+ return true;
+
Base = Addr;
Offset = CurDAG->getTargetConstant(0, DL, VT);
return true;
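
The AddiPair bounds follow from the simm12 range: subtracting Adj always leaves a residue the load/store can fold. A standalone check of the endpoints named in the new comment (not LLVM code):

    #include <cassert>
    #include <cstdint>

    bool isSimm12(int64_t V) { return V >= -2048 && V <= 2047; }

    int main() {
      for (int64_t CVal : {2048, 4094, -2049, -4096}) {
        assert(!isSimm12(CVal));               // simm12 was handled earlier
        int64_t Adj = CVal < 0 ? -2048 : 2047; // goes into the ADDI
        assert(isSimm12(CVal - Adj));          // folds into the load/store
      }
      return 0;
    }
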
@@ -2044,6 +2015,101 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
return false;
}
+/// Look for various patterns that can be done with a SHL that can be folded
+/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
+/// SHXADD we are trying to match.
+bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
+ SDValue &Val) {
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ SDValue N0 = N.getOperand(0);
+
+ bool LeftShift = N0.getOpcode() == ISD::SHL;
+ if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ uint64_t Mask = N.getConstantOperandVal(1);
+ unsigned C2 = N0.getConstantOperandVal(1);
+
+ unsigned XLen = Subtarget->getXLen();
+ if (LeftShift)
+ Mask &= maskTrailingZeros<uint64_t>(C2);
+ else
+ Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+      // followed by a SHXADD with c3 for the X amount.
+ if (isShiftedMask_64(Mask)) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+ unsigned Trailing = countTrailingZeros(Mask);
+ if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
+ 0);
+ return true;
+ }
+        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
+        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
+        // followed by a SHXADD using c3 for the X amount.
+ if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(
+ CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
+ bool LeftShift = N.getOpcode() == ISD::SHL;
+ if ((LeftShift || N.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N.getOperand(1))) {
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ uint64_t Mask = N0.getConstantOperandVal(1);
+ if (isShiftedMask_64(Mask)) {
+ unsigned C1 = N.getConstantOperandVal(1);
+ unsigned XLen = Subtarget->getXLen();
+ unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+ unsigned Trailing = countTrailingZeros(Mask);
+ // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
+ // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
+ if (LeftShift && Leading == 32 && Trailing > 0 &&
+ (Trailing + C1) == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
+ // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
+ if (!LeftShift && Leading == 32 && Trailing > C1 &&
+ (Trailing - C1) == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
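
The first pattern in selectSHXADDOp above relies on a shift identity: with no leading zeros in the mask and C2 < Trailing == ShAmt, the and-of-shl equals an SRLI whose result the SHXADD shifts back left. A standalone check for sh3add (ShAmt = 3, illustrative constants):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned ShAmt = 3, C2 = 1;
      const uint64_t C1 = ~0ULL << ShAmt; // no leading zeros, ShAmt trailing
      for (uint64_t Y : {0x0123456789abcdefULL, ~0ULL}) {
        uint64_t AndShl = (Y << C2) & C1;                // original pattern
        uint64_t ViaSrli = (Y >> (ShAmt - C2)) << ShAmt; // SRLI, then SHXADD's shift
        assert(AndShl == ViaSrli);
      }
      return 0;
    }
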
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2271,102 +2337,6 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
return false;
}
-// Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (addi base, off1), off2) -> (load base, off1+off2)
-// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
-// (load (add base, (addi src, off1)), off2)
-// -> (load (add base, src), off1+off2)
-// (store val, (add base, (addi src, off1)), off2)
-// -> (store val, (add base, src), off1+off2)
-// This is possible when off1+off2 fits a 12-bit immediate.
-bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
- unsigned OffsetOpIdx, BaseOpIdx;
- if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
- return false;
-
- if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
- return false;
-
- SDValue Base = N->getOperand(BaseOpIdx);
-
- if (!Base.isMachineOpcode())
- return false;
-
- if (Base.getMachineOpcode() == RISCV::ADDI) {
- // If the base is an ADDI, we can merge it in to the load/store.
- } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
- isa<ConstantSDNode>(Base.getOperand(1)) &&
- Base.getOperand(0).isMachineOpcode() &&
- Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
- isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
- // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
- // and LUI+ADDI would have produced the same result. This is true for all
- // simm32 values except 0x7ffff800-0x7fffffff.
- int64_t Offset =
- SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
- Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
- if (!isInt<32>(Offset))
- return false;
- } else
- return false;
-
- SDValue ImmOperand = Base.getOperand(1);
- uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
-
- if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
- int64_t Offset1 = Const->getSExtValue();
- int64_t CombinedOffset = Offset1 + Offset2;
- if (!isInt<12>(CombinedOffset))
- return false;
- ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
- ImmOperand.getValueType());
- } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
- // If the off1 in (addi base, off1) is a global variable's address (its
- // low part, really), then we can rely on the alignment of that variable
- // to provide a margin of safety before off1 can overflow the 12 bits.
- // Check if off2 falls within that margin; if so off1+off2 can't overflow.
- const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
- GA->getOffset());
- if (Offset2 != 0 && Alignment <= Offset2)
- return false;
- int64_t Offset1 = GA->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
- CombinedOffset, GA->getTargetFlags());
- } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
- // Ditto.
- Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
- if (Offset2 != 0 && Alignment <= Offset2)
- return false;
- int64_t Offset1 = CP->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetConstantPool(
- CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
- CombinedOffset, CP->getTargetFlags());
- } else {
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
-
- // Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) { // Load
- N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- } else { // Store
- N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
- }
-
- return true;
-}
-
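
The simm32 exception range called out in the removed ADDIW comment can be verified directly: ADDIW sign-extends the 32-bit sum while ADDI adds in full 64-bit width, so they only disagree when the 32-bit add overflows (a sketch; assumes the usual two's-complement narrowing):

    #include <cassert>
    #include <cstdint>

    int64_t sext32(int64_t V) { return (int32_t)(uint32_t)V; }

    // LUI materializes sext32(Hi20 << 12).
    int64_t luiAddiw(int64_t Hi20, int64_t Imm) {
      return sext32(sext32(Hi20 << 12) + Imm); // ADDIW: 32-bit sum, sign-extended
    }
    int64_t luiAddi(int64_t Hi20, int64_t Imm) {
      return sext32(Hi20 << 12) + Imm;         // ADDI: plain 64-bit add
    }

    int main() {
      assert(luiAddiw(0x12345, 0x67) == luiAddi(0x12345, 0x67));
      // Values in [0x7ffff800, 0x7fffffff] need Hi20 = 0x80000 plus a negative
      // immediate; the 32-bit sum wraps, so only ADDIW yields the intended value.
      assert(luiAddiw(0x80000, -1) == 0x7fffffff);
      assert(luiAddi(0x80000, -1) == -0x80000001LL);
      return 0;
    }
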
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {