Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 646
1 file changed, 308 insertions, 338 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cfaafc7b53d2..5b823af1e9b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -43,92 +43,95 @@ namespace RISCV {
 } // namespace llvm
 void RISCVDAGToDAGISel::PreprocessISelDAG() {
-  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
-                                       E = CurDAG->allnodes_end();
-       I != E;) {
-    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
-
-    // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
-    // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
-    if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+  bool MadeChange = false;
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    if (N->use_empty())
+      continue;
+
+    SDValue Result;
+    switch (N->getOpcode()) {
+    case ISD::SPLAT_VECTOR: {
+      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
       MVT VT = N->getSimpleValueType(0);
       unsigned Opc =
           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
       SDLoc DL(N);
       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
-      SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
-                                       N->getOperand(0), VL);
+      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+                               N->getOperand(0), VL);
+      break;
+    }
+    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
+      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
+      // load. Done after lowering and combining so that we have a chance to
+      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
+      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
+      MVT VT = N->getSimpleValueType(0);
+      SDValue Passthru = N->getOperand(0);
+      SDValue Lo = N->getOperand(1);
+      SDValue Hi = N->getOperand(2);
+      SDValue VL = N->getOperand(3);
+      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
+             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
+             "Unexpected VTs!");
+      MachineFunction &MF = CurDAG->getMachineFunction();
+      RISCVMachineFunctionInfo *FuncInfo =
+          MF.getInfo<RISCVMachineFunctionInfo>();
+      SDLoc DL(N);
-      --I;
-      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-      ++I;
-      CurDAG->DeleteNode(N);
-      continue;
+      // We use the same frame index we use for moving two i32s into 64-bit FPR.
+      // This is an analogous operation.
+      int FI = FuncInfo->getMoveF64FrameIndex(MF);
+      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
+      const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+      SDValue StackSlot =
+          CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
+
+      SDValue Chain = CurDAG->getEntryNode();
+      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
+
+      SDValue OffsetSlot =
+          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
+                            Align(8));
+
+      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+
+      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
+      SDValue IntID =
+          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
+      SDValue Ops[] = {Chain,
+                       IntID,
+                       Passthru,
+                       StackSlot,
+                       CurDAG->getRegister(RISCV::X0, MVT::i64),
+                       VL};
+
+      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+                                           MVT::i64, MPI, Align(8),
+                                           MachineMemOperand::MOLoad);
+      break;
+    }
+    }
-    // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
-    // load. Done after lowering and combining so that we have a chance to
-    // optimize this to VMV_V_X_VL when the upper bits aren't needed.
-    if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
-      continue;
+    if (Result) {
+      LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
+      LLVM_DEBUG(N->dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\nNew: ");
+      LLVM_DEBUG(Result->dump(CurDAG));
+      LLVM_DEBUG(dbgs() << "\n");
-    assert(N->getNumOperands() == 4 && "Unexpected number of operands");
-    MVT VT = N->getSimpleValueType(0);
-    SDValue Passthru = N->getOperand(0);
-    SDValue Lo = N->getOperand(1);
-    SDValue Hi = N->getOperand(2);
-    SDValue VL = N->getOperand(3);
-    assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
-           Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
-           "Unexpected VTs!");
-    MachineFunction &MF = CurDAG->getMachineFunction();
-    RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
-    SDLoc DL(N);
-
-    // We use the same frame index we use for moving two i32s into 64-bit FPR.
-    // This is an analogous operation.
-    int FI = FuncInfo->getMoveF64FrameIndex(MF);
-    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
-    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
-    SDValue StackSlot =
-        CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
-
-    SDValue Chain = CurDAG->getEntryNode();
-    Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
-
-    SDValue OffsetSlot =
-        CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
-    Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
-                          Align(8));
-
-    Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
-
-    SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
-    SDValue IntID =
-        CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
-    SDValue Ops[] = {Chain,
-                     IntID,
-                     Passthru,
-                     StackSlot,
-                     CurDAG->getRegister(RISCV::X0, MVT::i64),
-                     VL};
-
-    SDValue Result = CurDAG->getMemIntrinsicNode(
-        ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
-        MachineMemOperand::MOLoad);
-
-    // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
-    // vlse we created. This will cause general havok on the dag because
-    // anything below the conversion could be folded into other existing nodes.
-    // To avoid invalidating 'I', back it up to the convert node.
-    --I;
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
-    // Now that we did that, the node is dead. Increment the iterator to the
-    // next node to process, then delete N.
-    ++I;
-    CurDAG->DeleteNode(N);
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      MadeChange = true;
+    }
   }
+
+  if (MadeChange)
+    CurDAG->RemoveDeadNodes();
 }
 void RISCVDAGToDAGISel::PostprocessISelDAG() {
@@ -143,7 +146,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
       continue;
     MadeChange |= doPeepholeSExtW(N);
-    MadeChange |= doPeepholeLoadStoreADDI(N);
     MadeChange |= doPeepholeMaskedRVV(N);
   }
@@ -153,40 +155,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
     CurDAG->RemoveDeadNodes();
 }
-// Returns true if N is a MachineSDNode that has a reg and simm12 memory
-// operand. The indices of the base pointer and offset are returned in BaseOpIdx
-// and OffsetOpIdx.
-static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
-                         unsigned &OffsetOpIdx) {
-  switch (N->getMachineOpcode()) {
-  case RISCV::LB:
-  case RISCV::LH:
-  case RISCV::LW:
-  case RISCV::LBU:
-  case RISCV::LHU:
-  case RISCV::LWU:
-  case RISCV::LD:
-  case RISCV::FLH:
-  case RISCV::FLW:
-  case RISCV::FLD:
-    BaseOpIdx = 0;
-    OffsetOpIdx = 1;
-    return true;
-  case RISCV::SB:
-  case RISCV::SH:
-  case RISCV::SW:
-  case RISCV::SD:
-  case RISCV::FSH:
-  case RISCV::FSW:
-  case RISCV::FSD:
-    BaseOpIdx = 1;
-    OffsetOpIdx = 2;
-    return true;
-  }
-
-  return false;
-}
-
 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                             RISCVMatInt::InstSeq &Seq) {
   SDNode *Result = nullptr;
@@ -285,9 +253,7 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
   SDValue Chain = Node->getOperand(0);
   SDValue Glue;
-  SDValue Base;
-  SelectBaseAddr(Node->getOperand(CurOp++), Base);
-  Operands.push_back(Base); // Base pointer.
+  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
   if (IsStridedOrIndexed) {
     Operands.push_back(Node->getOperand(CurOp++)); // Index.
@@ -651,83 +617,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
     return;
   }
-  case ISD::ADD: {
-    // Try to select ADD + immediate used as memory addresses to
-    // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
-    // doPeepholeLoadStoreADDI.
-
-    // LHS should be an immediate.
-    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
-    if (!N1C)
-      break;
-
-    int64_t Offset = N1C->getSExtValue();
-    int64_t Lo12 = SignExtend64<12>(Offset);
-
-    // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
-    if (Lo12 == 0 || isInt<12>(Offset))
-      break;
-
-    // Don't do this if we can use a pair of ADDIs.
-    if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
-      break;
-
-    RISCVMatInt::InstSeq Seq =
-        RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
-
-    Offset -= Lo12;
-    // Restore sign bits for RV32.
-    if (!Subtarget->is64Bit())
-      Offset = SignExtend64<32>(Offset);
-
-    // We can fold if the last operation is an ADDI or its an ADDIW that could
-    // be treated as an ADDI.
-    if (Seq.back().Opc != RISCV::ADDI &&
-        !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
-      break;
-    assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
-    // Drop the last operation.
-    Seq.pop_back();
-    assert(!Seq.empty() && "Expected more instructions in sequence");
-
-    bool AllPointerUses = true;
-    for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
-      SDNode *User = *UI;
-
-      // Is this user a memory instruction that uses a register and immediate
-      // that has this ADD as its pointer.
-      unsigned BaseOpIdx, OffsetOpIdx;
-      if (!User->isMachineOpcode() ||
-          !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
-          UI.getOperandNo() != BaseOpIdx) {
-        AllPointerUses = false;
-        break;
-      }
-
-      // If the memory instruction already has an offset, make sure the combined
-      // offset is foldable.
-      int64_t MemOffs =
-          cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
-      MemOffs += Lo12;
-      if (!isInt<12>(MemOffs)) {
-        AllPointerUses = false;
-        break;
-      }
-    }
-
-    if (!AllPointerUses)
-      break;
-
-    // Emit (ADDI (ADD X, Hi), Lo)
-    SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
-    SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
-                                         Node->getOperand(0), SDValue(Imm, 0));
-    SDNode *ADDI =
-        CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
-                               CurDAG->getTargetConstant(Lo12, DL, VT));
-    ReplaceNode(Node, ADDI);
-    return;
-  }
   case ISD::SHL: {
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
     if (!N1C)
@@ -856,10 +745,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
     if (!C)
       break;
-    uint64_t C2 = C->getZExtValue();
+    unsigned C2 = C->getZExtValue();
     unsigned XLen = Subtarget->getXLen();
-    if (!C2 || C2 >= XLen)
-      break;
+    assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
     uint64_t C1 = N1C->getZExtValue();
@@ -885,10 +773,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
     // with c3 leading zeros.
     if (!LeftShift && isMask_64(C1)) {
-      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
-      if (C2 < C3) {
+      unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+      if (C2 < Leading) {
         // If the number of leading zeros is C2+32 this can be SRLIW.
-        if (C2 + 32 == C3) {
+        if (C2 + 32 == Leading) {
           SDNode *SRLIW = CurDAG->getMachineNode(
               RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
           ReplaceNode(Node, SRLIW);
@@ -900,7 +788,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         //
         // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
         // legalized and goes through DAG combine.
-        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+        if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
            X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
          SDNode *SRAIW =
@@ -908,25 +796,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                                     CurDAG->getTargetConstant(31, DL, VT));
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
-              CurDAG->getTargetConstant(C3 - 32, DL, VT));
+              CurDAG->getTargetConstant(Leading - 32, DL, VT));
          ReplaceNode(Node, SRLIW);
          return;
        }
        // (srli (slli x, c3-c2), c3).
        // Skip if we could use (zext.w (sraiw X, C2)).
-        bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+        bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                    X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                    cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
        // Also Skip if we can use bexti.
-        Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+        Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1;
        if (OneUseOrZExtW && !Skip) {
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, X,
-              CurDAG->getTargetConstant(C3 - C2, DL, VT));
-          SDNode *SRLI =
-              CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
-                                     CurDAG->getTargetConstant(C3, DL, VT));
+              CurDAG->getTargetConstant(Leading - C2, DL, VT));
+          SDNode *SRLI = CurDAG->getMachineNode(
+              RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+              CurDAG->getTargetConstant(Leading, DL, VT));
          ReplaceNode(Node, SRLI);
          return;
        }
@@ -936,12 +824,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
     // shifted by c2 bits with c3 leading zeros.
     if (LeftShift && isShiftedMask_64(C1)) {
-      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+      unsigned Leading = XLen - (64 - countLeadingZeros(C1));
-      if (C2 + C3 < XLen &&
-          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+      if (C2 + Leading < XLen &&
+          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
        // Use slli.uw when possible.
-        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+        if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
          SDNode *SLLI_UW = CurDAG->getMachineNode(
              RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
          ReplaceNode(Node, SLLI_UW);
@@ -952,10 +840,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       if (OneUseOrZExtW && !IsCANDI) {
         SDNode *SLLI = CurDAG->getMachineNode(
             RISCV::SLLI, DL, VT, X,
-            CurDAG->getTargetConstant(C2 + C3, DL, VT));
-        SDNode *SRLI =
-            CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
-                                   CurDAG->getTargetConstant(C3, DL, VT));
+            CurDAG->getTargetConstant(C2 + Leading, DL, VT));
+        SDNode *SRLI = CurDAG->getMachineNode(
+            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+            CurDAG->getTargetConstant(Leading, DL, VT));
         ReplaceNode(Node, SRLI);
         return;
       }
@@ -965,9 +853,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
     // shifted mask with c2 leading zeros and c3 trailing zeros.
     if (!LeftShift && isShiftedMask_64(C1)) {
-      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
-      uint64_t C3 = countTrailingZeros(C1);
-      if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+      unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+      unsigned Trailing = countTrailingZeros(C1);
+      if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) {
        unsigned SrliOpc = RISCV::SRLI;
        // If the input is zexti32 we should use SRLIW.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
@@ -976,22 +864,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
          X = X.getOperand(0);
        }
        SDNode *SRLI = CurDAG->getMachineNode(
-            SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
+            SrliOpc, DL, VT, X,
+            CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
-                                   CurDAG->getTargetConstant(C3, DL, VT));
+                                   CurDAG->getTargetConstant(Trailing, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
-      if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
+      if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
          OneUseOrZExtW && !IsCANDI) {
-        SDNode *SRLIW =
-            CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
-                                   CurDAG->getTargetConstant(C2 + C3, DL, VT));
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, VT, X,
+            CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
-                                   CurDAG->getTargetConstant(C3, DL, VT));
+                                   CurDAG->getTargetConstant(Trailing, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
@@ -1000,25 +889,26 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
     // shifted mask with no leading zeros and c3 trailing zeros.
     if (LeftShift && isShiftedMask_64(C1)) {
-      uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
-      uint64_t C3 = countTrailingZeros(C1);
-      if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
+      unsigned Leading = XLen - (64 - countLeadingZeros(C1));
+      unsigned Trailing = countTrailingZeros(C1);
+      if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
        SDNode *SRLI = CurDAG->getMachineNode(
-            RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
+            RISCV::SRLI, DL, VT, X,
+            CurDAG->getTargetConstant(Trailing - C2, DL, VT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
-                                   CurDAG->getTargetConstant(C3, DL, VT));
+                                   CurDAG->getTargetConstant(Trailing, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
-      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
-        SDNode *SRLIW =
-            CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
-                                   CurDAG->getTargetConstant(C3 - C2, DL, VT));
+      if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, VT, X,
+            CurDAG->getTargetConstant(Trailing - C2, DL, VT));
        SDNode *SLLI =
            CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
-                                   CurDAG->getTargetConstant(C3, DL, VT));
+                                   CurDAG->getTargetConstant(Trailing, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
@@ -1885,13 +1775,74 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
   return false;
 }
-bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
-  // If this is FrameIndex, select it directly. Otherwise just let it get
-  // selected to a register independently.
-  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
-  else
-    Base = Addr;
+// Fold constant addresses.
+static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
+                               const MVT VT, const RISCVSubtarget *Subtarget,
+                               SDValue Addr, SDValue &Base, SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr))
+    return false;
+
+  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
+
+  // If the constant is a simm12, we can fold the whole constant and use X0 as
+  // the base. If the constant can be materialized with LUI+simm12, use LUI as
+  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
+  int64_t Lo12 = SignExtend64<12>(CVal);
+  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
+  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+    if (Hi) {
+      int64_t Hi20 = (Hi >> 12) & 0xfffff;
+      Base = SDValue(
+          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
+                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
+          0);
+    } else {
+      Base = CurDAG->getRegister(RISCV::X0, VT);
+    }
+    Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+    return true;
+  }
+
+  // Ask how constant materialization would handle this constant.
+  RISCVMatInt::InstSeq Seq =
+      RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+
+  // If the last instruction would be an ADDI, we can fold its immediate and
+  // emit the rest of the sequence as the base.
+  if (Seq.back().Opc != RISCV::ADDI)
+    return false;
+  Lo12 = Seq.back().Imm;
+
+  // Drop the last instruction.
+  Seq.pop_back();
+  assert(!Seq.empty() && "Expected more instructions in sequence");
+
+  Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0);
+  Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
+  return true;
+}
+
+// Is this ADD instruction only used as the base pointer of scalar loads and
+// stores?
+static bool isWorthFoldingAdd(SDValue Add) {
+  for (auto Use : Add->uses()) {
+    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+        Use->getOpcode() != ISD::ATOMIC_LOAD &&
+        Use->getOpcode() != ISD::ATOMIC_STORE)
+      return false;
+    EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
+    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
+        VT != MVT::f64)
+      return false;
+    // Don't allow stores of the value. It must be used as the address.
+    if (Use->getOpcode() == ISD::STORE &&
+        cast<StoreSDNode>(Use)->getValue() == Add)
+      return false;
+    if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+        cast<AtomicSDNode>(Use)->getVal() == Add)
+      return false;
+  }
+
   return true;
 }
@@ -1947,9 +1898,10 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
     assert(!isInt<12>(CVal) && "simm12 not already handled?");
+    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
+    // an ADDI for part of the offset and fold the rest into the load/store.
+    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
-      // We can use an ADDI for part of the offset and fold the rest into the
-      // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
       int64_t Adj = CVal < 0 ? -2048 : 2047;
       Base = SDValue(
           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
@@ -1958,8 +1910,27 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
       return true;
     }
+
+    // For larger immediates, we might be able to save one instruction from
+    // constant materialization by folding the Lo12 bits of the immediate into
+    // the address. We should only do this if the ADD is only used by loads and
+    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
+    // separately with the full materialized immediate creating extra
+    // instructions.
+    if (isWorthFoldingAdd(Addr) &&
+        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+                           Offset)) {
+      // Insert an ADD instruction with the materialized Hi52 bits.
+      Base = SDValue(
+          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+          0);
+      return true;
+    }
   }
+  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
+    return true;
+
   Base = Addr;
   Offset = CurDAG->getTargetConstant(0, DL, VT);
   return true;
@@ -2044,6 +2015,101 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
+/// Look for various patterns that can be done with a SHL that can be folded
+/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
+/// SHXADD we are trying to match.
+bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
+                                       SDValue &Val) {
+  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+    SDValue N0 = N.getOperand(0);
+
+    bool LeftShift = N0.getOpcode() == ISD::SHL;
+    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
+        isa<ConstantSDNode>(N0.getOperand(1))) {
+      uint64_t Mask = N.getConstantOperandVal(1);
+      unsigned C2 = N0.getConstantOperandVal(1);
+
+      unsigned XLen = Subtarget->getXLen();
+      if (LeftShift)
+        Mask &= maskTrailingZeros<uint64_t>(C2);
+      else
+        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
+      // followed by a SHXADD with c3 for the X amount.
+      if (isShiftedMask_64(Mask)) {
+        unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+        unsigned Trailing = countTrailingZeros(Mask);
+        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(CurDAG->getMachineNode(
+                            RISCV::SRLI, DL, VT, N0.getOperand(0),
+                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
+                        0);
+          return true;
+        }
+        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
+        // leading zeros and c3 trailing zeros. We can use an SRLI by C3
+        // followed by a SHXADD using c3 for the X amount.
+        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(
+              CurDAG->getMachineNode(
+                  RISCV::SRLI, DL, VT, N0.getOperand(0),
+                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
+              0);
+          return true;
+        }
+      }
+    }
+  }
+
+  bool LeftShift = N.getOpcode() == ISD::SHL;
+  if ((LeftShift || N.getOpcode() == ISD::SRL) &&
+      isa<ConstantSDNode>(N.getOperand(1))) {
+    SDValue N0 = N.getOperand(0);
+    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+        isa<ConstantSDNode>(N0.getOperand(1))) {
+      uint64_t Mask = N0.getConstantOperandVal(1);
+      if (isShiftedMask_64(Mask)) {
+        unsigned C1 = N.getConstantOperandVal(1);
+        unsigned XLen = Subtarget->getXLen();
+        unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+        unsigned Trailing = countTrailingZeros(Mask);
+        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
+        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
+        if (LeftShift && Leading == 32 && Trailing > 0 &&
+            (Trailing + C1) == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(CurDAG->getMachineNode(
+                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
+                            CurDAG->getTargetConstant(Trailing, DL, VT)),
+                        0);
+          return true;
+        }
+        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
+        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
+        if (!LeftShift && Leading == 32 && Trailing > C1 &&
+            (Trailing - C1) == ShAmt) {
+          SDLoc DL(N);
+          EVT VT = N.getValueType();
+          Val = SDValue(CurDAG->getMachineNode(
+                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
+                            CurDAG->getTargetConstant(Trailing, DL, VT)),
+                        0);
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
 // Return true if all users of this SDNode* only consume the lower \p Bits.
 // This can be used to form W instructions for add/sub/mul/shl even when the
 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2271,102 +2337,6 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
   return false;
 }
-// Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (addi base, off1), off2) -> (load base, off1+off2)
-// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
-// (load (add base, (addi src, off1)), off2)
-//    -> (load (add base, src), off1+off2)
-// (store val, (add base, (addi src, off1)), off2)
-//    -> (store val, (add base, src), off1+off2)
-// This is possible when off1+off2 fits a 12-bit immediate.
-bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
-  unsigned OffsetOpIdx, BaseOpIdx;
-  if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
-    return false;
-
-  if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
-    return false;
-
-  SDValue Base = N->getOperand(BaseOpIdx);
-
-  if (!Base.isMachineOpcode())
-    return false;
-
-  if (Base.getMachineOpcode() == RISCV::ADDI) {
-    // If the base is an ADDI, we can merge it in to the load/store.
-  } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
-             isa<ConstantSDNode>(Base.getOperand(1)) &&
-             Base.getOperand(0).isMachineOpcode() &&
-             Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
-             isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
-    // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
-    // and LUI+ADDI would have produced the same result. This is true for all
-    // simm32 values except 0x7ffff800-0x7fffffff.
-    int64_t Offset =
-        SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
-    Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
-    if (!isInt<32>(Offset))
-      return false;
-  } else
-    return false;
-
-  SDValue ImmOperand = Base.getOperand(1);
-  uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
-
-  if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
-    int64_t Offset1 = Const->getSExtValue();
-    int64_t CombinedOffset = Offset1 + Offset2;
-    if (!isInt<12>(CombinedOffset))
-      return false;
-    ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
-                                           ImmOperand.getValueType());
-  } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
-    // If the off1 in (addi base, off1) is a global variable's address (its
-    // low part, really), then we can rely on the alignment of that variable
-    // to provide a margin of safety before off1 can overflow the 12 bits.
-    // Check if off2 falls within that margin; if so off1+off2 can't overflow.
-    const DataLayout &DL = CurDAG->getDataLayout();
-    Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
-                                      GA->getOffset());
-    if (Offset2 != 0 && Alignment <= Offset2)
-      return false;
-    int64_t Offset1 = GA->getOffset();
-    int64_t CombinedOffset = Offset1 + Offset2;
-    ImmOperand = CurDAG->getTargetGlobalAddress(
-        GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
-        CombinedOffset, GA->getTargetFlags());
-  } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
-    // Ditto.
-    Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
-    if (Offset2 != 0 && Alignment <= Offset2)
-      return false;
-    int64_t Offset1 = CP->getOffset();
-    int64_t CombinedOffset = Offset1 + Offset2;
-    ImmOperand = CurDAG->getTargetConstantPool(
-        CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
-        CombinedOffset, CP->getTargetFlags());
-  } else {
-    return false;
-  }
-
-  LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
-  LLVM_DEBUG(Base->dump(CurDAG));
-  LLVM_DEBUG(dbgs() << "\nN: ");
-  LLVM_DEBUG(N->dump(CurDAG));
-  LLVM_DEBUG(dbgs() << "\n");
-
-  // Modify the offset operand of the load/store.
-  if (BaseOpIdx == 0) { // Load
-    N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
-                                   N->getOperand(2));
-  } else { // Store
-    N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
-                                   ImmOperand, N->getOperand(3));
-  }
-
-  return true;
-}
-
 // Try to remove sext.w if the input is a W instruction or can be made into
 // a W instruction cheaply.
 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
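Editorial note, not part of the patch: the selectConstantAddr() added above splits a constant address into an LUI-compatible upper part and a simm12 offset with Lo12 = SignExtend64<12>(CVal), Hi = CVal - Lo12, Hi20 = (Hi >> 12) & 0xfffff. The standalone C++ sketch below only mirrors that arithmetic so the split can be checked outside of LLVM; the helper signExtend12 and the test harness are hypothetical names invented for illustration, not code from the patch.

// Standalone sketch of the Hi20/Lo12 split used by selectConstantAddr().
// Assumptions: helper and harness names are illustrative only.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Sign-extend the low 12 bits of V, i.e. what llvm::SignExtend64<12>(V) yields.
static int64_t signExtend12(int64_t V) {
  int64_t Low = V & 0xfff;
  return (Low & 0x800) ? Low - 0x1000 : Low;
}

int main() {
  const int64_t Tests[] = {0x12345678, 0x12345FFF, -0x7f0};
  for (int64_t CVal : Tests) {
    // Offset that the load/store can absorb directly (simm12).
    int64_t Lo12 = signExtend12(CVal);
    // Remaining upper part; when Lo12 is negative this rounds Hi up to the
    // next multiple of 0x1000, so Hi always has 12 trailing zero bits.
    int64_t Hi = (int64_t)((uint64_t)CVal - (uint64_t)Lo12);
    int64_t Hi20 = (Hi >> 12) & 0xfffff;

    // LUI materializes Hi20 << 12 (sign-extended from bit 31 on RV64), and
    // adding the simm12 offset reproduces the original constant whenever Hi
    // fits in 32 bits, which is the case the patch handles first.
    int64_t Materialized = (int64_t)(int32_t)((uint32_t)Hi20 << 12) + Lo12;
    assert(Materialized == CVal);
    std::printf("CVal=0x%llx -> LUI 0x%llx, offset %lld\n",
                (unsigned long long)CVal, (unsigned long long)Hi20,
                (long long)Lo12);
  }
  return 0;
}

The split uses Hi = CVal - Lo12 rather than simple masking so that a negative Lo12 rounds the upper part up; when Hi ends up zero the patch uses X0 as the base instead of emitting an LUI.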