diff options
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 15 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 53 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 6 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 215 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 26 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 233 | ||||
-rw-r--r-- | lib/Target/ARM/ARMRegisterBankInfo.cpp | 14 | ||||
-rw-r--r-- | lib/Target/ARM/ARMRegisterBankInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 1 |
9 files changed, 387 insertions, 179 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a20887564f44..b18ed509ed23 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -245,11 +245,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; + case ARM::tGPRRegClassID: { + // hasFP ends up calling getMaxCallFrameComputed() which may not be + // available when getPressureLimit() is called as part of + // ScheduleDAGRRList. + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 5 - HasFP; + } case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9f7e60a848d9..e64582402fe1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -202,7 +202,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) - for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } @@ -822,6 +822,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); @@ -1344,6 +1345,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; case ARMISD::SMULWB: return "ARMISD::SMULWB"; case ARMISD::SMULWT: return "ARMISD::SMULWT"; + case ARMISD::SMLALD: return "ARMISD::SMLALD"; + case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; + case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; + case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -3311,6 +3316,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vabs: + return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) @@ -7722,6 +7730,37 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } } +static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned Opc = 0; + if (IntNo == Intrinsic::arm_smlald) + Opc = ARMISD::SMLALD; + else if (IntNo == Intrinsic::arm_smlaldx) + Opc = ARMISD::SMLALDX; + else if (IntNo == Intrinsic::arm_smlsld) + Opc = ARMISD::SMLSLD; + else if (IntNo == Intrinsic::arm_smlsldx) + Opc = ARMISD::SMLSLDX; + else + return; + + SDLoc dl(N); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(1, dl, MVT::i32)); + + SDValue LongMul = DAG.getNode(Opc, dl, + DAG.getVTList(MVT::i32, MVT::i32), + N->getOperand(1), N->getOperand(2), + Lo, Hi); + Results.push_back(LongMul.getValue(0)); + Results.push_back(LongMul.getValue(1)); +} + /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, @@ -7763,6 +7802,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; + case ISD::INTRINSIC_WO_CHAIN: + return ReplaceLongIntrinsic(N, Results, DAG); } if (Res.getNode()) Results.push_back(Res); @@ -12602,7 +12643,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = Known.getBitWidth(); - Known.Zero.clearAllBits(); Known.One.clearAllBits(); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: @@ -12617,7 +12658,8 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1); - if (Known.Zero == 0 && Known.One == 0) return; + if (Known.isUnknown()) + return; KnownBits KnownRHS; DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1); @@ -14015,3 +14057,8 @@ void ARMTargetLowering::insertCopiesSplitCSR( .addReg(NewVR); } } + +void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const { + MF.getFrameInfo().computeMaxCallFrameSize(MF); + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 76e4b60e01fb..08c51b66dfe7 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -184,6 +184,10 @@ class InstrItineraryData; SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 SMLALTT, // 64-bit signed accumulate multiply top, top 16 + SMLALD, // Signed multiply accumulate long dual + SMLALDX, // Signed multiply accumulate long dual exchange + SMLSLD, // Signed multiply subtract long dual + SMLSLDX, // Signed multiply subtract long dual exchange // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -540,6 +544,8 @@ class InstrItineraryData; unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 28eb5fc30864..a94d6048f02d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -99,6 +99,11 @@ def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>]>; +def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; +def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; +def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; +def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -870,7 +875,9 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{ MVT::i32); }]>; def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } -def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], +def imm1_16 : Operand<i32>, ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 16; + }], imm1_16_XFORM> { let PrintMethod = "printImmPlusOneOperand"; let ParserMatchClass = Imm1_16AsmOperand; @@ -1983,7 +1990,9 @@ def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", - "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3472,8 +3481,12 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src), + (SXTB16 GPR:$Src, 0)>; def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS), + (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; // Zero extenders @@ -3493,6 +3506,8 @@ def UXTB16 : AI_ext_rrot<0b01101100, // (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16 GPR:$Src, 1)>; +def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), + (UXTB16 GPR:$Src, 0)>; def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -3507,6 +3522,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), + (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; def SBFX : I<(outs GPRnopc:$Rd), @@ -3633,71 +3650,85 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Unpredictable{11-8} = 0b1111; } -// Saturating add/subtract +// Wrappers around the AAI class +class AAIRevOpr<bits<8> op27_20, bits<8> op11_4, string opc, + list<dag> pattern = []> + : AAI<op27_20, op11_4, opc, + pattern, + (ins GPRnopc:$Rm, GPRnopc:$Rn), + "\t$Rd, $Rm, $Rn">; +class AAIIntrinsic<bits<8> op27_20, bits<8> op11_4, string opc, + Intrinsic intrinsic> + : AAI<op27_20, op11_4, opc, + [(set GPRnopc:$Rd, (intrinsic GPRnopc:$Rn, GPRnopc:$Rm))]>; + +// Saturating add/subtract +let hasSideEffects = 1 in { +def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; +def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; +def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; +def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; + +def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", + [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, + GPRnopc:$Rm), + GPRnopc:$Rn))]>; +def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, + (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; +def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; let DecoderMethod = "DecodeQADDInstruction" in -def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; - -def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; - -def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; -def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">; -def QASX : AAI<0b01100010, 0b11110011, "qasx">; -def QSAX : AAI<0b01100010, 0b11110101, "qsax">; -def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">; -def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">; -def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">; -def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">; -def UQASX : AAI<0b01100110, 0b11110011, "uqasx">; -def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; -def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; -def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; + def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; +} + +def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; +def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; +def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; +def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; +def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; +def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; +def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; +def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; // Signed/Unsigned add/subtract -def SASX : AAI<0b01100001, 0b11110011, "sasx">; -def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; -def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">; -def SSAX : AAI<0b01100001, 0b11110101, "ssax">; -def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">; -def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">; -def UASX : AAI<0b01100101, 0b11110011, "uasx">; -def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">; -def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">; -def USAX : AAI<0b01100101, 0b11110101, "usax">; -def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; -def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; +def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; +def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>; +def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; +def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; +def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; +def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; +def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; +def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; +def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; +def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; +def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; +def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; // Signed/Unsigned halving add/subtract -def SHASX : AAI<0b01100011, 0b11110011, "shasx">; -def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; -def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">; -def SHSAX : AAI<0b01100011, 0b11110101, "shsax">; -def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">; -def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">; -def UHASX : AAI<0b01100111, 0b11110011, "uhasx">; -def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">; -def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">; -def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; -def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; -def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; +def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; +def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; +def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; +def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; +def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; +def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; +def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; +def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>; +def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; +def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; +def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>; +def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; // Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", - "\t$Rd, $Rn, $Rm", []>, + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -3711,7 +3742,8 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", - "\t$Rd, $Rn, $Rm, $Ra", []>, + "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; @@ -3726,7 +3758,6 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Signed/Unsigned saturate - def SSAT : AI<(outs GPRnopc:$Rd), (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, @@ -3795,6 +3826,10 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), + (SSAT16 imm1_16:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), + (USAT16 imm0_15:$pos, GPRnopc:$a)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -4220,8 +4255,8 @@ multiclass AI_smla<string opc> { IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } @@ -4255,7 +4290,8 @@ def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), // Helper class for AI_smld. class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> - : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { + : AI<oops, iops, MulFrm, itin, opc, asm, []>, + Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; let Inst{27-23} = 0b01110; @@ -4305,20 +4341,40 @@ multiclass AI_smld<bit sub, string opc> { Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; - } defm SMLA : AI_smld<0, "smla">; defm SMLS : AI_smld<1, "smls">; +def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; + multiclass AI_sdml<bit sub, string opc> { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), @@ -4332,6 +4388,15 @@ multiclass AI_sdml<bit sub, string opc> { defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; +def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), + (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; + //===----------------------------------------------------------------------===// // Division Instructions (ARMv7-A with virtualization extension) // @@ -5648,6 +5713,32 @@ def : ARMV5MOPat<(add GPR:$acc, (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), + (SMULTT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), + (SMULWT GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9b08c612e16b..51290e5a5b93 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5558,8 +5558,7 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; // VABS : Vector Absolute Value defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", - int_arm_neon_vabs>; + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32", v2f32, v2f32, fabs>; @@ -5575,29 +5574,6 @@ def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, v8f16, v8f16, fabs>, Requires<[HasNEON, HasFullFP16]>; -def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), - (v2i32 (bitconvert (v8i8 (add DPR:$src, - (NEONvshrs DPR:$src, (i32 7))))))), - (VABSv8i8 DPR:$src)>; -def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), - (v2i32 (bitconvert (v4i16 (add DPR:$src, - (NEONvshrs DPR:$src, (i32 15))))))), - (VABSv4i16 DPR:$src)>; -def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), - (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), - (VABSv2i32 DPR:$src)>; -def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), - (v4i32 (bitconvert (v16i8 (add QPR:$src, - (NEONvshrs QPR:$src, (i32 7))))))), - (VABSv16i8 QPR:$src)>; -def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), - (v4i32 (bitconvert (v8i16 (add QPR:$src, - (NEONvshrs QPR:$src, (i32 15))))))), - (VABSv8i16 QPR:$src)>; -def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), - (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), - (VABSv4i32 QPR:$src)>; - // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f710ee6a7e77..bf3d820e7b7d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1993,6 +1993,10 @@ def : Thumb2DSPPat<(add rGPR:$Rn, def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn), + (t2SXTB16 rGPR:$Rn, 0)>; +def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm), + (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; // A simple right-shift can also be used in most cases (the exception is the @@ -2026,6 +2030,9 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF), def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF), (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm), + (t2UXTB16 rGPR:$Rm, 0)>; + // FIXME: This pattern incorrectly assumes the shl operator is a rotate. // The transformation should probably be done as a combiner action // instead so we can include a check for masking back in the upper @@ -2053,6 +2060,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm), + (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; } @@ -2137,10 +2146,9 @@ def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; -// Select Bytes -- for disassembly only - def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; @@ -2154,9 +2162,7 @@ def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, - list<dag> pat = [/* For disassembly only; pattern left blank */], - dag iops = (ins rGPR:$Rn, rGPR:$Rm), - string asm = "\t$Rd, $Rn, $Rm"> + list<dag> pat, dag iops, string asm> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; @@ -2174,60 +2180,72 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only - -def t2QADD : T2I_pam<0b000, 0b1000, "qadd", - [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; -def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; -def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; -def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", - [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; -def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; -def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; -def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; -def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; -def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; -def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; -def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; - -// Signed/Unsigned add/subtract -- for disassembly only - -def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; -def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; -def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; -def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; -def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; -def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; -def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; -def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; -def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; -def t2USAX : T2I_pam<0b110, 0b0100, "usax">; -def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; -def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; - -// Signed/Unsigned halving add/subtract -- for disassembly only - -def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; -def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; -def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; -def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; -def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; -def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; -def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; -def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; -def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; -def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; -def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; -def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; +class T2I_pam_intrinsics<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> + : T2I_pam<op22_20, op7_4, opc, + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))], + (ins rGPR:$Rn, rGPR:$Rm), "\t$Rd, $Rn, $Rm">; + +class T2I_pam_intrinsics_rev<bits<3> op22_20, bits<4> op7_4, string opc> + : T2I_pam<op22_20, op7_4, opc, [], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; + +// Saturating add/subtract +def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>; +def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>; +def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>; +def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>; +def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>; +def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>; +def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>; +def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>; +def t2UQADD8 : T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>; +def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>; +def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>; +def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>; +def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">; +def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">; +def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">; + +def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; + +// Signed/Unsigned add/subtract + +def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; +def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>; +def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>; +def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>; +def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>; +def t2SSUB8 : T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>; +def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>; +def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>; +def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>; +def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>; +def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>; +def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>; + +// Signed/Unsigned halving add/subtract + +def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>; +def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>; +def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>; +def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>; +def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>; +def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>; +def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>; +def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>; +def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>; +def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>; +def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>; +def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>; // Helper class for disassembly only // A6.3.16 & A6.3.17 @@ -2255,16 +2273,19 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, // Unsigned Sum of Absolute Differences [and Accumulate]. def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, - "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, + "usada8", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. +let hasSideEffects = 1 in class T2SatI<dag iops, string opc, string asm> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; @@ -2313,10 +2334,16 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), let Inst{4} = 0; } -def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), + (t2USAT imm0_31:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), + (t2SSAT16 imm1_16:$pos, GPR:$a)>; +def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), + (t2USAT16 imm0_15:$pos, GPR:$a)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. @@ -2689,6 +2716,18 @@ def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm), + (t2SMULTT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm), + (t2SMULWB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm), + (t2SMULWT rGPR:$Rn, rGPR:$Rm)>; class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> @@ -2730,6 +2769,19 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + // Halfword multiple accumulate long: SMLAL<x><y> def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">, Requires<[IsThumb2, HasDSP]>; @@ -2749,39 +2801,44 @@ def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>; -class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc> +class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2ThreeReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]>, Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{15-12} = 0b1111; } // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">; -def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">; -def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">; -def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">; +def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad", int_arm_smuad>; +def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx", int_arm_smuadx>; +def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd", int_arm_smusd>; +def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx", int_arm_smusdx>; -class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc> +class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2FourReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>, + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; -def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">; -def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">; -def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">; -def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">; +def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad", int_arm_smlad>; +def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx", int_arm_smladx>; +def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd", int_arm_smlsd>; +def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx", int_arm_smlsdx>; class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc> : T2FourReg_mac<1, op22_20, op7_4, (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, + RegConstraint<"$Ra = $RLo, $Rd = $RHi">, Requires<[IsThumb2, HasDSP]>, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; @@ -2790,6 +2847,15 @@ def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">; def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; +def : Thumb2DSPPat<(ARMSmlald rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlaldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsld rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; + //===----------------------------------------------------------------------===// // Division Instructions. // Signed and unsigned division on v7-M @@ -4640,6 +4706,19 @@ def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm", def : t2InstSubst<"and${s}${p} $Rdn, $imm", (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; +// And ORR <--> ORN +def : t2InstSubst<"orn${s}${p} $Rd, $Rn, $imm", + (t2ORRri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orn${s}${p} $Rdn, $imm", + (t2ORRri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rd, $Rn, $imm", + (t2ORNri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rdn, $imm", + (t2ORNri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 7325817d446b..13a32211f88c 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -196,14 +196,14 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( llvm_unreachable("Switch should handle all register classes"); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { auto Opc = MI.getOpcode(); // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc)) { - InstructionMapping Mapping = getInstrMappingImpl(MI); + const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; } @@ -258,7 +258,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || Ty2.getSizeInBits() != 32) - return InstructionMapping{}; + return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, @@ -271,14 +271,14 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 || MI.getOperand(2).getImm() % 32 != 0) - return InstructionMapping{}; + return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], &ARM::ValueMappings[ARM::DPR3OpsIdx], nullptr, nullptr}); break; } default: - return InstructionMapping{}; + return getInvalidInstructionMapping(); } #ifndef NDEBUG @@ -292,6 +292,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } #endif - return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping, - NumOperands}; + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping, + NumOperands); } diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h index 5222c1e6389f..9650b358f319 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/lib/Target/ARM/ARMRegisterBankInfo.h @@ -36,7 +36,8 @@ public: const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 2b0cd461df7a..4a943187ab6d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -38,6 +38,7 @@ const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc void ARMTargetStreamer::emitCurrentConstantPool() { ConstantPools->emitForCurrentSection(Streamer); + ConstantPools->clearCacheForCurrentSection(Streamer); } // finish() - write out any non-empty assembler constant pools. |