diff options
Diffstat (limited to 'lib/Target')
264 files changed, 11277 insertions, 3282 deletions
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index a0a09e4a833b..da22d8d9e4c5 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -206,7 +206,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, // FIXME: Can we get anything other than a plain symbol here? assert(!MO.getTargetFlags() && "Unknown operand target flag!"); - O << *Sym; + Sym->print(O, MAI); printOffset(MO.getOffset(), O); break; } diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 78a2021f79a3..1ea4abcf05fa 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -156,6 +156,9 @@ public: SDNode *SelectLIBM(SDNode *N); + SDNode *SelectReadRegister(SDNode *N); + SDNode *SelectWriteRegister(SDNode *N); + // Include the pieces autogenerated from the target description. #include "AArch64GenDAGISel.inc" @@ -2114,6 +2117,120 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, return true; } +// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields +// of the string and obtains the integer values from them and combines these +// into a single value to be used in the MRS/MSR instruction. 
+static int getIntOperandFromRegisterString(StringRef RegString) { + SmallVector<StringRef, 5> Fields; + RegString.split(Fields, ":"); + + if (Fields.size() == 1) + return -1; + + assert(Fields.size() == 5 + && "Invalid number of fields in read register string"); + + SmallVector<int, 5> Ops; + bool AllIntFields = true; + + for (StringRef Field : Fields) { + unsigned IntField; + AllIntFields &= !Field.getAsInteger(10, IntField); + Ops.push_back(IntField); + } + + assert(AllIntFields && + "Unexpected non-integer value in special register string."); + + // Need to combine the integer fields of the string into a single value + // based on the bit encoding of MRS/MSR instruction. + return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | + (Ops[3] << 3) | (Ops[4]); +} + +// Lower the read_register intrinsic to an MRS instruction node if the special +// register string argument is either of the form detailed in the ACLE (the +// form described in getIntOperandFromRegisterString) or is a named register +// known by the MRS SysReg mapper. +SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) { + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + SDLoc DL(N); + + int Reg = getIntOperandFromRegisterString(RegString->getString()); + if (Reg != -1) + return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), + MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(0)); + + // Use the sysreg mapper to map the remaining possible strings to the + // value for the register to be used for the instruction operand.
+ AArch64SysReg::MRSMapper mapper; + bool IsValidSpecialReg; + Reg = mapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + if (IsValidSpecialReg) + return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), + MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(0)); + + return nullptr; +} + +// Lower the write_register intrinsic to an MSR instruction node if the special +// register string argument is either of the form detailed in the ACLE (the +// form described in getIntOperandFromRegisterString) or is a named register +// known by the MSR SysReg mapper. +SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) { + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + SDLoc DL(N); + + int Reg = getIntOperandFromRegisterString(RegString->getString()); + if (Reg != -1) + return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(2), N->getOperand(0)); + + // Check if the register was one of those allowed as the pstatefield value in + // the MSR (immediate) instruction. To accept the values allowed in the + // pstatefield for the MSR (immediate) instruction, we also require that an + // immediate value has been provided as an argument; we know that this is + // the case as it has been ensured by semantic checking.
+ AArch64PState::PStateMapper PMapper; + bool IsValidSpecialReg; + Reg = PMapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + if (IsValidSpecialReg) { + assert (isa<ConstantSDNode>(N->getOperand(2)) + && "Expected a constant integer expression."); + uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + CurDAG->getTargetConstant(Immed, DL, MVT::i16), + N->getOperand(0)); + } + + // Use the sysreg mapper to attempt to map the remaining possible strings + // to the value for the register to be used for the MSR (register) + // instruction operand. + AArch64SysReg::MSRMapper Mapper; + Reg = Mapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + + if (IsValidSpecialReg) + return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(2), N->getOperand(0)); + + return nullptr; +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "); @@ -2135,6 +2252,16 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { default: break; + case ISD::READ_REGISTER: + if (SDNode *Res = SelectReadRegister(Node)) + return Res; + break; + + case ISD::WRITE_REGISTER: + if (SDNode *Res = SelectWriteRegister(Node)) + return Res; + break; + case ISD::ADD: if (SDNode *I = SelectMLAV64LaneV128(Node)) return I; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e6108c3e95e2..1616ff13535d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -76,6 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +/// Value type used for condition codes. 
+static const MVT MVT_CC = MVT::i32; + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -807,6 +810,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; + case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; + case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; @@ -1165,10 +1171,133 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } +static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue CCOp, + SDValue Condition, unsigned NZCV, + SDLoc DL, SelectionDAG &DAG) { + unsigned Opcode = 0; + if (LHS.getValueType().isFloatingPoint()) + Opcode = AArch64ISD::FCCMP; + else if (RHS.getOpcode() == ISD::SUB) { + SDValue SubOp0 = RHS.getOperand(0); + if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0)) + if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // See emitComparison() on why we can only do this for SETEQ and SETNE. + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } + } + if (Opcode == 0) + Opcode = AArch64ISD::CCMP; + + SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); +} + +/// Returns true if @p Val is a tree of AND/OR/SETCC operations. 
+static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) { + if (!Val.hasOneUse()) + return false; + if (Val->getOpcode() == ISD::SETCC) + return true; + // Protect against stack overflow. + if (Depth > 1000) + return false; + if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) { + SDValue O0 = Val->getOperand(0); + SDValue O1 = Val->getOperand(1); + return isConjunctionDisjunctionTree(O0, Depth+1) && + isConjunctionDisjunctionTree(O1, Depth+1); + } + return false; +} + +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/FCCMP ops. For example (SETCC_0 & SETCC_1) with condition cond_0 and +/// cond_1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0 +/// and setting flags to inverted(cond_1) otherwise. +/// This recursive function produces DAG nodes that produce condition flags +/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC) +/// by testing the result for the condition set to @p OutCC. If @p Negate is +/// set, the opposite truth value is produced. If @p CCOp and @p Condition are +/// given then conditional comparisons are created so that false is reported +/// when they are false. +static SDValue emitConjunctionDisjunctionTree( + SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, + SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) { + assert(isConjunctionDisjunctionTree(Val, 0)); + // We're at a tree leaf, produce a c?f?cmp. + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); + bool isInteger = LHS.getValueType().isInteger(); + if (Negate) + CC = getSetCCInverse(CC, isInteger); + SDLoc DL(Val); + // Determine OutCC and handle FP special case.
+ if (isInteger) { + OutCC = changeIntCCToAArch64CC(CC); + } else { + assert(LHS.getValueType().isFloatingPoint()); + AArch64CC::CondCode ExtraCC; + changeFPCCToAArch64CC(CC, OutCC, ExtraCC); + // Surprisingly, some floating point conditions can't be tested with a + // single condition code. Construct an additional comparison in this case. + // See comment below on how we deal with OR conditions. + if (ExtraCC != AArch64CC::AL) { + SDValue ExtraCmp; + if (!CCOp.getNode()) + ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); + else { + SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); + // Note that we want the inverse of ExtraCC, so NZCV is not inverted. + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, + NZCV, DL, DAG); + } + CCOp = ExtraCmp; + Condition = AArch64CC::getInvertedCondCode(ExtraCC); + OutCC = AArch64CC::getInvertedCondCode(OutCC); + } + } + + // Produce a normal comparison if we are first in the chain. + if (!CCOp.getNode()) + return emitComparison(LHS, RHS, CC, DL, DAG); + // Otherwise produce a ccmp. + SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); + return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + DAG); + } + + // Construct comparison sequence for the left hand side. + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + + // We can only implement AND-like behaviour here, but negation is free. So we + // use (not (and (not x) (not y))) to implement (or x y).
+ bool isOr = Val->getOpcode() == ISD::OR; + assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR."); + Negate ^= isOr; + + AArch64CC::CondCode RHSCC; + SDValue CmpR = + emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition); + SDValue CmpL = + emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC); + if (Negate) + OutCC = AArch64CC::getInvertedCondCode(OutCC); + return CmpL; +} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { SDValue Cmp; @@ -1227,47 +1356,55 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } - // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. - // For the i8 operand, the largest immediate is 255, so this can be easily - // encoded in the compare instruction. For the i16 operand, however, the - // largest immediate cannot be encoded in the compare. - // Therefore, use a sign extending load and cmn to avoid materializing the -1 - // constant. For example, - // movz w1, #65535 - // ldrh w0, [x0, #0] - // cmp w0, w1 - // > - // ldrsh w0, [x0, #0] - // cmn w0, #1 - // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) - // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure - // both the LHS and RHS are truely zero extended and to make sure the - // transformation is profitable. 
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { - if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) && - isa<LoadSDNode>(LHS)) { - if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && - cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && - LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); - if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { - SDValue SExt = - DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, - DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, - DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); - return Cmp; - } + const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); + + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the + // -1 constant. For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to + // ensure both the LHS and RHS are truly zero extended and to make sure the + // transformation is profitable.
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) && + cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + goto CreateCCNode; } } + + if ((RHSC->isNullValue() || RHSC->isOne()) && + isConjunctionDisjunctionTree(LHS, 0)) { + bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue(); + Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate); + goto CreateCCNode; + } } + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); + +CreateCCNode: + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); return Cmp; } @@ -4065,7 +4202,8 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, .Default(0); if (Reg) return Reg; - report_fatal_error("Invalid register name global variable"); + report_fatal_error(Twine("Invalid register name \"" + + StringRef(RegName) + "\".")); } SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, @@ -6741,7 +6879,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset @@ -6792,7 +6931,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, } int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Scaling factors are not free at all. // Operands | Rt Latency // ------------------------------------------- @@ -6800,7 +6940,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, // ------------------------------------------- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty)) + if (isLegalAddressingMode(AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 if // it is not equal to 0 or 1. return AM.Scale != 0 && AM.Scale != 1; @@ -9120,3 +9260,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { return Ty->isArrayTy(); } + +bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, + EVT) const { + return false; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 0d9b8b7c875e..db192c78169a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -58,6 +58,11 @@ enum NodeType : unsigned { SBCS, ANDS, + // Conditional compares. Operands: left,right,falsecc,cc,flags + CCMP, + CCMN, + FCCMP, + // Floating point comparison FCMP, @@ -314,14 +319,16 @@ public: /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; + int getScalingFactorCost(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -506,6 +513,8 @@ private: bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 3b8b6681a084..1fe9c7f8cc5a 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -525,6 +525,13 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{ let ParserMatchClass = Imm0_31Operand; } +// True if the 32-bit immediate is in the range [0,31] +def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 32; +}]> { + let ParserMatchClass = Imm0_31Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ return ((uint64_t)Imm) < 16; @@ -542,7 +549,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{ // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{ return ((uint32_t)Imm) < 16; -}]>; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 
00 = lsl, 01 = lsr, 10 = asr @@ -2068,9 +2077,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype, + string mnemonic, SDNode OpNode> + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2090,19 +2102,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm<bit op, string asm> { - def Wi : BaseCondSetFlagsImm<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm<op, GPR64, asm> { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic, + SDNode OpNode> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2122,11 +2128,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg<bit op, string asm> { - def Wr : BaseCondSetFlagsReg<op, GPR32, asm> { +multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> { + // immediate operand 
variants + def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg<op, GPR64, asm> { + def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -3934,11 +3948,14 @@ multiclass FPComparison<bit signalAllNans, string asm, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison<bit signalAllNans, - RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype, + string mnemonic, list<dag> pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; @@ -3954,16 +3971,18 @@ class BaseFPCondComparison<bit signalAllNans, let Inst{3-0} = nzcv; } -multiclass FPCondComparison<bit signalAllNans, string asm> { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> { +multiclass FPCondComparison<bit signalAllNans, string mnemonic, + SDPatternOperator OpNode = null_frag> { + def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic, + [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { let Inst{22} = 0; } - - def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> { + def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic, + [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { let Inst{22} = 1; } - } // Defs = [NZCV], Uses = [NZCV] } //--- @@ -8822,6 +8841,178 
@@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; } // end of 'let Predicates = [HasCrypto]' +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * SWP +// * LDOPregister<OP>, and aliases STOPregister<OP> + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt +// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt +// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 + +// Instruction syntax: +// +// CAS{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// CAS{<order>} <Xs>, <Xt>, [<Xn|SP>] +// CASP{<order>} <Ws>, <W(s+1)>, <Wt>, <W(t+1)>, [<Xn|SP>] +// CASP{<order>} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>] +// SWP{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// SWP{<order>} <Xs>, <Xt>, [<Xn|SP>] +// LD<OP>{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// LD<OP>{<order>} <Xs>, <Xt>, [<Xn|SP>] +// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>] +// ST<OP>{<order>} <Xs>, [<Xn|SP>] + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding<dag oops, dag iops, string asm, string operands, + string cstr, list<dag> pattern> + : I<oops, iops, asm, operands, cstr, pattern> { + bits<2> Sz; + bit NP; + bit Acq; + bit Rel; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = Acq; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = Rel; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class BaseCAS<string order, string size, RegisterClass RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 1; +} + +multiclass 
CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS<order, "", GPR64>; +} + +class BaseCASP<string order, string size, RegisterOperand RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 0; +} + +multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in + def s : BaseCASP<order, "", WSeqPairClassOperand>; + let Sz = 0b01, Acq = Acq, Rel = Rel in + def d : BaseCASP<order, "", XSeqPairClassOperand>; +} + +let Predicates = [HasV8_1a] in +class BaseSWP<string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Swap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseSWP<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP<order, "", GPR64>; +} + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOPregister<string op, string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # 
order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel, + string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in + def b : BaseLDOPregister<op, order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in + def h : BaseLDOPregister<op, order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in + def s : BaseLDOPregister<op, order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in + def d : BaseLDOPregister<op, order, "", GPR64>; +} + +let Predicates = [HasV8_1a] in +class BaseSTOPregister<string asm, RegisterClass OP, Register Reg, + Instruction inst> : + InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>; + +multiclass STOPregister<string asm, string instr> { + def : BaseSTOPregister<asm # "lb", GPR32, WZR, + !cast<Instruction>(instr # "Lb")>; + def : BaseSTOPregister<asm # "lh", GPR32, WZR, + !cast<Instruction>(instr # "Lh")>; + def : BaseSTOPregister<asm # "l", GPR32, WZR, + !cast<Instruction>(instr # "Ls")>; + def : BaseSTOPregister<asm # "l", GPR64, XZR, + !cast<Instruction>(instr # "Ld")>; + def : BaseSTOPregister<asm # "b", GPR32, WZR, + !cast<Instruction>(instr # "b")>; + def : BaseSTOPregister<asm # "h", GPR32, WZR, + !cast<Instruction>(instr # "h")>; + def : BaseSTOPregister<asm, GPR32, WZR, + !cast<Instruction>(instr # "s")>; + def : BaseSTOPregister<asm, GPR64, XZR, + !cast<Instruction>(instr # "d")>; +} + +//---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. 
def : TokenAlias<".8B", ".8b">; def : TokenAlias<".4H", ".4h">; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 207c34ca7f0b..6941a6bf1b47 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2066,10 +2066,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // This is a bit of a hack. Consider this instruction: // // %vreg0<def> = COPY %SP; GPR64all:%vreg0 diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index fa4b8b7e6179..d296768ab9b0 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -131,6 +131,7 @@ public: using TargetInstrInfo::foldMemoryOperandImpl; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index c7d6a69b9fd7..2f1b8933bf61 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -66,6 +66,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, 
i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -160,6 +174,10 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; @@ -727,6 +745,74 @@ def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; +// v8.1 atomic CAS +defm CAS : CompareAndSwap<0, 0, "">; +defm CASA : CompareAndSwap<1, 0, "a">; +defm CASL : CompareAndSwap<0, 1, "l">; +defm CASAL : CompareAndSwap<1, 1, "al">; + +// v8.1 atomic CASP +defm CASP : CompareAndSwapPair<0, 0, "">; +defm CASPA : CompareAndSwapPair<1, 0, "a">; +defm CASPL : CompareAndSwapPair<0, 1, "l">; +defm CASPAL : CompareAndSwapPair<1, 1, "al">; + +// v8.1 atomic SWP +defm SWP : Swap<0, 0, "">; +defm SWPA : Swap<1, 0, "a">; +defm SWPL : Swap<0, 1, "l">; +defm SWPAL : Swap<1, 1, "al">; + +// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) +defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; +defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; +defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; +defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; + +defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; +defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; +defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; +defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; + +defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; +defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; +defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; +defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; + +defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; +defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; +defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; +defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; + +defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; +defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; +defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; +defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; + +defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; +defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; +defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; +defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; + +defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; +defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; +defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; +defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; + +defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; +defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; +defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; +defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; + +// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" +defm : STOPregister<"stadd","LDADD">; // STADDx +defm : STOPregister<"stclr","LDCLR">; // STCLRx +defm : 
STOPregister<"steor","LDEOR">; // STEORx +defm : STOPregister<"stset","LDSET">; // STSETx +defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx +defm : STOPregister<"stsmin","LDSMIN">;// STSMINx +defm : STOPregister<"stumax","LDUMAX">;// STUMAXx +defm : STOPregister<"stumin","LDUMIN">;// STUMINx //===----------------------------------------------------------------------===// // Logical instructions. @@ -950,13 +1036,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; //===----------------------------------------------------------------------===// -// Conditionally set flags instructions. +// Conditional comparison instructions. //===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; +defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; +defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; //===----------------------------------------------------------------------===// // Conditional select instructions. @@ -2486,7 +2569,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. 
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 72edbf14c0d8..e55ae991b635 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -69,10 +69,10 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_PAGEOFF; } - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -139,14 +139,14 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_NC; const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); AArch64MCExpr::VariantKind RefKind; RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags); - Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx); + Expr = AArch64MCExpr::create(Expr, RefKind, Ctx); return MCOperand::createExpr(Expr); } @@ -179,7 +179,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td 
b/lib/Target/AArch64/AArch64RegisterInfo.td index d5ff3f1f3373..b2efca023372 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -26,8 +26,12 @@ let Namespace = "AArch64" in { def hsub : SubRegIndex<16>; def ssub : SubRegIndex<32>; def dsub : SubRegIndex<32>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; def qhisub : SubRegIndex<64>; def qsub : SubRegIndex<64>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; def dsub1 : SubRegIndex<64>; @@ -592,3 +596,40 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand">; def FPR32Op : RegisterOperand<FPR32, "printOperand">; def FPR64Op : RegisterOperand<FPR64, "printOperand">; def FPR128Op : RegisterOperand<FPR128, "printOperand">; + + +//===----------------------------------------------------------------------===// +// ARMv8.1a atomic CASP register operands + + +def WSeqPairs : RegisterTuples<[sube32, subo32], + [(rotl GPR32, 0), (rotl GPR32, 1)]>; +def XSeqPairs : RegisterTuples<[sube64, subo64], + [(rotl GPR64, 0), (rotl GPR64, 1)]>; + +def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, + (add WSeqPairs)>{ + let Size = 64; +} +def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, + (add XSeqPairs)>{ + let Size = 128; +} + + +let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in { + def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; } + def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; } +} + +def WSeqPairClassOperand : + RegisterOperand<WSeqPairsClass, "printGPRSeqPairsClassOperand<32>"> { + let ParserMatchClass = WSeqPairsAsmOperandClass; +} +def XSeqPairClassOperand : + RegisterOperand<XSeqPairsClass, "printGPRSeqPairsClassOperand<64>"> { + let ParserMatchClass = XSeqPairsAsmOperandClass; +} + + +//===----- END: v8.1a atomic CASP register operands 
-----------------------===// diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index a9059ab37f5f..f23dd33d0146 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -250,10 +250,14 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + if ((TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge == cl::BOU_UNSET) || - EnableGlobalMerge == cl::BOU_TRUE) - addPass(createGlobalMergePass(TM, 4095)); + EnableGlobalMerge == cl::BOU_TRUE) { + bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && + (EnableGlobalMerge == cl::BOU_UNSET); + addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize)); + } + if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 299b4a55dd82..18ee4a9c72b5 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -40,11 +40,11 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Res, PC, 
getContext()); } return TargetLoweringObjectFileMachO::getTTypeGlobalReference( @@ -65,9 +65,9 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( // On ARM64 Darwin, we can reference symbols with foo@GOT-., which // is an indirect pc-relative reference. const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Res, PC, getContext()); } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 38d34e65a2e4..063c053ffe8a 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -107,6 +107,7 @@ private: OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); bool tryParseVectorRegister(OperandVector &Operands); + OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -875,6 +876,16 @@ public: return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } + bool isWSeqPair() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains( + Reg.RegNum); + } + bool isXSeqPair() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains( + Reg.RegNum); + } bool isGPR64sp0() const { return Kind == k_Register && !Reg.isVector && @@ -1753,7 +1764,7 @@ static unsigned MatchRegisterName(StringRef Name); /// } static 
unsigned matchVectorRegName(StringRef Name) { - return StringSwitch<unsigned>(Name) + return StringSwitch<unsigned>(Name.lower()) .Case("v0", AArch64::Q0) .Case("v1", AArch64::Q1) .Case("v2", AArch64::Q2) @@ -2024,7 +2035,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { // No modifier was specified at all; this is the syntax for an ELF basic // ADRP relocation (unfortunately). Expr = - AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); + AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && Addend != 0) { @@ -2157,7 +2168,7 @@ AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) { if (MCE) { int64_t Val = MCE->getValue(); if (Val > 0xfff && (Val & 0xfff) == 0) { - Imm = MCConstantExpr::Create(Val >> 12, getContext()); + Imm = MCConstantExpr::create(Val >> 12, getContext()); ShiftAmount = 12; } } @@ -2347,14 +2358,14 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, #define SYS_ALIAS(op1, Cn, Cm, op2) \ do { \ - Expr = MCConstantExpr::Create(op1, getContext()); \ + Expr = MCConstantExpr::create(op1, getContext()); \ Operands.push_back( \ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ Operands.push_back( \ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ Operands.push_back( \ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ - Expr = MCConstantExpr::Create(op2, getContext()); \ + Expr = MCConstantExpr::create(op2, getContext()); \ Operands.push_back( \ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ } while (0) @@ -2835,7 +2846,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { return true; if (HasELFModifier) - ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext()); + ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext()); return false; } @@ -3128,7 +3139,7 @@ bool 
AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) { Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx); Operands.push_back(AArch64Operand::CreateImm( - MCConstantExpr::Create(Imm, Ctx), S, E, Ctx)); + MCConstantExpr::create(Imm, Ctx), S, E, Ctx)); if (ShiftAmt) Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL, ShiftAmt, true, S, E, Ctx)); @@ -3634,8 +3645,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, NewOp4Val = 63 - Op3Val; } - const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); + const MCExpr *NewOp3 = MCConstantExpr::create(NewOp3Val, getContext()); + const MCExpr *NewOp4 = MCConstantExpr::create(NewOp4Val, getContext()); Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op.getStartLoc(), getContext()); @@ -3685,8 +3696,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(WidthOp.getStartLoc(), "requested insert overflows register"); - const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext()); - const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext()); + const MCExpr *ImmRExpr = MCConstantExpr::create(ImmR, getContext()); + const MCExpr *ImmSExpr = MCConstantExpr::create(ImmS, getContext()); Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op.getStartLoc(), getContext()); Operands[2] = AArch64Operand::CreateReg( @@ -3742,9 +3753,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "requested insert overflows register"); const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); + MCConstantExpr::create(NewOp3Val, getContext()); const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); + MCConstantExpr::create(NewOp4Val, getContext()); Operands[3] = AArch64Operand::CreateImm( NewOp3, 
Op3.getStartLoc(), Op3.getEndLoc(), getContext()); Operands[4] = AArch64Operand::CreateImm( @@ -3800,7 +3811,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "requested extract overflows register"); const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); + MCConstantExpr::create(NewOp4Val, getContext()); Operands[4] = AArch64Operand::CreateImm( NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext()); if (Tok == "bfxil") @@ -4021,7 +4032,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".ltorg" || IDVal == ".pool") return parseDirectiveLtorg(Loc); if (IDVal == ".unreq") - return parseDirectiveUnreq(DirectiveID.getLoc()); + return parseDirectiveUnreq(Loc); if (!IsMachO && !IsCOFF) { if (IDVal == ".inst") @@ -4106,8 +4117,8 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, getContext()); + Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; Inst.setOpcode(AArch64::TLSDESCCALL); @@ -4354,3 +4365,77 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; return Match_InvalidOperand; } + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { + + SMLoc S = getLoc(); + + if (getParser().getTok().isNot(AsmToken::Identifier)) { + Error(S, "expected register"); + return MatchOperand_ParseFail; + } + + int FirstReg = tryParseRegister(); + if (FirstReg == -1) { + return MatchOperand_ParseFail; + } + const MCRegisterClass &WRegClass = + AArch64MCRegisterClasses[AArch64::GPR32RegClassID]; + const MCRegisterClass &XRegClass = + 
AArch64MCRegisterClasses[AArch64::GPR64RegClassID]; + + bool isXReg = XRegClass.contains(FirstReg), + isWReg = WRegClass.contains(FirstReg); + if (!isXReg && !isWReg) { + Error(S, "expected first even register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned FirstEncoding = RI->getEncodingValue(FirstReg); + + if (FirstEncoding & 0x1) { + Error(S, "expected first even register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + SMLoc M = getLoc(); + if (getParser().getTok().isNot(AsmToken::Comma)) { + Error(M, "expected comma"); + return MatchOperand_ParseFail; + } + // Eat the comma + getParser().Lex(); + + SMLoc E = getLoc(); + int SecondReg = tryParseRegister(); + if (SecondReg ==-1) { + return MatchOperand_ParseFail; + } + + if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 || + (isXReg && !XRegClass.contains(SecondReg)) || + (isWReg && !WRegClass.contains(SecondReg))) { + Error(E,"expected second odd register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + unsigned Pair = 0; + if(isXReg) { + Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64, + &AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]); + } else { + Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube32, + &AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]); + } + + Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(), + getContext())); + + return MatchOperand_Success; +} diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a1ed703d1bf4..359c2e734e21 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -169,6 +169,14 @@ static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst 
&Inst, unsigned Imm, uint64_t Addr, const void *Decoder); static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder); +static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -1543,3 +1551,35 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, return Success; } + +static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegClassID, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + // Register number must be even (see CASP instruction) + if (RegNo & 0x1) + return Fail; + + unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + +static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + return DecodeGPRSeqPairsClassRegisterClass(Inst, + AArch64::WSeqPairsClassRegClassID, + RegNo, Addr, Decoder); +} + +static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + return DecodeGPRSeqPairsClassRegisterClass(Inst, + AArch64::XSeqPairsClassRegClassID, + RegNo, Addr, Decoder); +} diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 07e4a45292fa..eb05ed915ddb 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -168,11 +168,11 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Variant = 
getVariant(SymbolicOp.VariantKind); if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx); + Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); else - Add = MCSymbolRefExpr::Create(Sym, Ctx); + Add = MCSymbolRefExpr::create(Sym, Ctx); } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx); + Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); } } @@ -181,37 +181,37 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( if (SymbolicOp.SubtractSymbol.Name) { StringRef Name(SymbolicOp.SubtractSymbol.Name); MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, Ctx); + Sub = MCSymbolRefExpr::create(Sym, Ctx); } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx); + Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); } } const MCExpr *Off = nullptr; if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); const MCExpr *Expr; if (Sub) { const MCExpr *LHS; if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); else - LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + LHS = MCUnaryExpr::createMinus(Sub, Ctx); if (Off) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); else Expr = LHS; } else if (Add) { if (Off) - Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); else Expr = Add; } else { if (Off) Expr = Off; else - Expr = MCConstantExpr::Create(0, Ctx); + Expr = MCConstantExpr::create(0, Ctx); } MI.addOperand(MCOperand::createExpr(Expr)); diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt index 62827e8f50eb..73665eb5701a 100644 --- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = 
AArch64Disassembler parent = AArch64 -required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCDisassembler Support add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 02bd929dc65d..96fbe3a9af4d 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -206,15 +206,15 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, else O << "\tmovn\t"; - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); + O << getRegisterName(MI->getOperand(0).getReg()) << ", #"; + MI->getOperand(1).getExpr()->print(O, &MAI); return; } if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) && MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); + O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"; + MI->getOperand(2).getExpr()->print(O, &MAI); return; } @@ -908,7 +908,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -966,7 +966,7 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); printShifter(MI, OpNum + 1, STI, O); } } @@ -1091,7 +1091,7 @@ void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, O << "#" << (MO.getImm() * Scale); } else { assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } } @@ -1103,7 +1103,8 @@ void 
AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, O << ", #" << (MO1.getImm() * Scale); } else { assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); + O << ", "; + MO1.getExpr()->print(O, &MAI); } O << ']'; } @@ -1113,7 +1114,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = + StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; @@ -1177,6 +1178,23 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { return Reg; } +template<unsigned size> +void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + static_assert(size == 64 || size == 32, + "Template parameter must be either 32 or 64"); + unsigned Reg = MI->getOperand(OpNum).getReg(); + + unsigned Sube = (size == 32) ? AArch64::sube32 : AArch64::sube64; + unsigned Subo = (size == 32) ? AArch64::subo32 : AArch64::subo64; + + unsigned Even = MRI.getSubReg(Reg, Sube); + unsigned Odd = MRI.getSubReg(Reg, Subo); + O << getRegisterName(Even) << ", " << getRegisterName(Odd); +} + void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O, @@ -1264,12 +1282,12 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << "0x"; O.write_hex(Address); } else { // Otherwise, just print the expression. 
- O << *MI->getOperand(OpNum).getExpr(); + MI->getOperand(OpNum).getExpr()->print(O, &MAI); } } @@ -1286,7 +1304,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); + MI->getOperand(OpNum).getExpr()->print(O, &MAI); } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, @@ -1298,10 +1316,10 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << Name; @@ -1337,7 +1355,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = + StringRef Name = AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index c2077a0fe557..15dee978e229 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -153,6 +153,10 @@ protected: const MCSubtargetInfo &STI, raw_ostream &O); void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template<unsigned size> + void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt index 573fa10561cf..642c18394a67 100644 
--- a/lib/Target/AArch64/LLVMBuild.txt +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = AArch64CodeGen parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 31fceb653a12..6c15bf3afb2d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -252,7 +252,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // We are properly aligned, so write NOPs as requested. Count /= 4; for (uint64_t i = 0; i != Count; ++i) - OW->Write32(0xd503201f); + OW->write32(0xd503201f); return true; } @@ -496,7 +496,7 @@ void ELFAArch64AsmBackend::processFixupValue( // FIXME: Should be replaced with something more principled. 
static bool isByteSwappedFixup(const MCExpr *E) { MCValue Val; - if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr)) + if (!E->evaluateAsRelocatable(Val, nullptr, nullptr)) return false; if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined()) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 204a1abe72b5..78837de18b97 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -23,16 +23,14 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -161,18 +159,18 @@ private: MCSymbol *Start = getContext().createTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = getContext().getOrCreateSymbol( - Name + "." + Twine(MappingSymbolCounter++)); + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." 
+ Twine(MappingSymbolCounter++))); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); + getAssembler().registerSymbol(*Symbol); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); auto Sec = getCurrentSection().first; assert(Sec && "need a section"); Symbol->setSection(*Sec); - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); Symbol->setVariableValue(Value); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index ab2cad6547fa..921c4b94a729 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -62,15 +62,14 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( // version. 
MCContext &Context = Streamer.getContext(); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Context); MCSymbol *PCSym = Context.createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context); + return MCBinaryExpr::createSub(Res, PC, Context); } -AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { - Triple T(TT); +AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { if (T.getArch() == Triple::aarch64_be) IsLittleEndian = false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 9b88de7dabbc..253cd30f26ee 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -18,9 +18,10 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class Target; -class StringRef; class MCStreamer; +class Target; +class Triple; + struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { explicit AArch64MCAsmInfoDarwin(); const MCExpr * @@ -29,7 +30,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { }; struct AArch64MCAsmInfoELF : public MCAsmInfoELF { - explicit AArch64MCAsmInfoELF(StringRef TT); + explicit AArch64MCAsmInfoELF(const Triple &T); }; } // namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 277ea9fbace2..7d8e79bc63c8 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ 
-174,16 +175,6 @@ public: unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const; - void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - - void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; - } - } - void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; @@ -611,7 +602,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - EmitConstant(Binary, 4, OS); + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++MCNumEmitted; // Keep track of the # of mi's emitted. } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 74b81af2cb4d..28703419514a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -15,9 +15,8 @@ #include "AArch64MCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -26,7 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "aarch64symbolrefexpr" -const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, +const AArch64MCExpr *AArch64MCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { return new (Ctx) AArch64MCExpr(Expr, Kind); } @@ -76,24 +75,24 @@ StringRef AArch64MCExpr::getVariantKindName() const { } } -void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { +void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if 
(getKind() != VK_NONE) OS << getVariantKindName(); - OS << *Expr; + Expr->print(OS, MAI); } void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } -MCSection *AArch64MCExpr::FindAssociatedSection() const { +MCSection *AArch64MCExpr::findAssociatedSection() const { llvm_unreachable("FIXME: what goes here?"); } -bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup)) + if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) return false; Res = @@ -121,8 +120,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { // We're known to be under a TLS fixup, so any symbol should be // modified. There should be only one. const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); break; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index 95d22775736c..1165314e4105 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -112,7 +112,7 @@ public: /// @name Construction /// @{ - static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, + static const AArch64MCExpr *create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx); /// @} @@ -145,13 +145,13 @@ public: /// (e.g. ":got:", ":lo12:"). 
StringRef getVariantKindName() const; - void PrintImpl(raw_ostream &OS) const override; + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override; + MCSection *findAssociatedSection() const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 2e22de08537b..f89a85273872 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -58,15 +58,13 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { } static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - + const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) MAI = new AArch64MCAsmInfoDarwin(); else { assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new AArch64MCAsmInfoELF(TT); + MAI = new AArch64MCAsmInfoELF(TheTriple); } // Initial state of the frame pointer is SP. 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index d425975e7cb0..67af810bbbec 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -31,10 +31,9 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter { public: AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} + : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -140,7 +139,7 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section, return false; } -void AArch64MachObjectWriter::RecordRelocation( +void AArch64MachObjectWriter::recordRelocation( MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -209,11 +208,9 @@ void AArch64MachObjectWriter::RecordRelocation( } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData &A_SD = Asm.getSymbolData(*A); const MCSymbol *A_Base = Asm.getAtom(*A); const MCSymbol *B = &Target.getSymB()->getSymbol(); - const MCSymbolData &B_SD = Asm.getSymbolData(*B); const MCSymbol *B_Base = Asm.getAtom(*B); // Check for "_foo@got - .", which comes through here as: @@ -264,14 +261,12 @@ void AArch64MachObjectWriter::RecordRelocation( Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported relocation with identical base"); - Value += (!A_SD.getFragment() ? 
0 : Writer->getSymbolAddress(*A, Layout)) - - (!A_Base || !A_Base->getData().getFragment() - ? 0 - : Writer->getSymbolAddress(*A_Base, Layout)); - Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) - - (!B_Base || !B_Base->getData().getFragment() - ? 0 - : Writer->getSymbolAddress(*B_Base, Layout)); + Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - + (!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress( + *A_Base, Layout)); + Value -= (!B->getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) - + (!B_Base || !B_Base->getFragment() ? 0 : Writer->getSymbolAddress( + *B_Base, Layout)); Type = MachO::ARM64_RELOC_UNSIGNED; @@ -304,7 +299,7 @@ void AArch64MachObjectWriter::RecordRelocation( // If the evaluation is an absolute value, just use that directly // to keep things easy. int64_t Res; - if (Symbol->getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; @@ -313,12 +308,12 @@ void AArch64MachObjectWriter::RecordRelocation( // FIXME: Will the Target we already have ever have any data in it // we need to preserve and merge with the new Target? How about // the FixedValue? - if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout, + if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout, &Fixup)) Asm.getContext().reportFatalError(Fixup.getLoc(), "unable to resolve variable '" + Symbol->getName() + "'"); - return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); } @@ -360,7 +355,7 @@ void AArch64MachObjectWriter::RecordRelocation( // Resolve constant variables. 
if (Symbol->isVariable()) { int64_t Res; - if (Symbol->getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 28b8e7e29fe2..ee85b65bf39a 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -175,6 +175,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"id_mmfr1_el1", ID_MMFR1_EL1, {}}, {"id_mmfr2_el1", ID_MMFR2_EL1, {}}, {"id_mmfr3_el1", ID_MMFR3_EL1, {}}, + {"id_mmfr4_el1", ID_MMFR4_EL1, {}}, {"id_isar0_el1", ID_ISAR0_EL1, {}}, {"id_isar1_el1", ID_ISAR1_EL1, {}}, {"id_isar2_el1", ID_ISAR2_EL1, {}}, diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7125f14f1a2d..7e42f8e3601e 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -603,6 +603,7 @@ namespace AArch64SysReg { ISR_EL1 = 0xc608, // 11 000 1100 0001 000 CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 + ID_MMFR4_EL1 = 0xc016, // 11 000 0000 0010 110 // Trace registers TRCSTATR = 0x8818, // 10 001 0000 0011 000 diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index d3cc068993e0..9550a3a3cad1 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -16,11 +16,13 @@ #define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" +#include <functional> namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +class Function; class FunctionPass; class ImmutablePass; class MachineInstr; @@ -38,7 +40,8 @@ FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); -FunctionPass *createThumb2SizeReductionPass(); 
+FunctionPass *createThumb2SizeReductionPass( + std::function<bool(const Function &)> Ftor = nullptr); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 04503b89de73..d84f2961d810 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -87,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); assert(GV && "C++ constructor pointer was not a GlobalValue!"); - const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV, + const MCExpr *E = MCSymbolRefExpr::create(GetARMGVSymbol(GV, ARMII::MO_NO_FLAG), (Subtarget->isTargetELF() ? MCSymbolRefExpr::VK_ARM_TARGET1 @@ -173,7 +173,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); @@ -181,7 +181,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << ":lower16:"; else if (TF & ARMII::MO_HI16) O << ":upper16:"; - O << *GetARMGVSymbol(GV, TF); + GetARMGVSymbol(GV, TF)->print(O, MAI); printOffset(MO.getOffset(), O); if (TF == ARMII::MO_PLT) @@ -189,7 +189,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_ConstantPoolIndex: - O << *GetCPISymbol(MO.getIndex()); + GetCPISymbol(MO.getIndex())->print(O, MAI); break; } } @@ -467,7 +467,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, // using NLPs; however, sometimes the types are local to the file. // We need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue( - MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()), 4 /*size*/); } @@ -640,9 +640,13 @@ void ARMAsmPrinter::emitAttributes() { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() ? ARM::FK_FPV5_D16 : ARM::FK_FP_ARMV8); + ATS.emitFPU(STI.hasD16() + ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV4_D16 : ARM::FK_VFPV4); + ATS.emitFPU(STI.hasD16() + ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); else if (STI.hasVFP3()) ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3); else if (STI.hasVFP2()) @@ -895,7 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Create an MCSymbol for the reference. const MCExpr *Expr = - MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()), + MCSymbolRefExpr::create(MCSym, getModifierVariantKind(ACPV->getModifier()), OutContext); if (ACPV->getPCAdjustment()) { @@ -903,10 +907,10 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { getFunctionNumber(), ACPV->getLabelId(), OutContext); - const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext); + const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext); PCRelExpr = - MCBinaryExpr::CreateAdd(PCRelExpr, - MCConstantExpr::Create(ACPV->getPCAdjustment(), + MCBinaryExpr::createAdd(PCRelExpr, + MCConstantExpr::create(ACPV->getPCAdjustment(), OutContext), OutContext); if (ACPV->mustAddCurrentAddress()) { @@ -914,25 +918,22 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // label, so just emit a local label end reference that instead. 
MCSymbol *DotSym = OutContext.createTempSymbol(); OutStreamer->EmitLabel(DotSym); - const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); - PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); + const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); + PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext); } - Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext); } OutStreamer->EmitValue(Expr, Size); } -void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - int OpNum = 1; - if (Opcode == ARM::BR_JTadd) - OpNum = 2; - else if (Opcode == ARM::BR_JTm) - OpNum = 3; - - const MachineOperand &MO1 = MI->getOperand(OpNum); +void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { + const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); + // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for + // ARM mode tables. + EmitAlignment(2); + // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -955,16 +956,16 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { // LJTI_0_0: // .word (LBB0 - LJTI_0_0) // .word (LBB1 - LJTI_0_0) - const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); if (TM.getRelocationModel() == Reloc::PIC_) - Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol, + Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol, OutContext), OutContext); // If we're generating a table of Thumb addresses in static relocation // model, we need to add one to keep interworking correctly. 
else if (AFI->isThumbFunction()) - Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext), + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext), OutContext); OutStreamer->EmitValue(Expr, 4); } @@ -972,10 +973,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); } -void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1; - const MachineOperand &MO1 = MI->getOperand(OpNum); +void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { + const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -985,51 +984,67 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - unsigned OffsetWidth = 4; - if (MI->getOpcode() == ARM::t2TBB_JT) { - OffsetWidth = 1; - // Mark the jump table as data-in-code. - OutStreamer->EmitDataRegion(MCDR_DataRegionJT8); - } else if (MI->getOpcode() == ARM::t2TBH_JT) { - OffsetWidth = 2; - // Mark the jump table as data-in-code. - OutStreamer->EmitDataRegion(MCDR_DataRegionJT16); - } for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; - const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(), + const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. 
- if (OffsetWidth == 4) { - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B) .addExpr(MBBSymbolExpr) .addImm(ARMCC::AL) .addReg(0)); - continue; - } + } +} + +void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, + unsigned OffsetWidth) { + assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width"); + const MachineOperand &MO1 = MI->getOperand(1); + unsigned JTI = MO1.getIndex(); + + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); + OutStreamer->EmitLabel(JTISymbol); + + // Emit each entry of the table. + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + + // Mark the jump table as data-in-code. + OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8 + : MCDR_DataRegionJT16); + + for (auto MBB : JTBBs) { + const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(), + OutContext); // Otherwise it's an offset from the dispatch instruction. Construct an // MCExpr for the entry. We want a value of the form: - // (BasicBlockAddr - TableBeginAddr) / 2 + // (BasicBlockAddr - TBBInstAddr + 4) / 2 // // For example, a TBB table with entries jumping to basic blocks BB0 and BB1 // would look like: // LJTI_0_0: - // .byte (LBB0 - LJTI_0_0) / 2 - // .byte (LBB1 - LJTI_0_0) / 2 - const MCExpr *Expr = - MCBinaryExpr::CreateSub(MBBSymbolExpr, - MCSymbolRefExpr::Create(JTISymbol, OutContext), - OutContext); - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext), + // .byte (LBB0 - (LCPI0_0 + 4)) / 2 + // .byte (LBB1 - (LCPI0_0 + 4)) / 2 + // where LCPI0_0 is a label defined just before the TBB instruction using + // this table. 
+ MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm()); + const MCExpr *Expr = MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(TBInstPC, OutContext), + MCConstantExpr::create(4, OutContext), OutContext); + Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext); + Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext), OutContext); OutStreamer->EmitValue(Expr, OffsetWidth); } // Mark the end of jump table data-in-code region. 32-bit offsets use // actual branch instructions here, so we don't mark those as a data-region // at all. - if (OffsetWidth != 4) - OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); + + // Make sure the next instruction is 2-byte aligned. + EmitAlignment(1); } void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { @@ -1212,7 +1227,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR : ARM::ADR)) .addReg(MI->getOperand(0).getReg()) - .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext)) + .addExpr(MCSymbolRefExpr::create(CPISymbol, OutContext)) // Add predicate operands. .addImm(MI->getOperand(2).getImm()) .addReg(MI->getOperand(3).getReg())); @@ -1228,7 +1243,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR : ARM::ADR)) .addReg(MI->getOperand(0).getReg()) - .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext)) + .addExpr(MCSymbolRefExpr::create(JTIPICSymbol, OutContext)) // Add predicate operands. .addImm(MI->getOperand(2).getImm()) .addReg(MI->getOperand(3).getReg())); @@ -1278,7 +1293,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL) // Predicate comes first here. 
.addImm(ARMCC::AL).addReg(0) - .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext))); + .addExpr(MCSymbolRefExpr::create(TRegSym, OutContext))); return; } case ARM::BMOVPCRX_CALL: { @@ -1315,7 +1330,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GV = Op.getGlobal(); const unsigned TF = Op.getTargetFlags(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc) .addExpr(GVSymExpr) // Add predicate operands. @@ -1332,17 +1347,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned TF = MI->getOperand(1).getTargetFlags(); const GlobalValue *GV = MI->getOperand(1).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 
8 : 4; const MCExpr *PCRelExpr = - ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(PCAdj, OutContext), + ARMMCExpr::createLower16(MCBinaryExpr::createSub(GVSymExpr, + MCBinaryExpr::createAdd(LabelSymExpr, + MCConstantExpr::create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::createExpr(PCRelExpr)); @@ -1365,17 +1380,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned TF = MI->getOperand(2).getTargetFlags(); const GlobalValue *GV = MI->getOperand(2).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(3).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = - ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(PCAdj, OutContext), + ARMMCExpr::createUpper16(MCBinaryExpr::createSub(GVSymExpr, + MCBinaryExpr::createAdd(LabelSymExpr, + MCConstantExpr::create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::createExpr(PCRelExpr)); // Add predicate operands. @@ -1501,6 +1516,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitGlobalConstant(MCPE.Val.ConstVal); return; } + case ARM::JUMPTABLE_ADDRS: + EmitJumpTableAddrs(MI); + return; + case ARM::JUMPTABLE_INSTS: + EmitJumpTableInsts(MI); + return; + case ARM::JUMPTABLE_TBB: + case ARM::JUMPTABLE_TBH: + EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 
1 : 2); + return; case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) @@ -1509,37 +1534,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add predicate operands. .addImm(ARMCC::AL) .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); - return; - } - case ARM::t2TBB_JT: { - // Lower and emit the instruction itself, then the jump table following it. - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB) - .addReg(ARM::PC) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); - // Make sure the next instruction is 2-byte aligned. - EmitAlignment(1); return; } + case ARM::t2TBB_JT: case ARM::t2TBH_JT: { - // Lower and emit the instruction itself, then the jump table following it. - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH) - .addReg(ARM::PC) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); + unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH; + // Lower and emit the PC label, then the instruction itself. + OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm())); + EmitToStreamer(*OutStreamer, MCInstBuilder(Opc) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::tBR_JTr: @@ -1559,13 +1566,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (Opc == ARM::MOVr) TmpInst.addOperand(MCOperand::createReg(0)); EmitToStreamer(*OutStreamer, TmpInst); - - // Make sure the Thumb jump table is 4-byte aligned. 
- if (Opc == ARM::tMOVr) - EmitAlignment(2); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::BR_JTm: { @@ -1589,9 +1589,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::createImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::createReg(0)); EmitToStreamer(*OutStreamer, TmpInst); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::BR_JTadd: { @@ -1606,9 +1603,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0) // Add 's' bit operand (always reg0 for this) .addReg(0)); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::SPACE: @@ -1695,7 +1689,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + const MCExpr *SymbolExpr = MCSymbolRefExpr::create(Label, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB) .addExpr(SymbolExpr) .addImm(ARMCC::AL) diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7bfb9447818e..a6bc3683c8b9 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -71,8 +71,9 @@ public: void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const override; - void EmitJumpTable(const MachineInstr *MI); - void EmitJump2Table(const MachineInstr *MI); + void EmitJumpTableAddrs(const MachineInstr *MI); + void EmitJumpTableInsts(const MachineInstr *MI); + void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth); void EmitInstruction(const MachineInstr *MI) override; bool runOnMachineFunction(MachineFunction &F) override; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c5d6b258240a..9c4b4961fe8c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -627,6 
+627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2MOVi32imm: return 8; case ARM::CONSTPOOL_ENTRY: + case ARM::JUMPTABLE_INSTS: + case ARM::JUMPTABLE_ADDRS: + case ARM::JUMPTABLE_TBB: + case ARM::JUMPTABLE_TBH: // If this machine instr is a constant pool entry, its size is recorded as // operand #2. return MI->getOperand(2).getImm(); @@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: - case ARM::t2BR_JT: - case ARM::t2TBB_JT: - case ARM::t2TBH_JT: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. For TBB, each - // entry is one byte; TBH two byte each. - unsigned EntrySize = (Opc == ARM::t2TBB_JT) - ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = MCID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != nullptr); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 
2 : 4; - unsigned NumEntries = JT[JTI].MBBs.size(); - if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) - // Make sure the instruction that follows TBB is 2-byte aligned. - // FIXME: Constant island pass should insert an "ALIGN" instruction - // instead. - ++NumEntries; - return NumEntries * EntrySize + InstSize; - } case ARM::SPACE: return MI->getOperand(1).getImm(); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 6fa5ad7d0522..f4ec8c67c977 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -180,9 +180,7 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; - private: unsigned MaxDisp; - public: bool NegOk; bool IsSoImm; bool KnownAlignment; @@ -216,12 +214,24 @@ namespace { }; /// CPEntries - Keep track of all of the constant pool entry machine - /// instructions. For each original constpool index (i.e. those that - /// existed upon entry to this pass), it keeps a vector of entries. - /// Original elements are cloned as we go along; the clones are - /// put in the vector of the original element, but have distinct CPIs. + /// instructions. For each original constpool index (i.e. those that existed + /// upon entry to this pass), it keeps a vector of entries. Original + /// elements are cloned as we go along; the clones are put in the vector of + /// the original element, but have distinct CPIs. + /// + /// The first half of CPEntries contains generic constants, the second half + /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up + /// which vector it will be in here. std::vector<std::vector<CPEntry> > CPEntries; + /// Maps a JT index to the offset in CPEntries containing copies of that + /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity. + DenseMap<int, int> JumpTableEntryIndices; + + /// Maps a JT index to the LEA that actually uses the index to calculate its + /// base address. 
+ DenseMap<int, int> JumpTableUserIndices; + /// ImmBranch - One per immediate branch, keeping the machine instruction /// pointer, conditional or unconditional, the max displacement, /// and (if isCond is true) the corresponding unconditional branch @@ -269,7 +279,8 @@ namespace { } private: - void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs); + void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); @@ -279,6 +290,7 @@ namespace { void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); void adjustBBOffsetsAfter(MachineBasicBlock *BB); bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + unsigned getCombinedIndex(const MachineInstr *CPEMI); int findInRangeCPEntry(CPUser& U, unsigned UserOffset); bool findAvailableWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); @@ -301,8 +313,9 @@ namespace { bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); - unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg, - unsigned IdxReg); + bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI, + unsigned &DeadSize, bool &CanDeleteLEA, + bool &BaseRegKill); bool optimizeThumb2JumpTables(); MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); @@ -413,7 +426,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; if (!MCP->isEmpty()) - doInitialPlacement(CPEMIs); + doInitialConstPlacement(CPEMIs); + + if (MF->getJumpTableInfo()) + doInitialJumpTablePlacement(CPEMIs); /// The next UID to take is the first unused one. 
AFI->initPICLabelUId(CPEMIs.size()); @@ -478,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { const CPEntry & CPE = CPEntries[i][j]; - AFI->recordCPEClone(i, CPE.CPI); + if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI()) + AFI->recordCPEClone(i, CPE.CPI); } } @@ -488,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { WaterList.clear(); CPUsers.clear(); CPEntries.clear(); + JumpTableEntryIndices.clear(); + JumpTableUserIndices.clear(); ImmBranches.clear(); PushPopMIs.clear(); T2JumpTables.clear(); @@ -495,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// doInitialPlacement - Perform the initial placement of the constant pool -/// entries. To start with, we put them all at the end of the function. +/// \brief Perform the initial placement of the regular constant pool entries. +/// To start with, we put them all at the end of the function. void -ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { +ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -556,6 +575,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { DEBUG(BB->dump()); } +/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH +/// instructions can be made more efficient if the jump table immediately +/// follows the instruction, it's best to place them immediately next to their +/// jumps to begin with. In almost all cases they'll never be moved from that +/// position. 
+void ARMConstantIslands::doInitialJumpTablePlacement( + std::vector<MachineInstr *> &CPEMIs) { + unsigned i = CPEntries.size(); + auto MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + + MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; + for (MachineBasicBlock &MBB : *MF) { + auto MI = MBB.getLastNonDebugInstr(); + + unsigned JTOpcode; + switch (MI->getOpcode()) { + default: + continue; + case ARM::BR_JTadd: + case ARM::BR_JTr: + case ARM::tBR_JTr: + case ARM::BR_JTm: + JTOpcode = ARM::JUMPTABLE_ADDRS; + break; + case ARM::t2BR_JT: + JTOpcode = ARM::JUMPTABLE_INSTS; + break; + case ARM::t2TBB_JT: + JTOpcode = ARM::JUMPTABLE_TBB; + break; + case ARM::t2TBH_JT: + JTOpcode = ARM::JUMPTABLE_TBH; + break; + } + + unsigned NumOps = MI->getDesc().getNumOperands(); + MachineOperand JTOp = + MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1)); + unsigned JTI = JTOp.getIndex(); + unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t); + MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock(); + MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB); + MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(), + DebugLoc(), TII->get(JTOpcode)) + .addImm(i++) + .addJumpTableIndex(JTI) + .addImm(Size); + CPEMIs.push_back(CPEMI); + CPEntries.emplace_back(1, CPEntry(CPEMI, JTI)); + JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1)); + if (!LastCorrectlyNumberedBB) + LastCorrectlyNumberedBB = &MBB; + } + + // If we did anything then we need to renumber the subsequent blocks. + if (LastCorrectlyNumberedBB) + MF->RenumberBlocks(LastCorrectlyNumberedBB); +} + /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. 
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { @@ -595,9 +674,21 @@ ARMConstantIslands::CPEntry /// getCPELogAlign - Returns the required alignment of the constant pool entry /// represented by CPEMI. Alignment is measured in log2(bytes) units. unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { - assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); + switch (CPEMI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + break; + case ARM::JUMPTABLE_TBB: + return 0; + case ARM::JUMPTABLE_TBH: + case ARM::JUMPTABLE_INSTS: + return 1; + case ARM::JUMPTABLE_ADDRS: + return 2; + default: + llvm_unreachable("unknown constpool entry kind"); + } - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); unsigned Align = MCP->getConstants()[CPI].getAlignment(); assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); @@ -706,12 +797,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) PushPopMIs.push_back(I); - if (Opc == ARM::CONSTPOOL_ENTRY) + if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS || + Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB || + Opc == ARM::JUMPTABLE_TBH) continue; // Scan the instructions for constant pool operands. for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI()) { + if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -727,6 +820,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Taking the address of a CP entry. case ARM::LEApcrel: + case ARM::LEApcrelJT: // This takes a SoImm, which is 8 bit immediate rotated. We'll // pretend the maximum offset is 255 * 4. Since each instruction // 4 byte wide, this is always correct. 
We'll check for other @@ -737,10 +831,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { IsSoImm = true; break; case ARM::t2LEApcrel: + case ARM::t2LEApcrelJT: Bits = 12; NegOk = true; break; case ARM::tLEApcrel: + case ARM::tLEApcrelJT: Bits = 8; Scale = 4; break; @@ -768,6 +864,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Remember that this is a user of a CP entry. unsigned CPI = I->getOperand(op).getIndex(); + if (I->getOperand(op).isJTI()) { + JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size())); + CPI = JumpTableEntryIndices[CPI]; + } + MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); @@ -1101,6 +1202,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, return false; } +unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) { + if (CPEMI->getOperand(1).isCPI()) + return CPEMI->getOperand(1).getIndex(); + + return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()]; +} + /// LookForCPEntryInRange - see if the currently referenced CPE is in range; /// if not, see if an in-range clone of the CPE is in range, and if so, /// change the data structures so the user references the clone. Returns: @@ -1120,7 +1228,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) } // No. Look for previously created clones of the CPE that are in range. 
- unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); std::vector<CPEntry> &CPEs = CPEntries[CPI]; for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { // We already tried this one @@ -1365,7 +1473,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. unsigned UserOffset = getUserOffset(U); @@ -1429,17 +1537,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Update internal data structures to account for the newly inserted MBB. updateForInsertedWaterBlock(NewIsland); - // Decrement the old entry, and remove it if refcount becomes 0. - decrementCPEReferenceCount(CPI, CPEMI); - // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. U.HighWaterMark = NewIsland; - U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) - .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc()) + .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; + // Decrement the old entry, and remove it if refcount becomes 0. + decrementCPEReferenceCount(CPI, CPEMI); + // Mark the basic block as aligned as required by the const-pool entry. NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); @@ -1844,77 +1952,120 @@ bool ARMConstantIslands::optimizeThumb2Branches() { return MadeChange; } -/// If we've formed a TBB or TBH instruction, the base register is now -/// redundant. In most cases, the instructions defining it will now be dead and -/// can be tidied up. This function removes them if so, and returns the number -/// of bytes saved. 
-unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI, - unsigned BaseReg, - unsigned IdxReg) { - unsigned BytesRemoved = 0; - MachineBasicBlock *MBB = MI->getParent(); +static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg, + unsigned BaseReg) { + if (I.getOpcode() != ARM::t2ADDrs) + return false; - // Scan backwards to find the instruction that defines the base - // register. Due to post-RA scheduling, we can't count on it - // immediately preceding the branch instruction. - MachineBasicBlock::iterator PrevI = MI; - MachineBasicBlock::iterator B = MBB->begin(); - while (PrevI != B && !PrevI->definesRegister(BaseReg)) - --PrevI; - - // If for some reason we didn't find it, we can't do anything, so - // just skip this one. - if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() || - PrevI->mayStore()) - return BytesRemoved; - - MachineInstr *AddrMI = PrevI; - unsigned NewBaseReg = BytesRemoved; - - // Examine the instruction that calculates the jumptable entry address. Make - // sure it only defines the base register and kills any uses other than the - // index register. We also need precisely one use to trace backwards to - // (hopefully) the LEA. - for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { - const MachineOperand &MO = AddrMI->getOperand(k); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && MO.getReg() != BaseReg) - return BytesRemoved; + if (I.getOperand(0).getReg() != EntryReg) + return false; - if (MO.isUse() && MO.getReg() != IdxReg) { - if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg())) - return BytesRemoved; - NewBaseReg = MO.getReg(); + if (I.getOperand(1).getReg() != BaseReg) + return false; + + // FIXME: what about CC and IdxReg? + return true; +} + +/// \brief While trying to form a TBB/TBH instruction, we may (if the table +/// doesn't immediately follow the BR_JT) need access to the start of the +/// jump-table. 
We know one instruction that produces such a register; this +/// function works out whether that definition can be preserved to the BR_JT, +/// possibly by removing an intervening addition (which is usually needed to +/// calculate the actual entry to jump to). +bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, + MachineInstr *LEAMI, + unsigned &DeadSize, + bool &CanDeleteLEA, + bool &BaseRegKill) { + if (JumpMI->getParent() != LEAMI->getParent()) + return false; + + // Now we hope that we have at least these instructions in the basic block: + // BaseReg = t2LEA ... + // [...] + // EntryReg = t2ADDrs BaseReg, ... + // [...] + // t2BR_JT EntryReg + // + // We have to be very conservative about what we recognise here though. The + // main perturbing factors to watch out for are: + // + Spills at any point in the chain: not direct problems but we would + // expect a blocking Def of the spilled register so in practice what we + // can do is limited. + // + EntryReg == BaseReg: this is the one situation we should allow a Def + // of BaseReg, but only if the t2ADDrs can be removed. + // + Some instruction other than t2ADDrs computing the entry. Not seen in + // the wild, but we should be careful. 
+ unsigned EntryReg = JumpMI->getOperand(0).getReg(); + unsigned BaseReg = LEAMI->getOperand(0).getReg(); + + CanDeleteLEA = true; + BaseRegKill = false; + MachineInstr *RemovableAdd = nullptr; + MachineBasicBlock::iterator I(LEAMI); + for (++I; &*I != JumpMI; ++I) { + if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) { + RemovableAdd = &*I; + break; + } + + for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = I->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == BaseReg) + return false; + if (MO.isUse() && MO.getReg() == BaseReg) { + BaseRegKill = BaseRegKill || MO.isKill(); + CanDeleteLEA = false; + } + } + } + + if (!RemovableAdd) + return true; + + // Check the add really is removable, and that nothing else in the block + // clobbers BaseReg. + for (++I; &*I != JumpMI; ++I) { + for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = I->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == BaseReg) + return false; + if (MO.isUse() && MO.getReg() == EntryReg) + RemovableAdd = nullptr; } } - // Want to continue searching for AddrMI, but there are 2 problems: AddrMI is - // going away soon, and even decrementing once may be invalid. - if (PrevI != B) - PrevI = std::prev(PrevI); - - DEBUG(dbgs() << "remove addr: " << *AddrMI); - BytesRemoved += TII->GetInstSizeInBytes(AddrMI); - AddrMI->eraseFromParent(); - - // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction - // that gave us the initial base register definition. - for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI) - ; - - // The instruction should be a tLEApcrel or t2LEApcrelJT; we want - // to delete it as well. 
- MachineInstr *LeaMI = PrevI; - if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && - LeaMI->getOpcode() != ARM::t2LEApcrelJT) || - LeaMI->getOperand(0).getReg() != NewBaseReg) - return BytesRemoved; - - DEBUG(dbgs() << "remove lea: " << *LeaMI); - BytesRemoved += TII->GetInstSizeInBytes(LeaMI); - LeaMI->eraseFromParent(); - return BytesRemoved; + if (RemovableAdd) { + RemovableAdd->eraseFromParent(); + DeadSize += 4; + } else if (BaseReg == EntryReg) { + // The add wasn't removable, but clobbered the base for the TBB. So we can't + // preserve it. + return false; + } + + // We reached the end of the block without seeing another definition of + // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be + // used in the TBB/TBH if necessary. + return true; +} + +/// \brief Returns whether CPEMI is the first instruction in the block +/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so, +/// we can switch the first register to PC and usually remove the address +/// calculation that preceded it. +static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { + MachineFunction::iterator MBB = JTMI->getParent(); + MachineFunction *MF = MBB->getParent(); + ++MBB; + + return MBB != MF->end() && MBB->begin() != MBB->end() && + &*MBB->begin() == CPEMI; } /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller @@ -1955,37 +2106,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { break; } - if (ByteOk || HalfWordOk) { - MachineBasicBlock *MBB = MI->getParent(); - unsigned BaseReg = MI->getOperand(0).getReg(); - bool BaseRegKill = MI->getOperand(0).isKill(); - if (!BaseRegKill) - continue; - unsigned IdxReg = MI->getOperand(1).getReg(); - bool IdxRegKill = MI->getOperand(1).isKill(); + if (!ByteOk && !HalfWordOk) + continue; - DEBUG(dbgs() << "Shrink JT: " << *MI); - unsigned Opc = ByteOk ? 
ARM::t2TBB_JT : ARM::t2TBH_JT; - MachineBasicBlock::iterator MI_JT = MI; - MachineInstr *NewJTMI = - BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) - .addReg(IdxReg, getKillRegState(IdxRegKill)) - .addJumpTableIndex(JTI, JTOP.getTargetFlags()); - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); - // FIXME: Insert an "ALIGN" instruction to ensure the next instruction - // is 2-byte aligned. For now, asm printer will fix it up. - unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(MI); - unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg); - MI->eraseFromParent(); + MachineBasicBlock *MBB = MI->getParent(); + if (!MI->getOperand(0).isKill()) // FIXME: needed now? + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); - int delta = OrigSize - NewSize + DeadSize; - BBInfo[MBB->getNumber()].Size -= delta; - adjustBBOffsetsAfter(MBB); + CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; + unsigned DeadSize = 0; + bool CanDeleteLEA = false; + bool BaseRegKill = false; + bool PreservedBaseReg = + preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); - ++NumTBs; - MadeChange = true; + if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) + continue; + + DEBUG(dbgs() << "Shrink JT: " << *MI); + MachineInstr *CPEMI = User.CPEMI; + unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + MachineBasicBlock::iterator MI_JT = MI; + MachineInstr *NewJTMI = + BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) + .addReg(User.MI->getOperand(0).getReg(), + getKillRegState(BaseRegKill)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(CPEMI->getOperand(0).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); + + unsigned JTOpc = ByteOk ? 
ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH; + CPEMI->setDesc(TII->get(JTOpc)); + + if (jumpTableFollowsTB(MI, User.CPEMI)) { + NewJTMI->getOperand(0).setReg(ARM::PC); + NewJTMI->getOperand(0).setIsKill(false); + + if (CanDeleteLEA) { + User.MI->eraseFromParent(); + DeadSize += 4; + + // The LEA was eliminated, the TBB instruction becomes the only new user + // of the jump table. + User.MI = NewJTMI; + User.MaxDisp = 4; + User.NegOk = false; + User.IsSoImm = false; + User.KnownAlignment = false; + } else { + // The LEA couldn't be eliminated, so we must add another CPUser to + // record the TBB or TBH use. + int CPEntryIdx = JumpTableEntryIndices[JTI]; + auto &CPEs = CPEntries[CPEntryIdx]; + auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) { + return E.CPEMI == User.CPEMI; + }); + ++Entry->RefCount; + CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false)); + } } + + unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); + + int Delta = OrigSize - NewSize + DeadSize; + BBInfo[MBB->getNumber()].Size -= Delta; + adjustBBOffsetsAfter(MBB); + + ++NumTBs; + MadeChange = true; } return MadeChange; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4405625e47cd..50afb192b331 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -251,6 +252,9 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectReadRegister(SDNode *N); + SDNode *SelectWriteRegister(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); SDNode *SelectConcatVector(SDNode *N); @@ 
-2457,6 +2461,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::WRITE_REGISTER: { + SDNode *ResNode = SelectWriteRegister(N); + if (ResNode) + return ResNode; + break; + } + case ISD::READ_REGISTER: { + SDNode *ResNode = SelectReadRegister(N); + if (ResNode) + return ResNode; + break; + } case ISD::INLINEASM: { SDNode *ResNode = SelectInlineAsm(N); if (ResNode) @@ -3336,6 +3352,418 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +// Inspect a register string of the form +// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or +// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string +// and obtain the integer operands from them, adding these operands to the +// provided vector. +static void getIntOperandsFromRegisterString(StringRef RegString, + SelectionDAG *CurDAG, SDLoc DL, + std::vector<SDValue>& Ops) { + SmallVector<StringRef, 5> Fields; + RegString.split(Fields, ":"); + + if (Fields.size() > 1) { + bool AllIntFields = true; + + for (StringRef Field : Fields) { + // Need to trim out leading 'cp' characters and get the integer field. + unsigned IntField; + AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); + Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); + } + + assert(AllIntFields && + "Unexpected non-integer value in special register string."); + } +} + +// Maps a Banked Register string to its mask value. The mask value returned is +// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register +// mask operand, which expresses which register is to be used, e.g. r8, and in +// which mode it is to be used, e.g. usr. Returns -1 to signify that the string +// was invalid. 
+static inline int getBankedRegisterMask(StringRef RegString) { + return StringSwitch<int>(RegString.lower()) + .Case("r8_usr", 0x00) + .Case("r9_usr", 0x01) + .Case("r10_usr", 0x02) + .Case("r11_usr", 0x03) + .Case("r12_usr", 0x04) + .Case("sp_usr", 0x05) + .Case("lr_usr", 0x06) + .Case("r8_fiq", 0x08) + .Case("r9_fiq", 0x09) + .Case("r10_fiq", 0x0a) + .Case("r11_fiq", 0x0b) + .Case("r12_fiq", 0x0c) + .Case("sp_fiq", 0x0d) + .Case("lr_fiq", 0x0e) + .Case("lr_irq", 0x10) + .Case("sp_irq", 0x11) + .Case("lr_svc", 0x12) + .Case("sp_svc", 0x13) + .Case("lr_abt", 0x14) + .Case("sp_abt", 0x15) + .Case("lr_und", 0x16) + .Case("sp_und", 0x17) + .Case("lr_mon", 0x1c) + .Case("sp_mon", 0x1d) + .Case("elr_hyp", 0x1e) + .Case("sp_hyp", 0x1f) + .Case("spsr_fiq", 0x2e) + .Case("spsr_irq", 0x30) + .Case("spsr_svc", 0x32) + .Case("spsr_abt", 0x34) + .Case("spsr_und", 0x36) + .Case("spsr_mon", 0x3c) + .Case("spsr_hyp", 0x3e) + .Default(-1); +} + +// Maps a MClass special register string to its value for use in the +// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand. +// Returns -1 to signify that the string was invalid. +static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { + return StringSwitch<int>(RegString.lower()) + .Case("apsr", 0x0) + .Case("iapsr", 0x1) + .Case("eapsr", 0x2) + .Case("xpsr", 0x3) + .Case("ipsr", 0x5) + .Case("epsr", 0x6) + .Case("iepsr", 0x7) + .Case("msp", 0x8) + .Case("psp", 0x9) + .Case("primask", 0x10) + .Case("basepri", 0x11) + .Case("basepri_max", 0x12) + .Case("faultmask", 0x13) + .Case("control", 0x14) + .Default(-1); +} + +// The flags here are common to those allowed for apsr in the A class cores and +// those allowed for the special registers in the M class cores. Returns a +// value representing which flags were present, -1 if invalid. 
+static inline int getMClassFlagsMask(StringRef Flags) { + if (Flags.empty()) + return 0x3; + + return StringSwitch<int>(Flags) + .Case("g", 0x1) + .Case("nzcvq", 0x2) + .Case("nzcvqg", 0x3) + .Default(-1); +} + +static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, + const ARMSubtarget *Subtarget) { + // Ensure that the register (without flags) was a valid M Class special + // register. + int SYSmvalue = getMClassRegisterSYSmValueMask(Reg); + if (SYSmvalue == -1) + return -1; + + // basepri, basepri_max and faultmask are only valid for V7m. + if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) + return -1; + + // If it was a read then we won't be expecting flags and so at this point + // we can return the mask. + if (IsRead) { + assert (Flags.empty() && "Unexpected flags for reading M class register."); + return SYSmvalue; + } + + // We know we are now handling a write so need to get the mask for the flags. + int Mask = getMClassFlagsMask(Flags); + + // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values + // shouldn't have flags present. + if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty())) + return -1; + + // The _g and _nzcvqg versions are only valid if the DSP extension is + // available. + if (!Subtarget->hasThumb2DSP() && (Mask & 0x2)) + return -1; + + // The register was valid so need to put the mask in the correct place + // (the flags need to be in bits 11-10) and combine with the SYSmvalue to + // construct the operand for the instruction node. + if (SYSmvalue < 0x4) + return SYSmvalue | Mask << 10; + + return SYSmvalue; +} + +static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { + // The mask operand contains the special register (R Bit) in bit 4, whether + // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and + // bits 3-0 contains the fields to be accessed in the special register, set by + // the flags provided with the register. 
+ int Mask = 0; + if (Reg == "apsr") { + // The flags permitted for apsr are the same flags that are allowed in + // M class registers. We get the flag value and then shift the flags into + // the correct place to combine with the mask. + Mask = getMClassFlagsMask(Flags); + if (Mask == -1) + return -1; + return Mask << 2; + } + + if (Reg != "cpsr" && Reg != "spsr") { + return -1; + } + + // This is the same as if the flags were "fc" + if (Flags.empty() || Flags == "all") + return Mask | 0x9; + + // Inspect the supplied flags string and set the bits in the mask for + // the relevant and valid flags allowed for cpsr and spsr. + for (char Flag : Flags) { + int FlagVal; + switch (Flag) { + case 'c': + FlagVal = 0x1; + break; + case 'x': + FlagVal = 0x2; + break; + case 's': + FlagVal = 0x4; + break; + case 'f': + FlagVal = 0x8; + break; + default: + FlagVal = 0; + } + + // This avoids allowing strings where the same flag bit appears twice. + if (!FlagVal || (Mask & FlagVal)) + return -1; + Mask |= FlagVal; + } + + // If the register is spsr then we need to set the R bit. + if (Reg == "spsr") + Mask |= 0x10; + + return Mask; +} + +// Lower the read_register intrinsic to ARM specific DAG nodes +// using the supplied metadata string to select the instruction node to use +// and the registers/masks to construct as operands for the node. +SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + bool IsThumb2 = Subtarget->isThumb2(); + SDLoc DL(N); + + std::vector<SDValue> Ops; + getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); + + if (!Ops.empty()) { + // If the special register string was constructed of fields (as defined + // in the ACLE) then need to lower to MRC node (32 bit) or + // MRRC node(64 bit), we can make the distinction based on the number of + // operands we have. 
+ unsigned Opcode; + SmallVector<EVT, 3> ResTypes; + if (Ops.size() == 5){ + Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; + ResTypes.append({ MVT::i32, MVT::Other }); + } else { + assert(Ops.size() == 3 && + "Invalid number of fields in special register string."); + Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; + ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); + } + + Ops.push_back(getAL(CurDAG, DL)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(N->getOperand(0)); + return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops); + } + + std::string SpecialReg = RegString->getString().lower(); + + int BankedReg = getBankedRegisterMask(SpecialReg); + if (BankedReg != -1) { + Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, + DL, MVT::i32, MVT::Other, Ops); + } + + // The VFP registers are read by creating SelectionDAG nodes with opcodes + // corresponding to the register that is being read from. So we switch on the + // string to find which opcode we need to use. + unsigned Opcode = StringSwitch<unsigned>(SpecialReg) + .Case("fpscr", ARM::VMRS) + .Case("fpexc", ARM::VMRS_FPEXC) + .Case("fpsid", ARM::VMRS_FPSID) + .Case("mvfr0", ARM::VMRS_MVFR0) + .Case("mvfr1", ARM::VMRS_MVFR1) + .Case("mvfr2", ARM::VMRS_MVFR2) + .Case("fpinst", ARM::VMRS_FPINST) + .Case("fpinst2", ARM::VMRS_FPINST2) + .Default(0); + + // If an opcode was found then we can lower the read to a VFP instruction. 
+ if (Opcode) { + if (!Subtarget->hasVFP2()) + return nullptr; + if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) + return nullptr; + + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops); + } + + // If the target is M Class then need to validate that the register string + // is an acceptable value, so check that a mask can be constructed from the + // string. + if (Subtarget->isMClass()) { + int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget); + if (SYSmValue == -1) + return nullptr; + + SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops); + } + + // Here we know the target is not M Class so we need to check if it is one + // of the remaining possible values which are apsr, cpsr or spsr. + if (SpecialReg == "apsr" || SpecialReg == "cpsr") { + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL, + MVT::i32, MVT::Other, Ops); + } + + if (SpecialReg == "spsr") { + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRSsys_AR : ARM::MRSsys, + DL, MVT::i32, MVT::Other, Ops); + } + + return nullptr; +} + +// Lower the write_register intrinsic to ARM specific DAG nodes +// using the supplied metadata string to select the instruction node to use +// and the registers/masks to use in the nodes +SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + bool IsThumb2 = Subtarget->isThumb2(); + SDLoc DL(N); + + std::vector<SDValue> Ops; + getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); + + if (!Ops.empty()) { + // If the special register string was constructed of fields (as defined + // in the ACLE) then need to lower to MCR node (32 bit) or + // MCRR node(64 bit), we can make the distinction based on the number of + // operands we have. + unsigned Opcode; + if (Ops.size() == 5) { + Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; + Ops.insert(Ops.begin()+2, N->getOperand(2)); + } else { + assert(Ops.size() == 3 && + "Invalid number of fields in special register string."); + Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR; + SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; + Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); + } + + Ops.push_back(getAL(CurDAG, DL)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(N->getOperand(0)); + + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + } + + std::string SpecialReg = RegString->getString().lower(); + int BankedReg = getBankedRegisterMask(SpecialReg); + if (BankedReg != -1) { + Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MSRbanked : ARM::MSRbanked, + DL, MVT::Other, Ops); + } + + // The VFP registers are written to by creating SelectionDAG nodes with + // opcodes corresponding to the register that is being written. So we switch + // on the string to find which opcode we need to use. + unsigned Opcode = StringSwitch<unsigned>(SpecialReg) + .Case("fpscr", ARM::VMSR) + .Case("fpexc", ARM::VMSR_FPEXC) + .Case("fpsid", ARM::VMSR_FPSID) + .Case("fpinst", ARM::VMSR_FPINST) + .Case("fpinst2", ARM::VMSR_FPINST2) + .Default(0); + + if (Opcode) { + if (!Subtarget->hasVFP2()) + return nullptr; + Ops = { N->getOperand(2), getAL(CurDAG, DL), + CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + } + + SmallVector<StringRef, 5> Fields; + StringRef(SpecialReg).split(Fields, "_", 1, false); + std::string Reg = Fields[0].str(); + StringRef Flags = Fields.size() == 2 ? Fields[1] : ""; + + // If the target was M Class then need to validate the special register value + // and retrieve the mask for use in the instruction node. + if (Subtarget->isMClass()) { + // basepri_max gets split so need to correct Reg and Flags. + if (SpecialReg == "basepri_max") { + Reg = SpecialReg; + Flags = ""; + } + int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); + if (SYSmValue == -1) + return nullptr; + + SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), + N->getOperand(2), getAL(CurDAG, DL), + CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; + return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops); + } + + // We then check to see if a valid mask can be constructed for one of the + // register string values permitted for the A and R class cores. These values + // are apsr, spsr and cpsr; these are also valid on older cores. 
+ int Mask = getARClassRegisterMask(Reg, Flags); + if (Mask != -1) { + Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, + DL, MVT::Other, Ops); + } + + return nullptr; +} + SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ std::vector<SDValue> AsmNodeOperands; unsigned Flag, Kind; @@ -3492,13 +3920,29 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintID == InlineAsm::Constraint_m && - "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. - OutOps.push_back(Op); - return false; + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + // FIXME: It seems strange that 'i' is needed here since it's supposed to + // be an immediate and not a memory constraint. + // Fallthrough. + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_Um: + case InlineAsm::Constraint_Un: + case InlineAsm::Constraint_Uq: + case InlineAsm::Constraint_Us: + case InlineAsm::Constraint_Ut: + case InlineAsm::Constraint_Uv: + case InlineAsm::Constraint_Uy: + // Require the address to be in a register. That is safe for all ARM + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. 
+ OutOps.push_back(Op); + return false; + } + return true; } /// createARMISelDag - This pass converts a legalized DAG into a diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 629cc90d67de..47c8400a668f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -426,6 +426,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -2378,6 +2381,24 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { return !Subtarget->isThumb1Only(); } +// Trying to write a 64 bit value so need to split into two 32 bit values first, +// and pass the lower and high parts through. +static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue WriteValue = Op->getOperand(2); + + // This function is only supposed to be called for i64 type argument. + assert(WriteValue.getValueType() == MVT::i64 + && "LowerWRITE_REGISTER called for non-i64 type argument."); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, + DAG.getConstant(0, DL, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, + DAG.getConstant(1, DL, MVT::i32)); + SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; + return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. 
It has to be wrapped because otherwise @@ -4085,7 +4106,28 @@ unsigned ARMTargetLowering::getRegisterByName(const char* RegName, .Default(0); if (Reg) return Reg; - report_fatal_error("Invalid register name global variable"); + report_fatal_error(Twine("Invalid register name \"" + + StringRef(RegName) + "\".")); +} + +// Result is 64 bit value so split into two 32 bit values and return as a +// pair of values. +static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + SDLoc DL(N); + + // This function is only supposed to be called for i64 type destination. + assert(N->getValueType(0) == MVT::i64 + && "ExpandREAD_REGISTER called for non-i64 type result."); + + SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, + DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), + N->getOperand(0), + N->getOperand(1)); + + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), + Read.getValue(1))); + Results.push_back(Read.getOperand(0)); } /// ExpandBITCAST - If the target supports VFP, this function is called to @@ -6355,6 +6397,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: @@ -6439,6 +6482,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); + case ISD::READ_REGISTER: + ExpandREAD_REGISTER(N, Results, DAG); + break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -10222,7 +10268,8 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode 
represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { EVT VT = getValueType(Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 63e87c5282d1..c0b329c5a1e5 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -286,7 +286,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -346,8 +347,31 @@ namespace llvm { unsigned getInlineAsmMemConstraint( const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. 
- return InlineAsm::Constraint_m; + if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + else if (ConstraintCode.size() == 2) { + if (ConstraintCode[0] == 'U') { + switch(ConstraintCode[1]) { + default: + break; + case 'm': + return InlineAsm::Constraint_Um; + case 'n': + return InlineAsm::Constraint_Un; + case 'q': + return InlineAsm::Constraint_Uq; + case 's': + return InlineAsm::Constraint_Us; + case 't': + return InlineAsm::Constraint_Ut; + case 'v': + return InlineAsm::Constraint_Uv; + case 'y': + return InlineAsm::Constraint_Uy; + } + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } const ARMSubtarget* getSubtarget() const { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 778fd17137f6..b8cac135baf6 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1826,6 +1826,32 @@ def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, []>; +/// A jumptable consisting of direct 32-bit addresses of the destination basic +/// blocks (either absolute, or relative to the start of the jump-table in PIC +/// mode). Used mostly in ARM and Thumb-1 modes. +def JUMPTABLE_ADDRS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables +/// that cannot be optimised to use TBB or TBH. +def JUMPTABLE_INSTS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 8-bit unsigned integers representing offsets from +/// a TBB instruction. +def JUMPTABLE_TBB : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 16-bit unsigned integers representing offsets from +/// a TBH instruction. 
+def JUMPTABLE_TBH : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + + // FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE // from removing one half of the matched pairs. That breaks PEI, which assumes // these will always be in pairs, and asserts if it finds otherwise. Better way? @@ -2224,7 +2250,7 @@ let isBranch = 1, isTerminator = 1 in { [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, Sched<[WriteBr]>; - let isNotDuplicable = 1, isIndirectBranch = 1 in { + let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt), 0, IIC_Br, @@ -5039,10 +5065,11 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; -class MovRRCopro<string opc, bit direction, list<dag> pattern = []> - : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { +class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag> + pattern = []> + : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + pattern> { + let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -5060,9 +5087,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, c_imm:$CRm), [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; -def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, + (outs GPRnopc:$Rt, GPRnopc:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : 
ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 0fecfa1319d3..40414da3ca81 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 0, IIC_Br, [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { + let Size = 2; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 814b524b2bcb..aba8a7b10fd9 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3531,20 +3531,20 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -let isNotDuplicable = 1, isIndirectBranch = 1 in { +let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt), 0, IIC_Br, [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>, Sched<[WriteBr]>; -// FIXME: Add a non-pc based case that can be predicated. +// FIXME: Add a case that can be predicated. 
def t2TBB_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>, + (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; def t2TBH_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>, + (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, @@ -4141,11 +4141,9 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, let Inst{19-16} = CRn; } -class t2MovRRCopro<bits<4> Op, string opc, bit direction, +class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, list<dag> pattern = []> - : T2Cop<Op, (outs), - (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { + : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{27-24} = 0b1100; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -4210,19 +4208,25 @@ def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), /* from ARM core register to coprocessor */ -def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, +def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs), + (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, + c_imm:$CRm), [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, imm:$CRm)]>; -def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, - GPR:$Rt2, imm:$CRm)]> { +def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs), + (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, + c_imm:$CRm), + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, + GPR:$Rt2, imm:$CRm)]> { let Predicates = [IsThumb2, PreV8]; } /* from coprocessor to ARM core register */ -def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>; +def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1, (outs GPR:$Rt, GPR:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)>; -def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> { +def 
t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)> { let Predicates = [IsThumb2, PreV8]; } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5b62a21706ce..46ff326ba630 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. +/// \file This file contains a pass that performs load / store related peephole +/// optimizations. This pass should be run after register allocation. // //===----------------------------------------------------------------------===// @@ -58,10 +58,9 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); -/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine -/// load / store instructions to form ldm / stm instructions. - namespace { + /// Post- register allocation pass the combine load / store instructions to + /// form ldm / stm instructions. 
struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} @@ -271,10 +270,7 @@ static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { } } -namespace llvm { - namespace ARM_AM { - -AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { +static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); case ARM::LDMIA_RET: @@ -328,9 +324,6 @@ AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { } } - } // end namespace ARM_AM -} // end namespace llvm - static bool isT1i32Load(unsigned Opc) { return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; } @@ -469,9 +462,9 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, } } -/// MergeOps - Create and insert a LDM or STM with Base as base register and -/// registers in Regs as the register operands that would be loaded / stored. -/// It returns true if the transformation is done. +/// Create and insert a LDM or STM with Base as base register and registers in +/// Regs as the register operands that would be loaded / stored. It returns +/// true if the transformation is done. bool ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -665,7 +658,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return true; } -/// \brief Find all instructions using a given imp-def within a range. +/// Find all instructions using a given imp-def within a range. /// /// We are trying to combine a range of instructions, one of which (located at /// position RangeBegin) implicitly defines a register. The final LDM/STM will @@ -721,8 +714,7 @@ void ARMLoadStoreOpt::findUsesOfImpDef( } } -// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on -// success. +/// Call MergeOps and update MemOps and merges accordingly on success. 
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &memOps, unsigned memOpsBegin, unsigned memOpsEnd, @@ -762,10 +754,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, Regs.push_back(std::make_pair(Reg, isKill)); // Collect any implicit defs of super-registers. They must be preserved. - for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { - if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + for (const MachineOperand &MO : memOps[i].MBBI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) continue; - unsigned DefReg = MO->getReg(); + unsigned DefReg = MO.getReg(); if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) ImpDefs.push_back(DefReg); @@ -823,8 +815,8 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } } -/// MergeLDR_STR - Merge a number of load / store instructions into one or more -/// load / store multiple instructions. +/// Merge a number of load / store instructions into one or more load / store +/// multiple instructions. void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, unsigned Opcode, unsigned Size, @@ -1083,8 +1075,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } -/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, <ra, rb, rc> /// rn := rn + 4 * 3; @@ -1118,7 +1110,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, return false; bool DoMerge = false; - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); + ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); // Try merging with the previous instruction. 
MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -1231,8 +1223,8 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, } } -/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base -/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDR/STR/FLD{D|S}/FST{D|S} op when possible: bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const TargetInstrInfo *TII, @@ -1373,8 +1365,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } -/// isMemoryOp - Returns true if instruction is a memory operation that this -/// pass is capable of operating on. +/// Returns true if instruction is a memory operation that this pass is capable +/// of operating on. static bool isMemoryOp(const MachineInstr *MI) { // When no memory operands are present, conservatively assume unaligned, // volatile, unfoldable. @@ -1428,8 +1420,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// AdvanceRS - Advance register scavenger to just before the earliest memory -/// op that is being merged. +/// Advance register scavenger to just before the earliest memory op that is +/// being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { MachineBasicBlock::iterator Loc = MemOps[0].MBBI; unsigned Position = MemOps[0].Position; @@ -1472,8 +1464,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { const MachineOperand &BaseOp = MI->getOperand(2); unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); @@ -1588,8 +1579,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR -/// ops of the same base and incrementing offset into LDM / STM ops. +/// An optimization pass to turn multiple LDR / STR ops of the same base and +/// incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; @@ -1770,9 +1761,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops -/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it -/// directly restore the value of LR into pc. +/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") +/// into the preceding stack restore so it directly restore the value of LR +/// into pc. /// ldmfd sp!, {..., lr} /// bx lr /// or @@ -1834,12 +1825,9 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } - -/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move -/// load / stores from consecutive locations close to make it more -/// likely they will be combined later. 
- namespace { + /// Pre- register allocation pass that move load / stores from consecutive + /// locations close to make it more likely they will be combined later. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} @@ -1936,7 +1924,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, } -/// Copy Op0 and Op1 operands into a new array assigned to MI. +/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI. static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, MachineInstr *Op1) { assert(MI->memoperands_empty() && "expected a new machineinstr"); @@ -1954,10 +1942,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, - DebugLoc &dl, - unsigned &NewOpc, unsigned &EvenReg, - unsigned &OddReg, unsigned &BaseReg, - int &Offset, unsigned &PredReg, + DebugLoc &dl, unsigned &NewOpc, + unsigned &FirstReg, + unsigned &SecondReg, + unsigned &BaseReg, int &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2) { // Make sure we're allowed to generate LDRD/STRD. @@ -2016,9 +2005,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; Offset = ARM_AM::getAM3Opc(AddSub, OffImm); } - EvenReg = Op0->getOperand(0).getReg(); - OddReg = Op1->getOperand(0).getReg(); - if (EvenReg == OddReg) + FirstReg = Op0->getOperand(0).getReg(); + SecondReg = Op1->getOperand(0).getReg(); + if (FirstReg == SecondReg) return false; BaseReg = Op0->getOperand(1).getReg(); Pred = getInstrPredicate(Op0, PredReg); @@ -2114,7 +2103,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, // to try to allocate a pair of registers that can form register pairs. 
MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; - unsigned EvenReg = 0, OddReg = 0; + unsigned FirstReg = 0, SecondReg = 0; unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; @@ -2122,21 +2111,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, + FirstReg, SecondReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); - MRI->constrainRegClass(EvenReg, TRC); - MRI->constrainRegClass(OddReg, TRC); + MRI->constrainRegClass(FirstReg, TRC); + MRI->constrainRegClass(SecondReg, TRC); // Form the pair instruction. if (isLd) { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg, RegState::Define) - .addReg(OddReg, RegState::Define) + .addReg(FirstReg, RegState::Define) + .addReg(SecondReg, RegState::Define) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2149,8 +2138,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg) - .addReg(OddReg) + .addReg(FirstReg) + .addReg(SecondReg) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2165,9 +2154,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MBB->erase(Op0); MBB->erase(Op1); - // Add register allocation hints to form register pairs. - MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); - MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + if (!isT2) { + // Add register allocation hints to form register pairs. 
+ MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); + MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); + } } else { for (unsigned i = 0; i != NumMove; ++i) { MachineInstr *Op = Ops.back(); @@ -2292,8 +2283,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. +/// Returns an instance of the load / store optimization pass. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { if (PreAlloc) return new ARMPreAllocLoadStoreOpt(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e370b962ba7f..a2aca2d1a69e 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -30,35 +30,35 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK; switch (Option) { default: { - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); switch (Option) { default: llvm_unreachable("Unknown target flag on symbol operand"); case ARMII::MO_NO_FLAG: break; case ARMII::MO_LO16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - Expr = ARMMCExpr::CreateLower16(Expr, OutContext); + Expr = ARMMCExpr::createLower16(Expr, OutContext); break; case ARMII::MO_HI16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - Expr = ARMMCExpr::CreateUpper16(Expr, OutContext); + Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; } break; } case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT, 
OutContext); break; } if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), OutContext), OutContext); return MCOperand::createExpr(Expr); @@ -80,7 +80,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_GlobalAddress: { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index e794fb71af63..0aceaed87510 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -304,10 +304,6 @@ public: return getTM<ARMBaseTargetMachine>(); } - const ARMSubtarget &getARMSubtarget() const { - return *getARMTargetMachine().getSubtargetImpl(); - } - void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; @@ -330,24 +326,28 @@ void ARMPassConfig::addIRPasses() { // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
- const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) - if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass()); + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass(-1, [this](const Function &F) { + const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); + return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); + })); TargetPassConfig::addIRPasses(); } bool ARMPassConfig::addPreISel() { - if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + if ((TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge == cl::BOU_UNSET) || - EnableGlobalMerge == cl::BOU_TRUE) + EnableGlobalMerge == cl::BOU_TRUE) { // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of // 4095 based on the TargetMachine, but this starts to become // tricky when doing code gen per function. 
- addPass(createGlobalMergePass(TM, 127)); + bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && + (EnableGlobalMerge == cl::BOU_UNSET); + addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize)); + } return false; } @@ -387,10 +387,13 @@ void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { // in v8, IfConversion depends on Thumb instruction widths - if (getARMSubtarget().restrictIT()) - addPass(createThumb2SizeReductionPass()); - if (!getARMSubtarget().isThumb1Only()) - addPass(&IfConverterID); + addPass(createThumb2SizeReductionPass([this](const Function &F) { + return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); + })); + + addPass(createIfConverter([this](const Function &F) { + return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only(); + })); } addPass(createThumb2ITBlockPass()); } @@ -399,8 +402,9 @@ void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - if (getARMSubtarget().isThumb2()) - addPass(&UnpackMachineBundlesID); + addPass(createUnpackMachineBundles([this](const Function &F) { + return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2(); + })); // Don't optimize barriers at -O0. 
if (getOptLevel() != CodeGenOpt::None) diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 80f03c62bbfb..eaed5cc68750 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -50,12 +50,12 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); - return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), + return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); } const MCExpr *ARMElfTargetObjectFile:: getDebugThreadLocalSymbol(const MCSymbol *Sym) const { - return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, + return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, getContext()); } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 30c7d62e84b8..8bcbb1159f81 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1051,7 +1051,7 @@ public: if (!CE) return false; int64_t Value = CE->getValue(); return (ARM_AM::getSOImmVal(Value) != -1 || - ARM_AM::getSOImmVal(-Value) != -1);; + ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { if (!isImm()) return false; @@ -4252,7 +4252,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, + Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, Tok.getEndLoc())); return MatchOperand_Success; @@ -4656,7 +4656,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { Val = INT32_MIN; Operands.push_back( - ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E)); + ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E)); return MatchOperand_Success; } @@ -4886,7 
+4886,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { // If the constant was #-0, represent it as INT32_MIN. int32_t Val = CE->getValue(); if (isNegative && Val == 0) - CE = MCConstantExpr::Create(INT32_MIN, getContext()); + CE = MCConstantExpr::create(INT32_MIN, getContext()); // Now we should have the closing ']' if (Parser.getTok().isNot(AsmToken::RBrac)) @@ -5073,7 +5073,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { IntVal ^= (uint64_t)isNegative << 31; Parser.Lex(); // Eat the token. Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(IntVal, getContext()), + MCConstantExpr::create(IntVal, getContext()), S, Parser.getTok().getLoc())); return MatchOperand_Success; } @@ -5090,7 +5090,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { Val = APFloat(RealVal).bitcastToAPInt().getZExtValue(); Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(Val, getContext()), S, + MCConstantExpr::create(Val, getContext()), S, Parser.getTok().getLoc())); return MatchOperand_Success; } @@ -5179,7 +5179,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (CE) { int32_t Val = CE->getValue(); if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + ImmVal = MCConstantExpr::create(INT32_MIN, getContext()); } E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); @@ -5209,7 +5209,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (getParser().parseExpression(SubExprVal)) return true; - const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, + const MCExpr *ExprVal = ARMMCExpr::create(RefKind, SubExprVal, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); @@ -5765,7 +5765,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 
// Add the processor imod operand, if necessary. if (ProcessorIMod) { Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(ProcessorIMod, getContext()), + MCConstantExpr::create(ProcessorIMod, getContext()), NameLoc, NameLoc)); } else if (Mnemonic == "cps" && isMClass()) { return Error(NameLoc, "instruction 'cps' requires effect for M-class"); @@ -6752,13 +6752,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, MCSymbol *Dot = getContext().createTempSymbol(); Out.EmitLabel(Dot); const MCExpr *OpExpr = Inst.getOperand(2).getExpr(); - const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot, + const MCExpr *InstPC = MCSymbolRefExpr::create(Dot, MCSymbolRefExpr::VK_None, getContext()); - const MCExpr *Const8 = MCConstantExpr::Create(8, getContext()); - const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8, + const MCExpr *Const8 = MCConstantExpr::create(8, getContext()); + const MCExpr *ReadPC = MCBinaryExpr::createAdd(InstPC, Const8, getContext()); - const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr, + const MCExpr *FixupAddr = MCBinaryExpr::createAdd(ReadPC, OpExpr, getContext()); TmpInst.addOperand(MCOperand::createExpr(FixupAddr)); } @@ -9168,74 +9168,19 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); + // FIXME: This is using table-gen data, but should be moved to + // ARMTargetParser once that is table-gen'd. if (!STI.isCPUStringValid(CPU)) { Error(L, "Unknown CPU name"); return false; } - // FIXME: This switches the CPU features globally, therefore it might - // happen that code you would not expect to assemble will. 
For details - // see: http://llvm.org/bugs/show_bug.cgi?id=20757 STI.InitMCProcessorInfo(CPU, ""); STI.InitCPUSchedModel(CPU); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; } - -// FIXME: This is duplicated in getARMFPUFeatures() in -// tools/clang/lib/Driver/Tools.cpp -static const struct { - const unsigned ID; - const FeatureBitset Enabled; - const FeatureBitset Disabled; -} FPUs[] = { - {/* ID */ ARM::FK_VFP, - /* Enabled */ {ARM::FeatureVFP2}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV2, - /* Enabled */ {ARM::FeatureVFP2}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV3, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}}, - {/* ID */ ARM::FK_VFPV3_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV4, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}}, - {/* ID */ ARM::FK_VFPV4_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_FPV5_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto}}, - {/* ID */ ARM::FK_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto, ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON_VFPV4, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, 
ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureCrypto, ARM::FeatureD16}}, - {/* ID */ ARM::FK_CRYPTO_NEON_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureNEON, - ARM::FeatureCrypto}, - /* Disabled */ {ARM::FeatureD16}}, - {ARM::FK_SOFTVFP, {}, {}}, -}; - /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -9243,23 +9188,15 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = ARMTargetParser::parseFPU(FPU); - - if (ID == ARM::FK_INVALID) { + std::vector<const char *> Features; + if (!ARMTargetParser::getFPUFeatures(ID, Features)) { Error(FPUNameLoc, "Unknown FPU name"); return false; } - for (const auto &Entry : FPUs) { - if (Entry.ID != ID) - continue; - - // Need to toggle features that should be on but are off and that - // should off but are on. - FeatureBitset Toggle = (Entry.Enabled & ~STI.getFeatureBits()) | - (Entry.Disabled & STI.getFeatureBits()); - setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); - break; - } + for (auto Feature : Features) + STI.ApplyFeatureFlag(Feature); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); getTargetStreamer().emitFPU(ID); return false; @@ -9804,7 +9741,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { } const MCSymbolRefExpr *SRE = - MCSymbolRefExpr::Create(Parser.getTok().getIdentifier(), + MCSymbolRefExpr::create(Parser.getTok().getIdentifier(), MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext()); Lex(); @@ -9982,33 +9919,32 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" +// FIXME: This structure should be moved inside ARMTargetParser +// when we start to table-generate them, and we can use the ARM +// flags below, that were generated by table-gen. 
static const struct { - const char *Name; + const ARM::ArchExtKind Kind; const unsigned ArchCheck; const FeatureBitset Features; } Extensions[] = { - { "crc", Feature_HasV8, {ARM::FeatureCRC} }, - { "crypto", Feature_HasV8, + { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} }, + { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, - { "fp", Feature_HasV8, {ARM::FeatureFPARMv8} }, - { "idiv", Feature_HasV7 | Feature_IsNotMClass, + { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, + { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, - // FIXME: iWMMXT not supported - { "iwmmxt", Feature_None, {} }, - // FIXME: iWMMXT2 not supported - { "iwmmxt2", Feature_None, {} }, - // FIXME: Maverick not supported - { "maverick", Feature_None, {} }, - { "mp", Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, - // FIXME: ARMv6-m OS Extensions feature not checked - { "os", Feature_None, {} }, + { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, + { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, // FIXME: Also available in ARMv6-K - { "sec", Feature_HasV7, {ARM::FeatureTrustZone} }, - { "simd", Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, + { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated - { "virt", Feature_HasV7, {ARM::FeatureVirtualization} }, - // FIXME: xscale not supported - { "xscale", Feature_None, {} }, + { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} }, + // FIXME: Unsupported extensions. 
+ { ARM::AEK_OS, Feature_None, {} }, + { ARM::AEK_IWMMXT, Feature_None, {} }, + { ARM::AEK_IWMMXT2, Feature_None, {} }, + { ARM::AEK_MAVERICK, Feature_None, {} }, + { ARM::AEK_XSCALE, Feature_None, {} }, }; /// parseDirectiveArchExtension @@ -10031,9 +9967,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { EnableFeature = false; Name = Name.substr(2); } + unsigned FeatureKind = ARMTargetParser::parseArchExt(Name); + if (FeatureKind == ARM::AEK_INVALID) + Error(ExtLoc, "unknown architectural extension: " + Name); for (const auto &Extension : Extensions) { - if (Extension.Name != Name) + if (Extension.Kind != FeatureKind) continue; if (Extension.Features.none()) @@ -10080,7 +10019,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (Op.isImm()) { const MCExpr *SOExpr = Op.getImm(); int64_t Value; - if (!SOExpr->EvaluateAsAbsolute(Value)) + if (!SOExpr->evaluateAsAbsolute(Value)) return Match_Success; assert((Value >= INT32_MIN && Value <= UINT32_MAX) && "expression value must be representable in 32 bits"); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2d36c3020016..0bff52141da5 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -329,7 +329,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCExpr *Expr = Op.getExpr(); switch (Expr->getKind()) { case MCExpr::Binary: - O << '#' << *Expr; + O << '#'; + Expr->print(O, &MAI); break; case MCExpr::Constant: { // If a symbolic branch target was added as a constant expression then @@ -337,8 +338,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // address. 
const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr); int64_t TargetAddress; - if (!Constant->EvaluateAsAbsolute(TargetAddress)) { - O << '#' << *Expr; + if (!Constant->evaluateAsAbsolute(TargetAddress)) { + O << '#'; + Expr->print(O, &MAI); } else { O << "0x"; O.write_hex(static_cast<uint32_t>(TargetAddress)); @@ -348,7 +350,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, default: // FIXME: Should we always treat this as if it is a constant literal and // prefix it with '#'? - O << *Expr; + Expr->print(O, &MAI); break; } } @@ -359,7 +361,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) { - O << *MO1.getExpr(); + MO1.getExpr()->print(O, &MAI); return; } @@ -1055,7 +1057,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO = MI->getOperand(OpNum); if (MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); return; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index f0eed9b811d4..b03cada9a641 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -622,8 +622,6 @@ namespace ARM_AM { return Value; } - AMSubMode getLoadStoreMultipleSubMode(int Opcode); - //===--------------------------------------------------------------------===// // Floating-point Immediates // diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 6c1f7891f58a..be23e9070103 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -260,9 +260,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { hasNOP() ? 
Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write16(nopEncoding); + OW->write16(nopEncoding); if (Count & 1) - OW->Write8(0); + OW->write8(0); return true; } // ARM mode @@ -270,21 +270,21 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(nopEncoding); + OW->write32(nopEncoding); // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? switch (Count % 4) { default: break; // No leftover bytes to write case 1: - OW->Write8(0); + OW->write8(0); break; case 2: - OW->Write16(0); + OW->write16(0); break; case 3: - OW->Write16(0); - OW->Write8(0xa0); + OW->write16(0); + OW->write8(0xa0); break; } @@ -601,8 +601,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the offset when the destination has the same MCFragment. 
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { const MCSymbol &Sym = A->getSymbol(); - const MCSymbolData &SymData = Asm.getSymbolData(Sym); - IsResolved = (SymData.getFragment() == DF); + IsResolved = (Sym.getFragment() == DF); } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index f4fedeef650b..804d3534096a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,7 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; } @@ -49,7 +49,7 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, +bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { // FIXME: This is extremely conservative. 
This really needs to use a // whitelist with a clear explanation for why each realocation needs to diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 0eb5a8136e88..6e3af739eca2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -22,9 +22,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" @@ -34,7 +32,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" @@ -216,7 +214,13 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { } void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { - OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n'; + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + + OS << "\t.thumb_set\t"; + Symbol->print(OS, MAI); + OS << ", "; + Value->print(OS, MAI); + OS << '\n'; } void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { @@ -562,17 +566,16 @@ private: MCSymbol *Start = getContext().createTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = - getContext().getOrCreateSymbol(Name + "." + - Twine(MappingSymbolCounter++)); + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." 
+ Twine(MappingSymbolCounter++))); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); + getAssembler().registerSymbol(*Symbol); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); AssignSection(Symbol, getCurrentSection().first); - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); Symbol->setVariableValue(Value); } @@ -688,16 +691,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; setAttributeItem(CPU_name, - ARMTargetParser::getArchDefaultCPUName(Arch), + ARMTargetParser::getCPUAttr(Arch), false); if (EmittedArch == ARM::AK_INVALID) setAttributeItem(CPU_arch, - ARMTargetParser::getArchDefaultCPUArch(Arch), + ARMTargetParser::getArchAttr(Arch), false); else setAttributeItem(CPU_arch, - ARMTargetParser::getArchDefaultCPUArch(EmittedArch), + ARMTargetParser::getArchAttr(EmittedArch), false); switch (Arch) { @@ -813,6 +816,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // ABI_HardFP_use is handled in ARMAsmPrinter, so _SP_D16 is treated the same + // as _D16 here. + case ARM::FK_FPV4_SP_D16: case ARM::FK_VFPV4_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4B, @@ -827,6 +833,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so // uses the FP_ARMV8_D16 build attribute. 
+ case ARM::FK_FPV5_SP_D16: case ARM::FK_FPV5_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8B, @@ -861,6 +868,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; case ARM::FK_SOFTVFP: + case ARM::FK_NONE: break; default: @@ -972,9 +980,9 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { if (!Streamer.IsThumb) return; - const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol); - unsigned Type = MCELF::GetType(SD); - if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc) + Streamer.getAssembler().registerSymbol(*Symbol); + unsigned Type = cast<MCSymbolELF>(Symbol)->getType(); + if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) Streamer.EmitThumbFunc(Symbol); } @@ -1024,7 +1032,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSymbol *Group = FnSection.getGroup(); + const MCSymbolELF *Group = FnSection.getGroup(); if (Group) Flags |= ELF::SHF_GROUP; MCSectionELF *EHSection = @@ -1095,7 +1103,7 @@ void ARMELFStreamer::emitFnEnd() { EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = - MCSymbolRefExpr::Create(FnStart, + MCSymbolRefExpr::create(FnStart, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); @@ -1106,7 +1114,7 @@ void ARMELFStreamer::emitFnEnd() { } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. 
const MCSymbolRefExpr *ExTabEntryRef = - MCSymbolRefExpr::Create(ExTab, + MCSymbolRefExpr::create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); EmitValue(ExTabEntryRef, 4); @@ -1138,7 +1146,7 @@ void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; } void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name); - const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::create( PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext()); visitUsedExpr(*PersonalityRef); @@ -1186,7 +1194,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { // Emit personality if (Personality) { const MCSymbolRefExpr *PersonalityRef = - MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::create(Personality, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index caa873622ae9..1ac08159bd3d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -19,8 +19,7 @@ using namespace llvm; void ARMMCAsmInfoDarwin::anchor() { } -ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { - Triple TheTriple(TT); +ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::armeb) || (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; @@ -41,8 +40,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { void ARMELFMCAsmInfo::anchor() { } -ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::armeb) || (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 6cb471537f6e..99a5fff5ec27 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -19,18 +19,19 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { + class Triple; class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { virtual void anchor(); public: - explicit ARMMCAsmInfoDarwin(StringRef TT); + explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); }; class ARMELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit ARMELFMCAsmInfo(StringRef TT); + explicit ARMELFMCAsmInfo(const Triple &TT); void setUseIntegratedAssembler(bool Value) override; }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 5b90de327418..2063ca6bdf3b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -16,12 +16,12 @@ using namespace llvm; #define DEBUG_TYPE "armmcexpr" const ARMMCExpr* -ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { return new (Ctx) ARMMCExpr(Kind, Expr); } -void ARMMCExpr::PrintImpl(raw_ostream &OS) const { +void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_ARM_HI16: OS << ":upper16:"; break; @@ -31,7 +31,7 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const { const MCExpr *Expr = getSubExpr(); if (Expr->getKind() != MCExpr::SymbolRef) OS << '('; - Expr->print(OS); + Expr->print(OS, MAI); if (Expr->getKind() != MCExpr::SymbolRef) OS << ')'; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index a52abe7760d1..9146d4def75a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -33,15 +33,15 @@ public: /// @name Construction /// @{ - static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + static const ARMMCExpr *create(VariantKind Kind, const MCExpr 
*Expr, MCContext &Ctx); - static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_ARM_HI16, Expr, Ctx); + static const ARMMCExpr *createUpper16(const MCExpr *Expr, MCContext &Ctx) { + return create(VK_ARM_HI16, Expr, Ctx); } - static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_ARM_LO16, Expr, Ctx); + static const ARMMCExpr *createLower16(const MCExpr *Expr, MCContext &Ctx) { + return create(VK_ARM_LO16, Expr, Ctx); } /// @} @@ -56,15 +56,15 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS ARMMCExprs at the moment. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 30deba9a08c6..92c4d6a824ea 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -277,18 +277,17 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); - +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) - MAI = new ARMMCAsmInfoDarwin(TT); + MAI = new ARMMCAsmInfoDarwin(TheTriple); else if (TheTriple.isWindowsItaniumEnvironment()) MAI = new ARMCOFFMCAsmInfoGNU(); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new ARMCOFFMCAsmInfoMicrosoft(); else - MAI = new ARMELFMCAsmInfo(TT); + MAI = new ARMELFMCAsmInfo(TheTriple); unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index d4b00e6e4fb5..4468132588cf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -26,9 +26,9 @@ public: unsigned VariantKind) override { switch(VariantKind) { case LLVMDisassembler_VariantKind_ARM_HI16: - return ARMMCExpr::CreateUpper16(SubExpr, Ctx); + return ARMMCExpr::createUpper16(SubExpr, Ctx); case LLVMDisassembler_VariantKind_ARM_LO16: - return ARMMCExpr::CreateLower16(SubExpr, Ctx); + return ARMMCExpr::createLower16(SubExpr, Ctx); default: return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr, VariantKind); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 9755330bf8c3..95d7ea7c04a3 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" @@ -49,12 +48,10 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCSymbol &S, uint64_t FixedValue); public: - ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} + ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -152,23 +149,21 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint32_t Value2 = 0; - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); @@ -176,7 +171,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_HALF_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -255,24 +250,22 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); @@ -280,7 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -344,7 +337,7 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, return false; } -void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, +void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -405,7 +398,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Resolve constant variables. 
if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index b62ae2e3429e..68736bc1decd 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -94,12 +94,12 @@ static void TrackDefUses(MachineInstr *MI, /// conservatively remove more kill flags than are necessary, but removing them /// is safer than incorrect kill flags remaining on instructions. static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) { - for (MIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || !MO->isKill()) + for (MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef() || !MO.isKill()) continue; - if (!Uses.count(MO->getReg())) + if (!Uses.count(MO.getReg())) continue; - MO->setIsKill(false); + MO.setIsKill(false); } } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 0ab1ff906c9a..d9ab824995c1 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -133,7 +133,7 @@ namespace { class Thumb2SizeReduce : public MachineFunctionPass { public: static char ID; - Thumb2SizeReduce(); + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); const Thumb2InstrInfo *TII; const ARMSubtarget *STI; @@ -198,11 +198,14 @@ namespace { }; SmallVector<MBBInfo, 8> BlockInfo; + + std::function<bool(const Function &)> PredicateFtor; }; char Thumb2SizeReduce::ID = 0; } -Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { +Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) + : MachineFunctionPass(ID), PredicateFtor(Ftor) { OptimizeSize = MinimizeSize = false; for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = 
ReduceTable[i].WideOpc; @@ -1000,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); if (STI->isThumb1Only() || STI->prefers32BitThumb()) return false; @@ -1025,6 +1031,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size /// reduction pass. -FunctionPass *llvm::createThumb2SizeReductionPass() { - return new Thumb2SizeReduce(); +FunctionPass *llvm::createThumb2SizeReductionPass( + std::function<bool(const Function &)> Ftor) { + return new Thumb2SizeReduce(Ftor); } diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index 32375968eac1..10ec6587550b 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -83,5 +83,7 @@ void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Force static initialization. 
extern "C" void LLVMInitializeBPFAsmPrinter() { - RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFTarget); + RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFleTarget); + RegisterAsmPrinter<BPFAsmPrinter> Y(TheBPFbeTarget); + RegisterAsmPrinter<BPFAsmPrinter> Z(TheBPFTarget); } diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp index d608afb348cb..00bd8d9c090c 100644 --- a/lib/Target/BPF/BPFMCInstLower.cpp +++ b/lib/Target/BPF/BPFMCInstLower.cpp @@ -33,7 +33,7 @@ BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); if (!MO.isJTI() && MO.getOffset()) llvm_unreachable("unknown symbol op"); @@ -63,7 +63,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_RegisterMask: continue; diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 9487427fef5e..3329d5f87409 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -23,19 +23,24 @@ using namespace llvm; extern "C" void LLVMInitializeBPFTarget() { // Register the target. 
- RegisterTargetMachine<BPFTargetMachine> X(TheBPFTarget); + RegisterTargetMachine<BPFTargetMachine> X(TheBPFleTarget); + RegisterTargetMachine<BPFTargetMachine> Y(TheBPFbeTarget); + RegisterTargetMachine<BPFTargetMachine> Z(TheBPFTarget); +} + +// DataLayout: little or big endian +static std::string computeDataLayout(StringRef TT) { + if (Triple(TT).getArch() == Triple::bpfeb) + return "E-m:e-p:64:64-i64:64-n32:64-S128"; + else + return "e-m:e-p:64:64-i64:64-n32:64-S128"; } -// DataLayout --> Little-endian, 64-bit pointer/ABI/alignment -// The stack is always 8 byte aligned -// On function prologue, the stack is created by decrementing -// its pointer. Once decremented, all references are done with positive -// offset from the stack/frame pointer. BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS, + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 48f34e484590..7b1d9259caf9 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -25,7 +25,10 @@ using namespace llvm; namespace { class BPFAsmBackend : public MCAsmBackend { public: - BPFAsmBackend() : MCAsmBackend() {} + bool IsLittleEndian; + + BPFAsmBackend(bool IsLittleEndian) + : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} ~BPFAsmBackend() override {} void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, @@ -54,7 +57,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return false; for (uint64_t i = 0; i < Count; i += 8) - OW->Write64(0x15000000); + OW->write64(0x15000000); 
return true; } @@ -69,17 +72,28 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } assert(Fixup.getKind() == FK_PCRel_2); Value = (uint16_t)((Value - 8) / 8); - Data[Fixup.getOffset() + 2] = Value & 0xFF; - Data[Fixup.getOffset() + 3] = Value >> 8; + if (IsLittleEndian) { + Data[Fixup.getOffset() + 2] = Value & 0xFF; + Data[Fixup.getOffset() + 3] = Value >> 8; + } else { + Data[Fixup.getOffset() + 2] = Value >> 8; + Data[Fixup.getOffset() + 3] = Value & 0xFF; + } } MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { - return createBPFELFObjectWriter(OS, 0); + return createBPFELFObjectWriter(OS, 0, IsLittleEndian); } } MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { - return new BPFAsmBackend(); + return new BPFAsmBackend(/*IsLittleEndian=*/true); +} + +MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU) { + return new BPFAsmBackend(/*IsLittleEndian=*/false); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index a5562c1a933e..05ba6183e322 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -47,7 +47,8 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { +MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index ab61ae7ae662..d63bbf49294e 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ 
b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -16,13 +16,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/ADT/Triple.h" namespace llvm { class Target; +class Triple; class BPFMCAsmInfo : public MCAsmInfo { public: - explicit BPFMCAsmInfo(StringRef TT) { + explicit BPFMCAsmInfo(const Triple &TT) { + if (TT.getArch() == Triple::bpfeb) + IsLittleEndian = false; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index ba8a874e4966..dc4ede30f191 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -30,9 +30,11 @@ class BPFMCCodeEmitter : public MCCodeEmitter { BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete; void operator=(const BPFMCCodeEmitter &) = delete; const MCRegisterInfo &MRI; + bool IsLittleEndian; public: - BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {} + BPFMCCodeEmitter(const MCRegisterInfo &mri, bool IsLittleEndian) + : MRI(mri), IsLittleEndian(IsLittleEndian) {} ~BPFMCCodeEmitter() {} @@ -61,7 +63,13 @@ public: MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { - return new BPFMCCodeEmitter(MRI); + return new BPFMCCodeEmitter(MRI, true); +} + +MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new BPFMCCodeEmitter(MRI, false); } unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, @@ -91,59 +99,53 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, return 0; } -// Emit one byte through output stream -void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) { - OS << (char)C; - ++CurByte; -} - -// Emit a series of bytes (little endian) -void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) { - assert(Size <= 8 && 
"size too big in emit constant"); - - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, CurByte, OS); - Val >>= 8; - } -} - -// Emit a series of bytes (big endian) -void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) { - assert(Size <= 8 && "size too big in emit constant"); - - for (int i = (Size - 1) * 8; i >= 0; i -= 8) - EmitByte((Val >> i) & 255, CurByte, OS); +static uint8_t SwapBits(uint8_t Val) +{ + return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4; } void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { unsigned Opcode = MI.getOpcode(); - // Keep track of the current byte being emitted - unsigned CurByte = 0; + support::endian::Writer<support::little> LE(OS); + support::endian::Writer<support::big> BE(OS); if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - EmitByte(Value >> 56, CurByte, OS); - EmitByte(((Value >> 48) & 0xff), CurByte, OS); - EmitLEConstant(0, 2, CurByte, OS); - EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS); + LE.write<uint8_t>(Value >> 56); + if (IsLittleEndian) + LE.write<uint8_t>((Value >> 48) & 0xff); + else + LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); + LE.write<uint16_t>(0); + if (IsLittleEndian) + LE.write<uint32_t>(Value & 0xffffFFFF); + else + BE.write<uint32_t>(Value & 0xffffFFFF); const MCOperand &MO = MI.getOperand(1); uint64_t Imm = MO.isImm() ? 
MO.getImm() : 0; - EmitByte(0, CurByte, OS); - EmitByte(0, CurByte, OS); - EmitLEConstant(0, 2, CurByte, OS); - EmitLEConstant(Imm >> 32, 4, CurByte, OS); + LE.write<uint8_t>(0); + LE.write<uint8_t>(0); + LE.write<uint16_t>(0); + if (IsLittleEndian) + LE.write<uint32_t>(Imm >> 32); + else + BE.write<uint32_t>(Imm >> 32); } else { // Get instruction encoding and emit it uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - EmitByte(Value >> 56, CurByte, OS); - EmitByte((Value >> 48) & 0xff, CurByte, OS); - EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS); - EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS); + LE.write<uint8_t>(Value >> 56); + if (IsLittleEndian) { + LE.write<uint8_t>((Value >> 48) & 0xff); + LE.write<uint16_t>((Value >> 32) & 0xffff); + LE.write<uint32_t>(Value & 0xffffFFFF); + } else { + LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); + BE.write<uint16_t>((Value >> 32) & 0xffff); + BE.write<uint32_t>(Value & 0xffffFFFF); + } } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index c4cf4b824508..7cedba90a746 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -79,32 +79,43 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, } extern "C" void LLVMInitializeBPFTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo<BPFMCAsmInfo> X(TheBPFTarget); + for (Target *T : {&TheBPFleTarget, &TheBPFbeTarget, &TheBPFTarget}) { + // Register the MC asm info. + RegisterMCAsmInfo<BPFMCAsmInfo> X(*T); - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheBPFTarget, createBPFMCCodeGenInfo); + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createBPFMCCodeGenInfo); - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheBPFTarget, createBPFMCInstrInfo); + // Register the MC instruction info. 
+ TargetRegistry::RegisterMCInstrInfo(*T, createBPFMCInstrInfo); - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheBPFTarget, createBPFMCRegisterInfo); + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createBPFMCRegisterInfo); - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheBPFTarget, - createBPFMCSubtargetInfo); + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + createBPFMCSubtargetInfo); - // Register the MC code emitter - TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, - llvm::createBPFMCCodeEmitter); + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createBPFMCStreamer); - // Register the ASM Backend - TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend); + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createBPFMCInstPrinter); + } - // Register the object streamer - TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer); + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(TheBPFleTarget, createBPFMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheBPFbeTarget, createBPFbeMCCodeEmitter); - // Register the MCInstPrinter. 
- TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter); + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(TheBPFleTarget, createBPFAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheBPFbeTarget, createBPFbeAsmBackend); + + if (sys::IsLittleEndianHost) { + TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend); + } else { + TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFbeMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFbeAsmBackend); + } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index ce08b7cf76e6..a9ba7d990e17 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -30,16 +30,24 @@ class StringRef; class raw_ostream; class raw_pwrite_stream; +extern Target TheBPFleTarget; +extern Target TheBPFbeTarget; extern Target TheBPFTarget; MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); +MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); -MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); +MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian); } // Defines symbolic names for BPF registers. 
This defines a mapping from diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp index 87716e6775cf..a16dbae867b2 100644 --- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp +++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp @@ -11,8 +11,18 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -Target llvm::TheBPFTarget; +namespace llvm { +Target TheBPFleTarget; +Target TheBPFbeTarget; +Target TheBPFTarget; +} extern "C" void LLVMInitializeBPFTargetInfo() { - RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF"); + TargetRegistry::RegisterTarget(TheBPFTarget, "bpf", + "BPF (host endian)", + [](Triple::ArchType) { return false; }, true); + RegisterTarget<Triple::bpfel, /*HasJIT=*/true> X( + TheBPFleTarget, "bpfel", "BPF (little endian)"); + RegisterTarget<Triple::bpfeb, /*HasJIT=*/true> Y( + TheBPFbeTarget, "bpfeb", "BPF (big endian)"); } diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 1805437b12f7..e6d0199952f4 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp + TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index f1a7127e8fd9..b8377986ecc0 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -645,8 +645,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "ArrayType* " << typeName << " = ArrayType::get(" - << elemName - << ", " << utostr(AT->getNumElements()) << ");"; + << elemName << ", " << AT->getNumElements() << ");"; nl(Out); } break; @@ -658,8 +657,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "PointerType* 
" << typeName << " = PointerType::get(" - << elemName - << ", " << utostr(PT->getAddressSpace()) << ");"; + << elemName << ", " << PT->getAddressSpace() << ");"; nl(Out); } break; @@ -671,8 +669,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "VectorType* " << typeName << " = VectorType::get(" - << elemName - << ", " << utostr(PT->getNumElements()) << ");"; + << elemName << ", " << PT->getNumElements() << ");"; nl(Out); } break; @@ -1029,7 +1026,7 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) { } if (GV->getAlignment()) { printCppName(GV); - Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; + Out << "->setAlignment(" << GV->getAlignment() << ");"; nl(Out); } if (GV->getVisibility() != GlobalValue::DefaultVisibility) { diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index a60d1e471944..14f9d777580c 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" + #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -27,6 +29,7 @@ #include <vector> using namespace llvm; +using namespace Hexagon; #define DEBUG_TYPE "hexagon-disassembler" @@ -37,9 +40,14 @@ namespace { /// \brief Hexagon disassembler for all Hexagon platforms. 
class HexagonDisassembler : public MCDisassembler { public: + std::unique_ptr<MCInst *> CurrentBundle; HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx) {} + : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {} + DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream, + bool &Complete) const; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &VStream, @@ -48,37 +56,43 @@ public: } static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, void const *Decoder); + uint64_t Address, + void const *Decoder); + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os); +static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst); static const uint16_t IntRegDecoderTable[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, - Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, - Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, - Hexagon::R30, Hexagon::R31 }; + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, 
Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; -static const uint16_t PredRegDecoderTable[] = { Hexagon::P0, Hexagon::P1, -Hexagon::P2, Hexagon::P3 }; +static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - const uint16_t Table[], size_t Size) { + const uint16_t Table[], size_t Size) { if (RegNo < Size) { Inst.addOperand(MCOperand::createReg(Table[RegNo])); return MCDisassembler::Success; - } - else + } else return MCDisassembler::Fail; } static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -88,13 +102,13 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { static const uint16_t CtrlRegDecoderTable[] = { - Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, - Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, - Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, - Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH - }; + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH}; if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) return MCDisassembler::Fail; @@ -108,17 +122,15 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus 
DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { static const uint16_t CtrlReg64DecoderTable[] = { - Hexagon::C1_0, Hexagon::NoRegister, - Hexagon::C3_2, Hexagon::NoRegister, - Hexagon::NoRegister, Hexagon::NoRegister, - Hexagon::C7_6, Hexagon::NoRegister, - Hexagon::C9_8, Hexagon::NoRegister, - Hexagon::C11_10, Hexagon::NoRegister, - Hexagon::CS, Hexagon::NoRegister, - Hexagon::UPC, Hexagon::NoRegister - }; + Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, + Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8, + Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC, + Hexagon::NoRegister}; if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) return MCDisassembler::Fail; @@ -132,7 +144,8 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { unsigned Register = 0; switch (RegNo) { case 0: @@ -149,22 +162,21 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { static const uint16_t DoubleRegDecoderTable[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, - Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, - Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, - Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15 - }; + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, 
Hexagon::D13, Hexagon::D14, Hexagon::D15}; - return (DecodeRegisterClass(Inst, RegNo >> 1, - DoubleRegDecoderTable, - sizeof (DoubleRegDecoderTable))); + return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable, + sizeof(DoubleRegDecoderTable))); } static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { if (RegNo > 3) return MCDisassembler::Fail; @@ -191,17 +203,687 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &os, raw_ostream &cs) const { - Size = 4; - if (Bytes.size() < 4) - return MCDisassembler::Fail; + DecodeStatus Result = DecodeStatus::Success; + bool Complete = false; + Size = 0; + + *CurrentBundle = &MI; + MI.setOpcode(Hexagon::BUNDLE); + MI.addOperand(MCOperand::createImm(0)); + while (Result == Success && Complete == false) { + if (Bytes.size() < HEXAGON_INSTR_SIZE) + return MCDisassembler::Fail; + MCInst *Inst = new (getContext()) MCInst; + Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, Complete); + MI.addOperand(MCOperand::createInst(Inst)); + Size += HEXAGON_INSTR_SIZE; + Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); + } + return Result; +} + +DecodeStatus HexagonDisassembler::getSingleInstruction( + MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &os, raw_ostream &cs, bool &Complete) const { + assert(Bytes.size() >= HEXAGON_INSTR_SIZE); - uint32_t insn = + uint32_t Instruction = llvm::support::endian::read<uint32_t, llvm::support::little, llvm::support::unaligned>(Bytes.data()); - // Remove parse bits. 
- insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK); - DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); - HexagonMCInstrInfo::AppendImplicitOperands(MI); + auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_LOOP_END) { + if (BundleSize == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (BundleSize == 1) + HexagonMCInstrInfo::setOuterLoop(MCB); + else + return DecodeStatus::Fail; + } + + DecodeStatus Result = DecodeStatus::Success; + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_DUPLEX) { + // Determine the instruction class of each instruction in the duplex. + unsigned duplexIClass, IClassLow, IClassHigh; + + duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1); + switch (duplexIClass) { + default: + return MCDisassembler::Fail; + case 0: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 1: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 2: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 3: + IClassLow = HexagonII::HSIG_A; + IClassHigh = HexagonII::HSIG_A; + break; + case 4: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_A; + break; + case 5: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_A; + break; + case 6: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_A; + break; + case 7: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_A; + break; + case 8: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 9: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L2; + break; + case 10: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_S1; + break; + case 11: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S1; + break; + case 12: + 
IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 13: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 14: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S2; + break; + } + + // Set the MCInst to be a duplex instruction. Which one doesn't matter. + MI.setOpcode(Hexagon::DuplexIClass0); + + // Decode each instruction in the duplex. + // Create an MCInst for each instruction. + unsigned instLow = Instruction & 0x1fff; + unsigned instHigh = (Instruction >> 16) & 0x1fff; + unsigned opLow; + if (GetSubinstOpcode(IClassLow, instLow, opLow, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + unsigned opHigh; + if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + MCInst *MILow = new (getContext()) MCInst; + MILow->setOpcode(opLow); + MCInst *MIHigh = new (getContext()) MCInst; + MIHigh->setOpcode(opHigh); + AddSubinstOperands(MILow, opLow, instLow); + AddSubinstOperands(MIHigh, opHigh, instHigh); + // see ConvertToSubInst() in + // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp + + // Add the duplex instruction MCInsts as operands to the passed in MCInst. + MCOperand OPLow = MCOperand::createInst(MILow); + MCOperand OPHigh = MCOperand::createInst(MIHigh); + MI.addOperand(OPLow); + MI.addOperand(OPHigh); + Complete = true; + } else { + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_PACKET_END) + Complete = true; + // Calling the auto-generated decoder function. 
+ Result = + decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + } + return Result; } + +// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td +enum subInstBinaryValues { + V4_SA1_addi_BITS = 0x0000, + V4_SA1_addi_MASK = 0x1800, + V4_SA1_addrx_BITS = 0x1800, + V4_SA1_addrx_MASK = 0x1f00, + V4_SA1_addsp_BITS = 0x0c00, + V4_SA1_addsp_MASK = 0x1c00, + V4_SA1_and1_BITS = 0x1200, + V4_SA1_and1_MASK = 0x1f00, + V4_SA1_clrf_BITS = 0x1a70, + V4_SA1_clrf_MASK = 0x1e70, + V4_SA1_clrfnew_BITS = 0x1a50, + V4_SA1_clrfnew_MASK = 0x1e70, + V4_SA1_clrt_BITS = 0x1a60, + V4_SA1_clrt_MASK = 0x1e70, + V4_SA1_clrtnew_BITS = 0x1a40, + V4_SA1_clrtnew_MASK = 0x1e70, + V4_SA1_cmpeqi_BITS = 0x1900, + V4_SA1_cmpeqi_MASK = 0x1f00, + V4_SA1_combine0i_BITS = 0x1c00, + V4_SA1_combine0i_MASK = 0x1d18, + V4_SA1_combine1i_BITS = 0x1c08, + V4_SA1_combine1i_MASK = 0x1d18, + V4_SA1_combine2i_BITS = 0x1c10, + V4_SA1_combine2i_MASK = 0x1d18, + V4_SA1_combine3i_BITS = 0x1c18, + V4_SA1_combine3i_MASK = 0x1d18, + V4_SA1_combinerz_BITS = 0x1d08, + V4_SA1_combinerz_MASK = 0x1d08, + V4_SA1_combinezr_BITS = 0x1d00, + V4_SA1_combinezr_MASK = 0x1d08, + V4_SA1_dec_BITS = 0x1300, + V4_SA1_dec_MASK = 0x1f00, + V4_SA1_inc_BITS = 0x1100, + V4_SA1_inc_MASK = 0x1f00, + V4_SA1_seti_BITS = 0x0800, + V4_SA1_seti_MASK = 0x1c00, + V4_SA1_setin1_BITS = 0x1a00, + V4_SA1_setin1_MASK = 0x1e40, + V4_SA1_sxtb_BITS = 0x1500, + V4_SA1_sxtb_MASK = 0x1f00, + V4_SA1_sxth_BITS = 0x1400, + V4_SA1_sxth_MASK = 0x1f00, + V4_SA1_tfr_BITS = 0x1000, + V4_SA1_tfr_MASK = 0x1f00, + V4_SA1_zxtb_BITS = 0x1700, + V4_SA1_zxtb_MASK = 0x1f00, + V4_SA1_zxth_BITS = 0x1600, + V4_SA1_zxth_MASK = 0x1f00, + V4_SL1_loadri_io_BITS = 0x0000, + V4_SL1_loadri_io_MASK = 0x1000, + V4_SL1_loadrub_io_BITS = 0x1000, + V4_SL1_loadrub_io_MASK = 0x1000, + V4_SL2_deallocframe_BITS = 0x1f00, + V4_SL2_deallocframe_MASK = 0x1fc0, + V4_SL2_jumpr31_BITS = 0x1fc0, + V4_SL2_jumpr31_MASK = 0x1fc4, + V4_SL2_jumpr31_f_BITS = 0x1fc5, + 
V4_SL2_jumpr31_f_MASK = 0x1fc7, + V4_SL2_jumpr31_fnew_BITS = 0x1fc7, + V4_SL2_jumpr31_fnew_MASK = 0x1fc7, + V4_SL2_jumpr31_t_BITS = 0x1fc4, + V4_SL2_jumpr31_t_MASK = 0x1fc7, + V4_SL2_jumpr31_tnew_BITS = 0x1fc6, + V4_SL2_jumpr31_tnew_MASK = 0x1fc7, + V4_SL2_loadrb_io_BITS = 0x1000, + V4_SL2_loadrb_io_MASK = 0x1800, + V4_SL2_loadrd_sp_BITS = 0x1e00, + V4_SL2_loadrd_sp_MASK = 0x1f00, + V4_SL2_loadrh_io_BITS = 0x0000, + V4_SL2_loadrh_io_MASK = 0x1800, + V4_SL2_loadri_sp_BITS = 0x1c00, + V4_SL2_loadri_sp_MASK = 0x1e00, + V4_SL2_loadruh_io_BITS = 0x0800, + V4_SL2_loadruh_io_MASK = 0x1800, + V4_SL2_return_BITS = 0x1f40, + V4_SL2_return_MASK = 0x1fc4, + V4_SL2_return_f_BITS = 0x1f45, + V4_SL2_return_f_MASK = 0x1fc7, + V4_SL2_return_fnew_BITS = 0x1f47, + V4_SL2_return_fnew_MASK = 0x1fc7, + V4_SL2_return_t_BITS = 0x1f44, + V4_SL2_return_t_MASK = 0x1fc7, + V4_SL2_return_tnew_BITS = 0x1f46, + V4_SL2_return_tnew_MASK = 0x1fc7, + V4_SS1_storeb_io_BITS = 0x1000, + V4_SS1_storeb_io_MASK = 0x1000, + V4_SS1_storew_io_BITS = 0x0000, + V4_SS1_storew_io_MASK = 0x1000, + V4_SS2_allocframe_BITS = 0x1c00, + V4_SS2_allocframe_MASK = 0x1e00, + V4_SS2_storebi0_BITS = 0x1200, + V4_SS2_storebi0_MASK = 0x1f00, + V4_SS2_storebi1_BITS = 0x1300, + V4_SS2_storebi1_MASK = 0x1f00, + V4_SS2_stored_sp_BITS = 0x0a00, + V4_SS2_stored_sp_MASK = 0x1e00, + V4_SS2_storeh_io_BITS = 0x0000, + V4_SS2_storeh_io_MASK = 0x1800, + V4_SS2_storew_sp_BITS = 0x0800, + V4_SS2_storew_sp_MASK = 0x1e00, + V4_SS2_storewi0_BITS = 0x1000, + V4_SS2_storewi0_MASK = 0x1f00, + V4_SS2_storewi1_BITS = 0x1100, + V4_SS2_storewi1_MASK = 0x1f00 +}; + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os) { + switch (IClass) { + case HexagonII::HSIG_L1: + if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS) + op = Hexagon::V4_SL1_loadri_io; + else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS) + op = Hexagon::V4_SL1_loadrub_io; + else { + os << "<unknown 
subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_L2: + if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS) + op = Hexagon::V4_SL2_deallocframe; + else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS) + op = Hexagon::V4_SL2_jumpr31; + else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS) + op = Hexagon::V4_SL2_jumpr31_f; + else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS) + op = Hexagon::V4_SL2_jumpr31_fnew; + else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS) + op = Hexagon::V4_SL2_jumpr31_t; + else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS) + op = Hexagon::V4_SL2_jumpr31_tnew; + else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS) + op = Hexagon::V4_SL2_loadrb_io; + else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS) + op = Hexagon::V4_SL2_loadrd_sp; + else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS) + op = Hexagon::V4_SL2_loadrh_io; + else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS) + op = Hexagon::V4_SL2_loadri_sp; + else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS) + op = Hexagon::V4_SL2_loadruh_io; + else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS) + op = Hexagon::V4_SL2_return; + else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS) + op = Hexagon::V4_SL2_return_f; + else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS) + op = Hexagon::V4_SL2_return_fnew; + else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS) + op = Hexagon::V4_SL2_return_t; + else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS) + op = Hexagon::V4_SL2_return_tnew; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_A: + if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS) + op = Hexagon::V4_SA1_addi; + else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS) + op 
= Hexagon::V4_SA1_addrx; + else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS) + op = Hexagon::V4_SA1_addsp; + else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS) + op = Hexagon::V4_SA1_and1; + else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS) + op = Hexagon::V4_SA1_clrf; + else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS) + op = Hexagon::V4_SA1_clrfnew; + else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS) + op = Hexagon::V4_SA1_clrt; + else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS) + op = Hexagon::V4_SA1_clrtnew; + else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS) + op = Hexagon::V4_SA1_cmpeqi; + else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS) + op = Hexagon::V4_SA1_combine0i; + else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS) + op = Hexagon::V4_SA1_combine1i; + else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS) + op = Hexagon::V4_SA1_combine2i; + else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS) + op = Hexagon::V4_SA1_combine3i; + else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS) + op = Hexagon::V4_SA1_combinerz; + else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS) + op = Hexagon::V4_SA1_combinezr; + else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS) + op = Hexagon::V4_SA1_dec; + else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS) + op = Hexagon::V4_SA1_inc; + else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS) + op = Hexagon::V4_SA1_seti; + else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS) + op = Hexagon::V4_SA1_setin1; + else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS) + op = Hexagon::V4_SA1_sxtb; + else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS) + op = Hexagon::V4_SA1_sxth; + else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS) + op = Hexagon::V4_SA1_tfr; + else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS) + op = Hexagon::V4_SA1_zxtb; + else if ((inst & 
V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS) + op = Hexagon::V4_SA1_zxth; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S1: + if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS) + op = Hexagon::V4_SS1_storeb_io; + else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS) + op = Hexagon::V4_SS1_storew_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S2: + if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS) + op = Hexagon::V4_SS2_allocframe; + else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS) + op = Hexagon::V4_SS2_storebi0; + else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS) + op = Hexagon::V4_SS2_storebi1; + else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS) + op = Hexagon::V4_SS2_stored_sp; + else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS) + op = Hexagon::V4_SS2_storeh_io; + else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS) + op = Hexagon::V4_SS2_storew_sp; + else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS) + op = Hexagon::V4_SS2_storewi0; + else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS) + op = Hexagon::V4_SS2_storewi1; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + default: + os << "<unknown>"; + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { + if (encoded_reg < 8) + return Hexagon::R0 + encoded_reg; + else if (encoded_reg < 16) + return Hexagon::R0 + encoded_reg + 8; + return Hexagon::NoRegister; +} + +static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { + if (encoded_dreg < 4) + return Hexagon::D0 + encoded_dreg; + else if (encoded_dreg < 8) + return Hexagon::D0 + encoded_dreg + 4; + return Hexagon::NoRegister; +} + +static void AddSubinstOperands(MCInst 
*MI, unsigned opcode, unsigned inst) { + int64_t operand; + MCOperand Op; + switch (opcode) { + case Hexagon::V4_SL2_deallocframe: + case Hexagon::V4_SL2_jumpr31: + case Hexagon::V4_SL2_jumpr31_f: + case Hexagon::V4_SL2_jumpr31_fnew: + case Hexagon::V4_SL2_jumpr31_t: + case Hexagon::V4_SL2_jumpr31_tnew: + case Hexagon::V4_SL2_return: + case Hexagon::V4_SL2_return_f: + case Hexagon::V4_SL2_return_fnew: + case Hexagon::V4_SL2_return_t: + case Hexagon::V4_SL2_return_tnew: + // no operands for these instructions + break; + case Hexagon::V4_SS2_allocframe: + // u 8-4{5_3} + operand = ((inst & 0x1f0) >> 4) << 3; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL1_loadri_io: + // Rd 3-0, Rs 7-4, u 11-8{4_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 6; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL1_loadrub_io: + // Rd 3-0, Rs 7-4, u 11-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrb_io: + // Rd 3-0, Rs 7-4, u 10-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x700) >> 8; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrh_io: + case Hexagon::V4_SL2_loadruh_io: + // Rd 3-0, Rs 7-4, u 10-8{3_1} + operand = 
getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrd_sp: + // Rdd 2-0, u 7-3{5_3} + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x0f8) >> 3) << 3; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadri_sp: + // Rd 3-0, u 8-4{5_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x1f0) >> 4) << 2; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_addi: + // Rx 3-0 (x2), s7 10-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = SignExtend64<7>((inst & 0x7f0) >> 4); + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_addrx: + // Rx 3-0 (x2), Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + case Hexagon::V4_SA1_and1: + case Hexagon::V4_SA1_dec: + case Hexagon::V4_SA1_inc: + case Hexagon::V4_SA1_sxtb: + case Hexagon::V4_SA1_sxth: + case Hexagon::V4_SA1_tfr: + case Hexagon::V4_SA1_zxtb: + case Hexagon::V4_SA1_zxth: + // Rd 3-0, Rs 7-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case 
Hexagon::V4_SA1_addsp: + // Rd 3-0, u 9-4{6_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x3f0) >> 4) << 2; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_seti: + // Rd 3-0, u 9-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x3f0) >> 4; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_clrf: + case Hexagon::V4_SA1_clrfnew: + case Hexagon::V4_SA1_clrt: + case Hexagon::V4_SA1_clrtnew: + case Hexagon::V4_SA1_setin1: + // Rd 3-0 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_cmpeqi: + // Rs 7-4, u 1-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0x3; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_combine0i: + case Hexagon::V4_SA1_combine1i: + case Hexagon::V4_SA1_combine2i: + case Hexagon::V4_SA1_combine3i: + // Rdd 2-0, u 6-5 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x060) >> 5; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_combinerz: + case Hexagon::V4_SA1_combinezr: + // Rdd 2-0, Rs 7-4 + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storeb_io: + // Rs 7-4, u 11-8, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + Op = 
MCOperand::createImm(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS1_storew_io: + // Rs 7-4, u 11-8{4_2}, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0xf00) >> 8) << 2; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storebi0: + case Hexagon::V4_SS2_storebi1: + // Rs 7-4, u 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = inst & 0xf; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storewi0: + case Hexagon::V4_SS2_storewi1: + // Rs 7-4, u 3-0{4_2} + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf) << 2; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_stored_sp: + // s 8-3{6_3}, Rtt 2-0 + operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + case Hexagon::V4_SS2_storeh_io: + // Rs 7-4, u 10-8{3_1}, Rt 3-0 + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SS2_storew_sp: + // u 8-4{5_2}, Rd 3-0 + operand = ((inst & 0x1f0) >> 4) << 2; + Op = 
MCOperand::createImm(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + break; + default: + // don't crash with an invalid subinstruction + // llvm_unreachable("Invalid subinstruction in duplex instruction"); + break; + } +} diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index dfe79f9ff7b0..6e2ecaf57e49 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -76,4 +76,11 @@ namespace llvm { // Maximum number of words and instructions in a packet. #define HEXAGON_PACKET_SIZE 4 +// Minimum number of instructions in an end-loop packet. +#define HEXAGON_PACKET_INNER_SIZE 2 +#define HEXAGON_PACKET_OUTER_SIZE 3 +// Maximum number of instructions in a packet before shuffling, +// including a compound one or a duplex or an extender. +#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3) + #endif diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index e9491baf29ef..05728d2b627e 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "HexagonTargetMachine.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -78,14 +79,14 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_ConstantPoolIndex: - O << *GetCPISymbol(MO.getIndex()); + GetCPISymbol(MO.getIndex())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: // Computing the address of a global symbol, not calling it. 
- O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); printOffset(MO.getOffset(), O); return; } @@ -177,49 +178,40 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - if (MI->isBundle()) { - std::vector<MachineInstr const *> BundleMIs; + MCInst MCB; + MCB.setOpcode(Hexagon::BUNDLE); + MCB.addOperand(MCOperand::createImm(0)); - const MachineBasicBlock *MBB = MI->getParent(); + if (MI->isBundle()) { + const MachineBasicBlock* MBB = MI->getParent(); MachineBasicBlock::const_instr_iterator MII = MI; - ++MII; - unsigned int IgnoreCount = 0; - while (MII != MBB->end() && MII->isInsideBundle()) { - const MachineInstr *MInst = MII; - if (MInst->getOpcode() == TargetOpcode::DBG_VALUE || - MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { - IgnoreCount++; - ++MII; - continue; + unsigned IgnoreCount = 0; + + for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) { + if (MII->getOpcode() == TargetOpcode::DBG_VALUE || + MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) + ++IgnoreCount; + else { + HexagonLowerToMC(MII, MCB, *this); } - // BundleMIs.push_back(&*MII); - BundleMIs.push_back(MInst); - ++MII; - } - unsigned Size = BundleMIs.size(); - assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); - for (unsigned Index = 0; Index < Size; Index++) { - MCInst MCI; - - HexagonLowerToMC(BundleMIs[Index], MCI, *this); - HexagonMCInstrInfo::AppendImplicitOperands(MCI); - HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0); - HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1)); - EmitToStreamer(*OutStreamer, MCI); } } else { - MCInst MCI; - HexagonLowerToMC(MI, MCI, *this); - HexagonMCInstrInfo::AppendImplicitOperands(MCI); - if (MI->getOpcode() == Hexagon::ENDLOOP0) { - HexagonMCInstrInfo::setPacketBegin(MCI, true); - HexagonMCInstrInfo::setPacketEnd(MCI, true); - } - EmitToStreamer(*OutStreamer, MCI); 
+ HexagonLowerToMC(MI, MCB, *this); + HexagonMCInstrInfo::padEndloop(MCB); } - - return; + // Examine the packet and try to find instructions that can be converted + // to compounds. + HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(), + OutStreamer->getContext(), MCB); + // Examine the packet and convert pairs of instructions to duplex + // instructions when possible. + SmallVector<DuplexCandidate, 8> possibleDuplexes; + possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties( + *Subtarget->getInstrInfo(), MCB); + HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget, + OutStreamer->getContext(), MCB, possibleDuplexes); + EmitToStreamer(*OutStreamer, MCB); } extern "C" void LLVMInitializeHexagonAsmPrinter() { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0885a794a7b4..868f87e18413 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -201,17 +201,17 @@ namespace { break; } // Check individual operands. - for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + for (const MachineOperand &MO : MI->operands()) { // While the presence of a frame index does not prove that a stack // frame will be required, all frame indexes should be within alloc- // frame/deallocframe. Otherwise, the code that translates a frame // index into an offset would have to be aware of the placement of // the frame creation/destruction instructions. - if (Mo->isFI()) + if (MO.isFI()) return true; - if (!Mo->isReg()) + if (!MO.isReg()) continue; - unsigned R = Mo->getReg(); + unsigned R = MO.getReg(); // Virtual registers will need scavenging, which then may require // a stack slot. 
if (TargetRegisterInfo::isVirtualRegister(R)) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ed5676c1fbb6..74d92aef25ac 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2370,7 +2370,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Allows a signed-extended 11-bit immediate field. if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) return false; diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 584c2c57c7ca..b80e8477eb7b 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -198,7 +198,8 @@ bool isPositiveHalfWord(SDNode *N); /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 36a7e9f642c6..44bab292f32c 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -66,10 +66,8 @@ def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) class OpcodeHexagon { field bits<32> Inst = ?; // Default to an invalid insn. 
bits<4> IClass = 0; // ICLASS - bits<2> IParse = 0; // Parse bits. let Inst{31-28} = IClass; - let Inst{15-14} = IParse; bits<1> zero = 0; } diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 7f7b2c96dba7..db83ef6bc474 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -146,6 +146,11 @@ class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, TypePREFIX>, OpcodeHexagon; +class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>, + OpcodeHexagon; + class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 49b4517698d5..e566a97789a9 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -779,10 +779,9 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } -MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FI) const { +MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FI) const { // Hexagon_TODO: Implement. 
return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 0239cabe9e52..a7ae65e4eb9c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -114,10 +114,12 @@ public: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override { return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 8b667c645156..65b0f4974367 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -4263,3 +4263,7 @@ def J4_jumpsetr: CJInst < let Inst{19-16} = Rs; let Inst{7-1} = r9_2{8-2}; } + +// Duplex instructions +//===----------------------------------------------------------------------===// +include "HexagonIsetDx.td" diff --git a/lib/Target/Hexagon/HexagonIsetDx.td b/lib/Target/Hexagon/HexagonIsetDx.td new file mode 100644 index 000000000000..0ca95e999859 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIsetDx.td @@ -0,0 +1,728 @@ +//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon duplex instructions. +// +//===----------------------------------------------------------------------===// + +// SA1_combine1i: Combines. 
+let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine1i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#1, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b01; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SL2_jumpr31_f: Indirect conditional jump if false. +// SL2_jumpr31_f -> SL2_jumpr31_fnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_f: SUBInst < + (outs ), + (ins ), + "if (!p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b101; + } + +// SL2_deallocframe: Deallocate stack frame. +let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_deallocframe: SUBInst < + (outs ), + (ins ), + "deallocframe"> { + let Inst{12-6} = 0b1111100; + let Inst{2} = 0b0; + } + +// SL2_return_f: Deallocate stack frame and return. +// SL2_return_f -> SL2_return_fnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_f: SUBInst < + (outs ), + (ins ), + "if (!p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b101; + } + +// SA1_combine3i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine3i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#3, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b11; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SS2_storebi0: Store byte. 
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#0"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10010; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_clrtnew: Clear if true. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrtnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b100; + let Inst{3-0} = Rd; + } + +// SL2_loadruh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadruh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memuh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b01; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SL2_jumpr31_tnew: Indirect conditional jump if true. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b110; + } + +// SA1_addi: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in +def V4_SA1_addi: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, s7Ext:$s7), + "$Rx = add($_src_, #$s7)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<7> s7; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rx; + let Inst{10-4} = s7; + } + +// SL1_loadrub_io: Load byte. 
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadrub_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "$Rd = memub($Rs + #$u4_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u4_0; + + let Inst{12} = 0b1; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + } + +// SL1_loadri_io: Load word. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadri_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "$Rd = memw($Rs + #$u4_2)"> { + bits<4> Rd; + bits<4> Rs; + bits<6> u4_2; + + let Inst{12} = 0b0; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + } + +// SA1_cmpeqi: Compareimmed. +let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_cmpeqi: SUBInst < + (outs ), + (ins IntRegs:$Rs, u2Imm:$u2), + "p0 = cmp.eq($Rs, #$u2)"> { + bits<4> Rs; + bits<2> u2; + + let Inst{12-8} = 0b11001; + let Inst{7-4} = Rs; + let Inst{1-0} = u2; + } + +// SA1_combinerz: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinerz: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine($Rs, #0)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b1; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_return_t: Deallocate stack frame and return. +// SL2_return_t -> SL2_return_tnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_t: SUBInst < + (outs ), + (ins ), + "if (p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b100; + } + +// SS2_allocframe: Allocate stack frame. 
+let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_allocframe: SUBInst < + (outs ), + (ins u5_3Imm:$u5_3), + "allocframe(#$u5_3)"> { + bits<8> u5_3; + + let Inst{12-9} = 0b1110; + let Inst{8-4} = u5_3{7-3}; + } + +// SS2_storeh_io: Store half. +let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in +def V4_SS2_storeh_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt), + "memh($Rs + #$u3_1) = $Rt"> { + bits<4> Rs; + bits<4> u3_1; + bits<4> Rt; + + let Inst{12-11} = 0b00; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + let Inst{3-0} = Rt; + } + +// SS2_storewi0: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#0"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10000; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SS2_storewi1: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#1"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10001; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SL2_jumpr31: Indirect unconditional jump through r31. +let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31: SUBInst < + (outs ), + (ins ), + "jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2} = 0b0; + } + +// SA1_combinezr: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinezr: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine(#0, $Rs)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b0; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_loadrh_io: Load half. 
+let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SA1_addrx: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addrx: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($_src_, $Rs)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<4> Rs; + + let Inst{12-8} = 0b11000; + let Inst{3-0} = Rx; + let Inst{7-4} = Rs; + } + +// SA1_setin1: Set to -1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_setin1: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "$Rd = #-1"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6} = 0b0; + let Inst{3-0} = Rd; + } + +// SA1_sxth: Sxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10100; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_combine0i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine0i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#0, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b00; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_combine2i: Combines. 
+let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine2i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#2, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b10; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_sxtb: Sxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxtb($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10101; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_clrf: Clear if false. +// SA1_clrf -> SA1_clrfnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrf: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b111; + let Inst{3-0} = Rd; + } + +// SL2_loadrb_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrb_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_0Imm:$u3_0), + "$Rd = memb($Rs + #$u3_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<3> u3_0; + + let Inst{12-11} = 0b10; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_0; + } + +// SA1_tfr: Tfr. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_tfr: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = $Rs"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10000; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SL2_loadrd_sp: Load dword. 
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_loadrd_sp: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u5_3Imm:$u5_3), + "$Rdd = memd(r29 + #$u5_3)"> { + bits<3> Rdd; + bits<8> u5_3; + + let Inst{12-8} = 0b11110; + let Inst{2-0} = Rdd; + let Inst{7-3} = u5_3{7-3}; + } + +// SA1_and1: And #1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_and1: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10010; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_storebi1: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#1"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10011; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_inc: Inc. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_inc: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10001; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_stored_sp: Store dword. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_stored_sp: SUBInst < + (outs ), + (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt), + "memd(r29 + #$s6_3) = $Rtt"> { + bits<9> s6_3; + bits<3> Rtt; + + let Inst{12-9} = 0b0101; + let Inst{8-3} = s6_3{8-3}; + let Inst{2-0} = Rtt; + } + +// SS2_storew_sp: Store word. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storew_sp: SUBInst < + (outs ), + (ins u5_2Imm:$u5_2, IntRegs:$Rt), + "memw(r29 + #$u5_2) = $Rt"> { + bits<7> u5_2; + bits<4> Rt; + + let Inst{12-9} = 0b0100; + let Inst{8-4} = u5_2{6-2}; + let Inst{3-0} = Rt; + } + +// SL2_jumpr31_fnew: Indirect conditional jump if false. 
+let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b111; + } + +// SA1_clrt: Clear if true. +// SA1_clrt -> SA1_clrtnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrt: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b110; + let Inst{3-0} = Rd; + } + +// SL2_return: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return: SUBInst < + (outs ), + (ins ), + "dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2} = 0b0; + } + +// SA1_dec: Dec. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_dec: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs,#-1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10011; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_seti: Set immed. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in +def V4_SA1_seti: SUBInst < + (outs IntRegs:$Rd), + (ins u6Ext:$u6), + "$Rd = #$u6"> { + bits<4> Rd; + bits<6> u6; + + let Inst{12-10} = 0b010; + let Inst{3-0} = Rd; + let Inst{9-4} = u6; + } + +// SL2_jumpr31_t: Indirect conditional jump if true. 
+// SL2_jumpr31_t -> SL2_jumpr31_tnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_t: SUBInst < + (outs ), + (ins ), + "if (p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b100; + } + +// SA1_clrfnew: Clear if false. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrfnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b101; + let Inst{3-0} = Rd; + } + +// SS1_storew_io: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS1_storew_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt), + "memw($Rs + #$u4_2) = $Rt"> { + bits<4> Rs; + bits<6> u4_2; + bits<4> Rt; + + let Inst{12} = 0b0; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + let Inst{3-0} = Rt; + } + +// SA1_zxtb: Zxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #255)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10111; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_addsp: Add. +let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addsp: SUBInst < + (outs IntRegs:$Rd), + (ins u6_2Imm:$u6_2), + "$Rd = add(r29, #$u6_2)"> { + bits<4> Rd; + bits<8> u6_2; + + let Inst{12-10} = 0b011; + let Inst{3-0} = Rd; + let Inst{9-4} = u6_2{7-2}; + } + +// SL2_loadri_sp: Load word. 
+let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadri_sp: SUBInst < + (outs IntRegs:$Rd), + (ins u5_2Imm:$u5_2), + "$Rd = memw(r29 + #$u5_2)"> { + bits<4> Rd; + bits<7> u5_2; + + let Inst{12-9} = 0b1110; + let Inst{3-0} = Rd; + let Inst{8-4} = u5_2{6-2}; + } + +// SS1_storeb_io: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS1_storeb_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt), + "memb($Rs + #$u4_0) = $Rt"> { + bits<4> Rs; + bits<4> u4_0; + bits<4> Rt; + + let Inst{12} = 0b1; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + let Inst{3-0} = Rt; + } + +// SL2_return_tnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b110; + } + +// SL2_return_fnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b111; + } + +// SA1_zxth: Zxth. 
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = zxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10110; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 535d1f91b493..75189b696ea2 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,9 +15,12 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -28,19 +31,30 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, MCContext &MC = Printer.OutContext; const MCExpr *ME; - ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC); + ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC); if (!MO.isJTI() && MO.getOffset()) - ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), + ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); return (MCOperand::createExpr(ME)); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, +void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, HexagonAsmPrinter& AP) { - MCI.setOpcode(MI->getOpcode()); + if(MI->getOpcode() == Hexagon::ENDLOOP0){ + HexagonMCInstrInfo::setInnerLoop(MCB); + return; + } + if(MI->getOpcode() == Hexagon::ENDLOOP1){ + HexagonMCInstrInfo::setOuterLoop(MCB); + return; + } + MCInst* MCI = new (AP.OutContext) MCInst; + MCI->setOpcode(MI->getOpcode()); + assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && + "MCI opcode should 
have been set on construction"); for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); @@ -67,7 +81,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, break; case MachineOperand::MO_MachineBasicBlock: MCO = MCOperand::createExpr - (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), + (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext)); break; case MachineOperand::MO_GlobalAddress: @@ -88,6 +102,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, break; } - MCI.addOperand(MCO); + MCI->addOperand(MCO); } + MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index b7f364ef0751..be8204b7de53 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -27,6 +27,7 @@ let PrintMethod = "printImmOperand" in { def s8Imm : Operand<i32>; def s8Imm64 : Operand<i64>; def s6Imm : Operand<i32>; + def s6_3Imm : Operand<i32>; def s4Imm : Operand<i32>; def s4_0Imm : Operand<i32>; def s4_1Imm : Operand<i32>; @@ -51,8 +52,14 @@ let PrintMethod = "printImmOperand" in { def u6_2Imm : Operand<i32>; def u6_3Imm : Operand<i32>; def u5Imm : Operand<i32>; + def u5_2Imm : Operand<i32>; + def u5_3Imm : Operand<i32>; def u4Imm : Operand<i32>; + def u4_0Imm : Operand<i32>; + def u4_2Imm : Operand<i32>; def u3Imm : Operand<i32>; + def u3_0Imm : Operand<i32>; + def u3_1Imm : Operand<i32>; def u2Imm : Operand<i32>; def u1Imm : Operand<i32>; def n8Imm : Operand<i32>; @@ -444,6 +451,7 @@ let PrintMethod = "printExtOperand" in { def s10Ext : Operand<i32>; def s9Ext : Operand<i32>; def s8Ext : Operand<i32>; + def s7Ext : Operand<i32>; def s6Ext : Operand<i32>; def s11_0Ext : Operand<i32>; def s11_1Ext : Operand<i32>; diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 4c987ed32a64..6253686b4993 100644 --- 
a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -4,8 +4,12 @@ add_llvm_library(LLVMHexagonDesc HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp HexagonMCCodeEmitter.cpp + HexagonMCCompound.cpp + HexagonMCDuplexInfo.cpp HexagonMCInstrInfo.cpp + HexagonMCShuffler.cpp HexagonMCTargetDesc.cpp + HexagonShuffler.cpp ) add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 155aa9ef9557..76894840153d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -7,19 +7,150 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "HexagonFixupKinds.h" #include "HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; +using namespace Hexagon; namespace { class HexagonAsmBackend : public MCAsmBackend { + uint8_t OSABI; + StringRef CPU; + mutable uint64_t relaxedCnt; + std::unique_ptr <MCInstrInfo> MCII; + std::unique_ptr <MCInst *> RelaxTarget; public: - HexagonAsmBackend(Target const & /*T*/) {} + HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : + OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){} - unsigned getNumFixupKinds() const override { return 0; } + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createHexagonELFObjectWriter(OS, OSABI, CPU); + } + + unsigned getNumFixupKinds() const override { + return Hexagon::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind 
Kind) const override { + const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = { + // This table *must* be in the same order as the fixup_* kinds in + // HexagonFixupKinds.h. + // + // name offset bits flags + {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LO16", 0, 32, 0}, + {"fixup_Hexagon_HI16", 0, 32, 0}, + {"fixup_Hexagon_32", 0, 32, 0}, + {"fixup_Hexagon_16", 0, 32, 0}, + {"fixup_Hexagon_8", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_0", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_1", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_2", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_3", 0, 32, 0}, + {"fixup_Hexagon_HL16", 0, 32, 0}, + {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_16_X", 0, 32, 0}, + {"fixup_Hexagon_12_X", 0, 32, 0}, + {"fixup_Hexagon_11_X", 0, 32, 0}, + {"fixup_Hexagon_10_X", 0, 32, 0}, + {"fixup_Hexagon_9_X", 0, 32, 0}, + {"fixup_Hexagon_8_X", 0, 32, 0}, + {"fixup_Hexagon_7_X", 0, 32, 0}, + {"fixup_Hexagon_6_X", 0, 32, 0}, + {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_COPY", 0, 32, 0}, + {"fixup_Hexagon_GLOB_DAT", 0, 32, 0}, + {"fixup_Hexagon_JMP_SLOT", 0, 32, 0}, + {"fixup_Hexagon_RELATIVE", 0, 32, 0}, + {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_LO16", 0, 
32, 0}, + {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_DTPMOD_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16", 0, 32, 0}, + {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_IE_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_32", 0, 32, 0}, + {"fixup_Hexagon_IE_16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16", 0, 32, 0}, + {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_11_X", 
0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_IE_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}}; + + if (Kind < FirstTargetFixupKind) { + return MCAsmBackend::getFixupKindInfo(Kind); + } + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/, unsigned /*DataSize*/, uint64_t /*Value*/, @@ -27,14 +158,119 @@ public: return; } - bool mayNeedRelaxation(MCInst const & /*Inst*/) const override { + bool isInstRelaxable(MCInst const &HMI) const { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI); + bool Relaxable = false; + // Branches and loop-setup insns are handled as necessary by relaxation. + if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV && + MCID.isBranch()) || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR && + HMI.getOpcode() != Hexagon::C4_addipc)) + if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) + Relaxable = true; + + return Relaxable; + } + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. 
+ bool mayNeedRelaxation(MCInst const &Inst) const override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + bool PreviousIsExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + auto const &Inst = *I.getInst(); + if (!PreviousIsExtender) { + if (isInstRelaxable(Inst)) + return true; + } + PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst); + } return false; } - bool fixupNeedsRelaxation(MCFixup const & /*Fixup*/, uint64_t /*Value*/, - MCRelaxableFragment const * /*DF*/, - MCAsmLayout const & /*Layout*/) const override { - llvm_unreachable("fixupNeedsRelaxation() unimplemented"); + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + MCInst const &MCB = DF->getInst(); + assert(HexagonMCInstrInfo::isBundle(MCB)); + + *RelaxTarget = nullptr; + MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction( + MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); + // If we cannot resolve the fixup value, it requires relaxation. 
+ if (!Resolved) { + switch ((unsigned)Fixup.getKind()) { + case fixup_Hexagon_B22_PCREL: + // GetFixupCount assumes B22 won't relax + // Fallthrough + default: + return false; + break; + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B7_PCREL: { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + return true; + } else { + return false; + } + break; + } + } + } + bool Relaxable = isInstRelaxable(MCI); + if (Relaxable == false) + return false; + + MCFixupKind Kind = Fixup.getKind(); + int64_t sValue = Value; + int64_t maxValue; + + switch ((unsigned)Kind) { + case fixup_Hexagon_B7_PCREL: + maxValue = 1 << 8; + break; + case fixup_Hexagon_B9_PCREL: + maxValue = 1 << 10; + break; + case fixup_Hexagon_B15_PCREL: + maxValue = 1 << 16; + break; + case fixup_Hexagon_B22_PCREL: + maxValue = 1 << 23; + break; + default: + maxValue = INT64_MAX; + break; + } + + bool isFarAway = -maxValue > sValue || sValue > maxValue - 1; + + if (isFarAway) { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + return true; + } + } + + return false; + } + + /// Simple predicate for targets where !Resolved implies requiring relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); } void relaxInstruction(MCInst const & /*Inst*/, @@ -49,26 +285,11 @@ public: }; } // end anonymous namespace -namespace { -class ELFHexagonAsmBackend : public HexagonAsmBackend { - uint8_t OSABI; - -public: - ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) - : HexagonAsmBackend(T), OSABI(OSABI) {} - - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - StringRef CPU("HexagonV4"); - return createHexagonELFObjectWriter(OS, OSABI, CPU); - } -}; -} // end 
anonymous namespace - namespace llvm { MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const & /*MRI*/, - StringRef TT, StringRef /*CPU*/) { + StringRef TT, StringRef CPU) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFHexagonAsmBackend(T, OSABI); + return new HexagonAsmBackend(T, OSABI, CPU); } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 6a72f205e9d3..f4d162ccf6a8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -43,6 +43,7 @@ namespace HexagonII { TypeXTYPE = 8, TypeMEMOP = 9, TypeNV = 10, + TypeDUPLEX = 11, TypePREFIX = 30, // Such as extenders. TypeENDLOOP = 31 // Such as end of a HW loop. }; @@ -190,7 +191,26 @@ namespace HexagonII { MO_GPREL }; - enum class InstParseBits : uint32_t { + // Hexagon Sub-instruction classes. + enum SubInstructionGroup { + HSIG_None = 0, + HSIG_L1, + HSIG_L2, + HSIG_S1, + HSIG_S2, + HSIG_A, + HSIG_Compound + }; + + // Hexagon Compound classes. 
+ enum CompoundGroup { + HCG_None = 0, + HCG_A, + HCG_B, + HCG_C + }; + + enum InstParseBits { INST_PARSE_MASK = 0x0000c000, INST_PARSE_PACKET_END = 0x0000c000, INST_PARSE_LOOP_END = 0x00008000, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index fde935b2758b..843072302b21 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Hexagon.h" +#include "MCTargetDesc/HexagonFixupKinds.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/Debug.h" @@ -40,17 +41,306 @@ HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, MCFixup const &Fixup, bool IsPCRel) const { + // determine the type of the relocation unsigned Type = (unsigned)ELF::R_HEX_NONE; - llvm::MCFixupKind Kind = Fixup.getKind(); + unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { - default: - DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); - llvm_unreachable("Unimplemented Fixup kind!"); - break; - case FK_Data_4: - Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32; - break; + default: + DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); + llvm_unreachable("Unimplemented Fixup kind!"); + break; + case FK_Data_4: + Type = (IsPCRel) ? 
ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + break; + case FK_PCRel_4: + Type = ELF::R_HEX_32_PCREL; + break; + case FK_Data_2: + Type = ELF::R_HEX_16; + break; + case FK_Data_1: + Type = ELF::R_HEX_8; + break; + case fixup_Hexagon_B22_PCREL: + Type = ELF::R_HEX_B22_PCREL; + break; + case fixup_Hexagon_B15_PCREL: + Type = ELF::R_HEX_B15_PCREL; + break; + case fixup_Hexagon_B7_PCREL: + Type = ELF::R_HEX_B7_PCREL; + break; + case fixup_Hexagon_LO16: + Type = ELF::R_HEX_LO16; + break; + case fixup_Hexagon_HI16: + Type = ELF::R_HEX_HI16; + break; + case fixup_Hexagon_32: + Type = ELF::R_HEX_32; + break; + case fixup_Hexagon_16: + Type = ELF::R_HEX_16; + break; + case fixup_Hexagon_8: + Type = ELF::R_HEX_8; + break; + case fixup_Hexagon_GPREL16_0: + Type = ELF::R_HEX_GPREL16_0; + break; + case fixup_Hexagon_GPREL16_1: + Type = ELF::R_HEX_GPREL16_1; + break; + case fixup_Hexagon_GPREL16_2: + Type = ELF::R_HEX_GPREL16_2; + break; + case fixup_Hexagon_GPREL16_3: + Type = ELF::R_HEX_GPREL16_3; + break; + case fixup_Hexagon_HL16: + Type = ELF::R_HEX_HL16; + break; + case fixup_Hexagon_B13_PCREL: + Type = ELF::R_HEX_B13_PCREL; + break; + case fixup_Hexagon_B9_PCREL: + Type = ELF::R_HEX_B9_PCREL; + break; + case fixup_Hexagon_B32_PCREL_X: + Type = ELF::R_HEX_B32_PCREL_X; + break; + case fixup_Hexagon_32_6_X: + Type = ELF::R_HEX_32_6_X; + break; + case fixup_Hexagon_B22_PCREL_X: + Type = ELF::R_HEX_B22_PCREL_X; + break; + case fixup_Hexagon_B15_PCREL_X: + Type = ELF::R_HEX_B15_PCREL_X; + break; + case fixup_Hexagon_B13_PCREL_X: + Type = ELF::R_HEX_B13_PCREL_X; + break; + case fixup_Hexagon_B9_PCREL_X: + Type = ELF::R_HEX_B9_PCREL_X; + break; + case fixup_Hexagon_B7_PCREL_X: + Type = ELF::R_HEX_B7_PCREL_X; + break; + case fixup_Hexagon_16_X: + Type = ELF::R_HEX_16_X; + break; + case fixup_Hexagon_12_X: + Type = ELF::R_HEX_12_X; + break; + case fixup_Hexagon_11_X: + Type = ELF::R_HEX_11_X; + break; + case fixup_Hexagon_10_X: + Type = ELF::R_HEX_10_X; + break; + case fixup_Hexagon_9_X: 
+ Type = ELF::R_HEX_9_X; + break; + case fixup_Hexagon_8_X: + Type = ELF::R_HEX_8_X; + break; + case fixup_Hexagon_7_X: + Type = ELF::R_HEX_7_X; + break; + case fixup_Hexagon_6_X: + Type = ELF::R_HEX_6_X; + break; + case fixup_Hexagon_32_PCREL: + Type = ELF::R_HEX_32_PCREL; + break; + case fixup_Hexagon_COPY: + Type = ELF::R_HEX_COPY; + break; + case fixup_Hexagon_GLOB_DAT: + Type = ELF::R_HEX_GLOB_DAT; + break; + case fixup_Hexagon_JMP_SLOT: + Type = ELF::R_HEX_JMP_SLOT; + break; + case fixup_Hexagon_RELATIVE: + Type = ELF::R_HEX_RELATIVE; + break; + case fixup_Hexagon_PLT_B22_PCREL: + Type = ELF::R_HEX_PLT_B22_PCREL; + break; + case fixup_Hexagon_GOTREL_LO16: + Type = ELF::R_HEX_GOTREL_LO16; + break; + case fixup_Hexagon_GOTREL_HI16: + Type = ELF::R_HEX_GOTREL_HI16; + break; + case fixup_Hexagon_GOTREL_32: + Type = ELF::R_HEX_GOTREL_32; + break; + case fixup_Hexagon_GOT_LO16: + Type = ELF::R_HEX_GOT_LO16; + break; + case fixup_Hexagon_GOT_HI16: + Type = ELF::R_HEX_GOT_HI16; + break; + case fixup_Hexagon_GOT_32: + Type = ELF::R_HEX_GOT_32; + break; + case fixup_Hexagon_GOT_16: + Type = ELF::R_HEX_GOT_16; + break; + case fixup_Hexagon_DTPMOD_32: + Type = ELF::R_HEX_DTPMOD_32; + break; + case fixup_Hexagon_DTPREL_LO16: + Type = ELF::R_HEX_DTPREL_LO16; + break; + case fixup_Hexagon_DTPREL_HI16: + Type = ELF::R_HEX_DTPREL_HI16; + break; + case fixup_Hexagon_DTPREL_32: + Type = ELF::R_HEX_DTPREL_32; + break; + case fixup_Hexagon_DTPREL_16: + Type = ELF::R_HEX_DTPREL_16; + break; + case fixup_Hexagon_GD_PLT_B22_PCREL: + Type = ELF::R_HEX_GD_PLT_B22_PCREL; + break; + case fixup_Hexagon_LD_PLT_B22_PCREL: + Type = ELF::R_HEX_LD_PLT_B22_PCREL; + break; + case fixup_Hexagon_GD_GOT_LO16: + Type = ELF::R_HEX_GD_GOT_LO16; + break; + case fixup_Hexagon_GD_GOT_HI16: + Type = ELF::R_HEX_GD_GOT_HI16; + break; + case fixup_Hexagon_GD_GOT_32: + Type = ELF::R_HEX_GD_GOT_32; + break; + case fixup_Hexagon_GD_GOT_16: + Type = ELF::R_HEX_GD_GOT_16; + break; + case 
fixup_Hexagon_LD_GOT_LO16: + Type = ELF::R_HEX_LD_GOT_LO16; + break; + case fixup_Hexagon_LD_GOT_HI16: + Type = ELF::R_HEX_LD_GOT_HI16; + break; + case fixup_Hexagon_LD_GOT_32: + Type = ELF::R_HEX_LD_GOT_32; + break; + case fixup_Hexagon_LD_GOT_16: + Type = ELF::R_HEX_LD_GOT_16; + break; + case fixup_Hexagon_IE_LO16: + Type = ELF::R_HEX_IE_LO16; + break; + case fixup_Hexagon_IE_HI16: + Type = ELF::R_HEX_IE_HI16; + break; + case fixup_Hexagon_IE_32: + Type = ELF::R_HEX_IE_32; + break; + case fixup_Hexagon_IE_GOT_LO16: + Type = ELF::R_HEX_IE_GOT_LO16; + break; + case fixup_Hexagon_IE_GOT_HI16: + Type = ELF::R_HEX_IE_GOT_HI16; + break; + case fixup_Hexagon_IE_GOT_32: + Type = ELF::R_HEX_IE_GOT_32; + break; + case fixup_Hexagon_IE_GOT_16: + Type = ELF::R_HEX_IE_GOT_16; + break; + case fixup_Hexagon_TPREL_LO16: + Type = ELF::R_HEX_TPREL_LO16; + break; + case fixup_Hexagon_TPREL_HI16: + Type = ELF::R_HEX_TPREL_HI16; + break; + case fixup_Hexagon_TPREL_32: + Type = ELF::R_HEX_TPREL_32; + break; + case fixup_Hexagon_TPREL_16: + Type = ELF::R_HEX_TPREL_16; + break; + case fixup_Hexagon_6_PCREL_X: + Type = ELF::R_HEX_6_PCREL_X; + break; + case fixup_Hexagon_GOTREL_32_6_X: + Type = ELF::R_HEX_GOTREL_32_6_X; + break; + case fixup_Hexagon_GOTREL_16_X: + Type = ELF::R_HEX_GOTREL_16_X; + break; + case fixup_Hexagon_GOTREL_11_X: + Type = ELF::R_HEX_GOTREL_11_X; + break; + case fixup_Hexagon_GOT_32_6_X: + Type = ELF::R_HEX_GOT_32_6_X; + break; + case fixup_Hexagon_GOT_16_X: + Type = ELF::R_HEX_GOT_16_X; + break; + case fixup_Hexagon_GOT_11_X: + Type = ELF::R_HEX_GOT_11_X; + break; + case fixup_Hexagon_DTPREL_32_6_X: + Type = ELF::R_HEX_DTPREL_32_6_X; + break; + case fixup_Hexagon_DTPREL_16_X: + Type = ELF::R_HEX_DTPREL_16_X; + break; + case fixup_Hexagon_DTPREL_11_X: + Type = ELF::R_HEX_DTPREL_11_X; + break; + case fixup_Hexagon_GD_GOT_32_6_X: + Type = ELF::R_HEX_GD_GOT_32_6_X; + break; + case fixup_Hexagon_GD_GOT_16_X: + Type = ELF::R_HEX_GD_GOT_16_X; + break; + case 
fixup_Hexagon_GD_GOT_11_X: + Type = ELF::R_HEX_GD_GOT_11_X; + break; + case fixup_Hexagon_LD_GOT_32_6_X: + Type = ELF::R_HEX_LD_GOT_32_6_X; + break; + case fixup_Hexagon_LD_GOT_16_X: + Type = ELF::R_HEX_LD_GOT_16_X; + break; + case fixup_Hexagon_LD_GOT_11_X: + Type = ELF::R_HEX_LD_GOT_11_X; + break; + case fixup_Hexagon_IE_32_6_X: + Type = ELF::R_HEX_IE_32_6_X; + break; + case fixup_Hexagon_IE_16_X: + Type = ELF::R_HEX_IE_16_X; + break; + case fixup_Hexagon_IE_GOT_32_6_X: + Type = ELF::R_HEX_IE_GOT_32_6_X; + break; + case fixup_Hexagon_IE_GOT_16_X: + Type = ELF::R_HEX_IE_GOT_16_X; + break; + case fixup_Hexagon_IE_GOT_11_X: + Type = ELF::R_HEX_IE_GOT_11_X; + break; + case fixup_Hexagon_TPREL_32_6_X: + Type = ELF::R_HEX_TPREL_32_6_X; + break; + case fixup_Hexagon_TPREL_16_X: + Type = ELF::R_HEX_TPREL_16_X; + break; + case fixup_Hexagon_TPREL_11_X: + Type = ELF::R_HEX_TPREL_11_X; + break; } return Type; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 15cda717cf1c..36f81465eef6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -28,7 +28,47 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -const char HexagonInstPrinter::PacketPadding = '\t'; +HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) + : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} + +void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, + StringRef Annot, + MCSubtargetInfo const &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + std::string Buffer; + { + raw_string_ostream TempStream(Buffer); + RawPrinter->printInst(MI, TempStream, "", STI); + } + StringRef Contents(Buffer); + auto PacketBundle = Contents.rsplit('\n'); + auto HeadTail = PacketBundle.first.split('\n'); + auto Preamble = 
"\t{\n\t\t"; + auto Separator = ""; + while(!HeadTail.first.empty()) { + O << Separator; + StringRef Inst; + auto Duplex = HeadTail.first.split('\v'); + if(!Duplex.second.empty()){ + O << Duplex.first << "\n"; + Inst = Duplex.second; + } + else + Inst = Duplex.first; + O << Preamble; + O << Inst; + HeadTail = HeadTail.second.split('\n'); + Preamble = ""; + Separator = "\n\t\t"; + } + O << "\n\t}" << PacketBundle.second; +} + +void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + RawPrinter->printRegName(O, RegNo); +} + // Return the minimum value that a constant extendable operand can have // without being extended. static int getMinValue(uint64_t TSFlags) { @@ -77,48 +117,44 @@ void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot, - const MCSubtargetInfo &STI) { - const char startPacket = '{', - endPacket = '}'; - // TODO: add outer HW loop when it's supported too. - if (MI->getOpcode() == Hexagon::ENDLOOP0) { - // Ending a harware loop is different from ending an regular packet. - assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet"); - - if (HexagonMCInstrInfo::isPacketBegin(*MI)) { - // There must be a packet to end a loop. - // FIXME: when shuffling is always run, this shouldn't be needed. - MCInst Nop; - StringRef NoAnnot; - - Nop.setOpcode (Hexagon::A2_nop); - HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); - printInst (&Nop, O, NoAnnot, STI); - } +void HexagonInstPrinter::setExtender(MCInst const &MCI) { + HasExtender = HexagonMCInstrInfo::isImmext(MCI); +} - // Close the packet. 
- if (HexagonMCInstrInfo::isPacketEnd(*MI)) - O << PacketPadding << endPacket; +void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, + StringRef Annot, + MCSubtargetInfo const &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + HasExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { + MCInst const &MCI = *I.getInst(); + if (HexagonMCInstrInfo::isDuplex(MII, MCI)) { + printInstruction(MCI.getOperand(1).getInst(), OS); + OS << '\v'; + HasExtender = false; + printInstruction(MCI.getOperand(0).getInst(), OS); + } else + printInstruction(&MCI, OS); + setExtender(MCI); + OS << "\n"; + } - printInstruction(MI, O); + auto Separator = ""; + if (HexagonMCInstrInfo::isInnerLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP0); + printInstruction(&ME, OS); } - else { - // Prefix the insn opening the packet. - if (HexagonMCInstrInfo::isPacketBegin(*MI)) - O << PacketPadding << startPacket << '\n'; - - printInstruction(MI, O); - - // Suffix the insn closing the packet. - if (HexagonMCInstrInfo::isPacketEnd(*MI)) - // Suffix the packet in a new line always, since the GNU assembler has - // issues with a closing brace on the same line as CONST{32,64}. 
- O << '\n' << PacketPadding << endPacket; + if (HexagonMCInstrInfo::isOuterLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP1); + printInstruction(&ME, OS); } - - printAnnotation(O, Annot); } void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -128,7 +164,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (MO.isReg()) { printRegName(O, MO.getReg()); } else if(MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } else if(MO.isImm()) { printImmOperand(MI, OpNo, O); } else { @@ -141,7 +177,7 @@ void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, const MCOperand& MO = MI->getOperand(OpNo); if(MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } else if(MO.isImm()) { O << MI->getOperand(OpNo).getImm(); } else { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 3fedaed8fbf9..534ac237d635 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -18,6 +18,21 @@ #include "llvm/MC/MCInstrInfo.h" namespace llvm { +class HexagonAsmInstPrinter : public MCInstPrinter { +public: + HexagonAsmInstPrinter(MCInstPrinter *RawPrinter); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + MCSubtargetInfo const &STI) override; + void printRegName(raw_ostream &O, unsigned RegNo) const override; + std::unique_ptr<MCInstPrinter> RawPrinter; +}; +/// Prints bundles as a newline separated list of individual instructions +/// Duplexes are separated by a vertical tab \v character +/// A trailing line includes bundle properties such as endloop0/1 +/// +/// r0 = add(r1, r2) +/// r0 = #0 \v jump 0x0 +/// :endloop0 :endloop1 class HexagonInstPrinter : public MCInstPrinter { public: explicit HexagonInstPrinter(MCAsmInfo const &MAI, @@ -74,11 +89,11 @@ namespace llvm { void 
printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) const; - static const char PacketPadding; - private: const MCInstrInfo &MII; + bool HasExtender; + void setExtender(MCInst const &MCI); }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index ad5e0fb15e7f..51d2f1c878dc 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; // Pin the vtable to this file. void HexagonMCAsmInfo::anchor() {} -HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) { +HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = nullptr; // .xword is only supported by V9. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index ab18f0b37ba6..dc0706994786 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -18,10 +18,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { + class Triple; + class HexagonMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit HexagonMCAsmInfo(StringRef TT); + explicit HexagonMCAsmInfo(const Triple &TT); }; } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index ae3953abba10..1eee852996fd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "mccodeemitter" @@ -31,38 +32,206 @@ using namespace Hexagon; 
STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); -namespace { -/// \brief 10.6 Instruction Packets -/// Possible values for instruction packet parse field. -enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 }; -/// \brief Returns the packet bits based on instruction position. -uint32_t getPacketBits(MCInst const &HMI) { - unsigned const ParseFieldOffset = 14; - ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end - : ParseField::last0; - return static_cast<uint32_t>(Field) << ParseFieldOffset; -} -void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { - OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x08) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x10) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x18) & 0xff); -} -} - HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT) : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), - Extended(new bool(false)) {} + Extended(new bool(false)), CurrentBundle(new MCInst const *) {} + +uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last, + MCInst const &MCB, + MCInst const &MCI) const { + bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI); + if (Instruction == 0) { + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Instruction == 1) { + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Duplex) { + assert(Instruction == Last); + return HexagonII::INST_PARSE_DUPLEX; + } + if(Instruction == Last) + return HexagonII::INST_PARSE_PACKET_END; + return HexagonII::INST_PARSE_NOT_END; +} void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const { - uint64_t Binary = 
getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI); - assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 && - "All instructions should be 32bit"); - (void)&MCII; - emitLittleEndian(Binary, OS); + MCInst &HMB = const_cast<MCInst &>(MI); + + assert(HexagonMCInstrInfo::isBundle(HMB)); + DEBUG(dbgs() << "Encoding bundle\n";); + *Addend = 0; + *Extended = false; + *CurrentBundle = &MI; + size_t Instruction = 0; + size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { + MCInst &HMI = const_cast<MCInst &>(*I.getInst()); + EncodeSingleInstruction(HMI, OS, Fixups, STI, + parseBits(Instruction, Last, HMB, HMI), + Instruction); + *Extended = HexagonMCInstrInfo::isImmext(HMI); + *Addend += HEXAGON_INSTR_SIZE; + ++Instruction; + } + return; +} + +/// EncodeSingleInstruction - Emit a single +void HexagonMCCodeEmitter::EncodeSingleInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const { + MCInst HMB = MI; + assert(!HexagonMCInstrInfo::isBundle(HMB)); + uint64_t Binary; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! 
+ assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() && + "pseudo-instruction found"); + DEBUG(dbgs() << "Encoding insn" + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + + if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) { + // Calculate the new value distance to the associated producer + MCOperand &MCO = + HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB)); + unsigned SOffset = 0; + unsigned Register = MCO.getReg(); + unsigned Register1; + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.begin() + Index - 1; + for (;; --i) { + assert(i != Instructions.begin() - 1 && "Couldn't find producer"); + MCInst const &Inst = *i->getInst(); + if (HexagonMCInstrInfo::isImmext(Inst)) + continue; + ++SOffset; + Register1 = + HexagonMCInstrInfo::hasNewValue(MCII, Inst) + ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() + : static_cast<unsigned>(Hexagon::NoRegister); + if (Register != Register1) + // This isn't the register we're looking for + continue; + if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) + // Producer is unpredicated + break; + assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) && + "Unpredicated consumer depending on predicated producer"); + if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) == + HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB)) + // Producer predicate sense matched ours + break; + } + // Hexagon PRM 10.11 Construct Nt from distance + unsigned Offset = SOffset; + Offset <<= 1; + MCO.setReg(Offset + Hexagon::R0); + } + + Binary = getBinaryCodeForInstr(HMB, Fixups, STI); + // Check for unimplemented instructions. Immediate extenders + // are encoded as zero, so they need to be accounted for. + if ((!Binary) && + ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) && + (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) && + (HMB.getOpcode() != A4_ext_g))) { + // Use a A2_nop for unimplemented instructions. 
+ DEBUG(dbgs() << "Unimplemented inst: " + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + llvm_unreachable("Unimplemented Instruction"); + } + Binary |= Parse; + + // if we need to emit a duplexed instruction + if (HMB.getOpcode() >= Hexagon::DuplexIClass0 && + HMB.getOpcode() <= Hexagon::DuplexIClassF) { + assert(Parse == HexagonII::INST_PARSE_DUPLEX && + "Emitting duplex without duplex parse bits"); + unsigned dupIClass; + switch (HMB.getOpcode()) { + case Hexagon::DuplexIClass0: + dupIClass = 0; + break; + case Hexagon::DuplexIClass1: + dupIClass = 1; + break; + case Hexagon::DuplexIClass2: + dupIClass = 2; + break; + case Hexagon::DuplexIClass3: + dupIClass = 3; + break; + case Hexagon::DuplexIClass4: + dupIClass = 4; + break; + case Hexagon::DuplexIClass5: + dupIClass = 5; + break; + case Hexagon::DuplexIClass6: + dupIClass = 6; + break; + case Hexagon::DuplexIClass7: + dupIClass = 7; + break; + case Hexagon::DuplexIClass8: + dupIClass = 8; + break; + case Hexagon::DuplexIClass9: + dupIClass = 9; + break; + case Hexagon::DuplexIClassA: + dupIClass = 10; + break; + case Hexagon::DuplexIClassB: + dupIClass = 11; + break; + case Hexagon::DuplexIClassC: + dupIClass = 12; + break; + case Hexagon::DuplexIClassD: + dupIClass = 13; + break; + case Hexagon::DuplexIClassE: + dupIClass = 14; + break; + case Hexagon::DuplexIClassF: + dupIClass = 15; + break; + default: + llvm_unreachable("Unimplemented DuplexIClass"); + break; + } + // 29 is the bit position. + // 0b1110 =0xE bits are masked off and down shifted by 1 bit. 
+ // Last bit is moved to bit position 13 + Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13); + + const MCInst *subInst0 = HMB.getOperand(0).getInst(); + const MCInst *subInst1 = HMB.getOperand(1).getInst(); + + // get subinstruction slot 0 + unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI); + // get subinstruction slot 1 + unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI); + + Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16); + } + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++MCNumEmitted; } @@ -182,7 +351,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, { int64_t Res; - if (ME->EvaluateAsAbsolute(Res)) + if (ME->evaluateAsAbsolute(Res)) return Res; MCExpr::ExprKind MK = ME->getKind(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 939380af1013..9aa258cee4c6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -30,6 +30,7 @@ class HexagonMCCodeEmitter : public MCCodeEmitter { MCInstrInfo const &MCII; std::unique_ptr<unsigned> Addend; std::unique_ptr<bool> Extended; + std::unique_ptr<MCInst const *> CurrentBundle; // helper routine for getMachineOpValue() unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, @@ -39,12 +40,21 @@ class HexagonMCCodeEmitter : public MCCodeEmitter { public: HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); + // Return parse bits for instruction `MCI' inside bundle `MCB' + uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB, + MCInst const &MCI) const; + MCSubtargetInfo const &getSubtargetInfo() const; void encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const override; + void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS, + 
SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, + uint32_t Parse, size_t Index) const; + // \brief TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(MCInst const &MI, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp new file mode 100644 index 000000000000..108093547f82 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -0,0 +1,420 @@ + +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file looks at a packet and tries to form compound insns +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mccompound" + +// Positions 0-7 shared by the 8-entry opcode tables below; each table lists +// its entries in this same order. +enum OpcodeIndex { + fp0_jump_nt = 0, + fp0_jump_t, + fp1_jump_nt, + fp1_jump_t, + tp0_jump_nt, + tp0_jump_t, + tp1_jump_nt, + tp1_jump_t +}; + +unsigned tstBitOpcode[8] = {J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, + J4_tstbit0_fp1_jump_nt, J4_tstbit0_fp1_jump_t, + J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t, + J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t}; +unsigned cmpeqBitOpcode[8] = {J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, + J4_cmpeq_fp1_jump_nt, J4_cmpeq_fp1_jump_t, + 
J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, + J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t}; +unsigned cmpgtBitOpcode[8] = {J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, + J4_cmpgt_fp1_jump_nt, J4_cmpgt_fp1_jump_t, + J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t, + J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t}; +unsigned cmpgtuBitOpcode[8] = {J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, + J4_cmpgtu_fp1_jump_nt, J4_cmpgtu_fp1_jump_t, + J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t, + J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t}; +unsigned cmpeqiBitOpcode[8] = {J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, + J4_cmpeqi_fp1_jump_nt, J4_cmpeqi_fp1_jump_t, + J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t, + J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t}; +unsigned cmpgtiBitOpcode[8] = {J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, + J4_cmpgti_fp1_jump_nt, J4_cmpgti_fp1_jump_t, + J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t, + J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t}; +unsigned cmpgtuiBitOpcode[8] = {J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, + J4_cmpgtui_fp1_jump_nt, J4_cmpgtui_fp1_jump_t, + J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t, + J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t}; +unsigned cmpeqn1BitOpcode[8] = {J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, + J4_cmpeqn1_fp1_jump_nt, J4_cmpeqn1_fp1_jump_t, + J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t, + J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t}; +unsigned cmpgtn1BitOpcode[8] = { + J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt, + J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, J4_cmpgtn1_tp0_jump_t, + J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t, +}; + +// enum HexagonII::CompoundGroup +namespace { +unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI.getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. 
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + // Extended instructions are never reported as compound candidates. The + // function returns a group code, so use HCG_None rather than `false`. + if (IsExtended) + return HexagonII::HCG_None; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + if (IsExtended) + return HexagonII::HCG_None; + // P0/P1 = cmp.xx(Rs,#U5) or cmp.xx(Rs,#-1) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) || + (MI.getOperand(2).getImm() == -1))) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + if (IsExtended) + return HexagonII::HCG_None; + // Rd = Rs + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + if (IsExtended) + return HexagonII::HCG_None; + // Rd = #u6 + DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 && + MI.getOperand(1).getImm() >= 0 && + HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + if (IsExtended) + return HexagonII::HCG_None; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI.getOperand(2).isImm() && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + (MI.getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is 
used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI.getOperand(0).getReg(); + if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} +} + +/// getCompoundOp - Return the index from 0-7 into the above opcode lists. +namespace { +unsigned getCompoundOp(MCInst const &HMCI) { + const MCOperand &Predicate = HMCI.getOperand(0); + unsigned PredReg = Predicate.getReg(); + + assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) || + (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3)); + + switch (HMCI.getOpcode()) { + default: + llvm_unreachable("Expected match not found.\n"); + break; + case Hexagon::J2_jumpfnew: + return (PredReg == Hexagon::P0) ? fp0_jump_nt : fp1_jump_nt; + case Hexagon::J2_jumpfnewpt: + return (PredReg == Hexagon::P0) ? fp0_jump_t : fp1_jump_t; + case Hexagon::J2_jumptnew: + return (PredReg == Hexagon::P0) ? tp0_jump_nt : tp1_jump_nt; + case Hexagon::J2_jumptnewpt: + return (PredReg == Hexagon::P0) ? 
tp0_jump_t : tp1_jump_t; + } +} +} + +namespace { +MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { + MCInst *CompoundInsn = 0; + unsigned compoundOpcode; + MCOperand Rs, Rt; + + switch (L.getOpcode()) { + default: + DEBUG(dbgs() << "Possible compound ignored\n"); + return CompoundInsn; + + case Hexagon::A2_tfrsi: + Rt = L.getOperand(0); + compoundOpcode = J4_jumpseti; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(L.getOperand(1)); // Immediate + CompoundInsn->addOperand(R.getOperand(0)); // Jump target + break; + + case Hexagon::A2_tfr: + Rt = L.getOperand(0); + Rs = L.getOperand(1); + + compoundOpcode = J4_jumpsetr; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(0)); // Jump target. + + break; + + case Hexagon::C2_cmpeq: + DEBUG(dbgs() << "CX: C2_cmpeq\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgt: + DEBUG(dbgs() << "CX: C2_cmpgt\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtu: + DEBUG(dbgs() << "CX: C2_cmpgtu\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + 
CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpeqi: + DEBUG(dbgs() << "CX: C2_cmpeqi\n"); + if (L.getOperand(2).getImm() == -1) + compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (L.getOperand(2).getImm() != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgti: + DEBUG(dbgs() << "CX: C2_cmpgti\n"); + if (L.getOperand(2).getImm() == -1) + compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (L.getOperand(2).getImm() != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtui: + DEBUG(dbgs() << "CX: C2_cmpgtui\n"); + Rs = L.getOperand(1); + compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::S2_tstbit_i: + DEBUG(dbgs() << "CX: S2_tstbit_i\n"); + Rs = L.getOperand(1); + compoundOpcode = tstBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(1)); + break; + } + + return CompoundInsn; +} +} + +/// Non-Symmetrical. See if these two instructions are fit for compound pair. 
+namespace { +bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { + unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); + unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); + // We have two candidates - check that this is the same register + // we are talking about. + unsigned Opca = MIa.getOpcode(); + if (MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_C && + (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi)) + return true; + return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && + (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); +} +} + +namespace { +bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI)); + bool JExtended = false; + for (MCInst::iterator J = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + J != MCI.end(); ++J) { + MCInst const *JumpInst = J->getInst(); + if (HexagonMCInstrInfo::isImmext(*JumpInst)) { + JExtended = true; + continue; + } + if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == + HexagonII::TypeJ) { + // Try to pair with another insn (B)undled with jump. 
+ bool BExtended = false; + for (MCInst::iterator B = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + B != MCI.end(); ++B) { + MCInst const *Inst = B->getInst(); + if (JumpInst == Inst) + continue; + if (HexagonMCInstrInfo::isImmext(*Inst)) { + BExtended = true; + continue; + } + DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << "," + << Inst->getOpcode() << "\n"); + if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) { + MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst); + if (CompoundInsn) { + DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," + << JumpInst->getOpcode() << " Compounds to " + << CompoundInsn->getOpcode() << "\n"); + J->setInst(CompoundInsn); + MCI.erase(B); + return true; + } + } + BExtended = false; + } + } + JExtended = false; + } + return false; +} +} + +/// tryCompound - Given a bundle, check for compound insns; when one +/// is found, update the contents of the bundle with the compound insn. +/// If a compound instruction is found then the bundle will have one +/// additional slot. +void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(MCI.getOpcode() == Hexagon::BUNDLE && + "Non-Bundle where Bundle expected"); + + // By definition a compound must have 2 insn. + if (MCI.size() < 2) + return; + + // Look for compounds until none are found, only update the bundle when + // a compound is found. + while (lookForCompound(MCII, Context, MCI)) + ; + + return; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp new file mode 100644 index 000000000000..eb629774a2cd --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -0,0 +1,1100 @@ +//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements duplexing of instructions to reduce code size +// +//===----------------------------------------------------------------------===// + +#include "HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mcduplex-info" + +// pair table of subInstructions with opcodes +static std::pair<unsigned, unsigned> opcodeData[] = { + std::make_pair((unsigned)V4_SA1_addi, 0), + std::make_pair((unsigned)V4_SA1_addrx, 6144), + std::make_pair((unsigned)V4_SA1_addsp, 3072), + std::make_pair((unsigned)V4_SA1_and1, 4608), + std::make_pair((unsigned)V4_SA1_clrf, 6768), + std::make_pair((unsigned)V4_SA1_clrfnew, 6736), + std::make_pair((unsigned)V4_SA1_clrt, 6752), + std::make_pair((unsigned)V4_SA1_clrtnew, 6720), + std::make_pair((unsigned)V4_SA1_cmpeqi, 6400), + std::make_pair((unsigned)V4_SA1_combine0i, 7168), + std::make_pair((unsigned)V4_SA1_combine1i, 7176), + std::make_pair((unsigned)V4_SA1_combine2i, 7184), + std::make_pair((unsigned)V4_SA1_combine3i, 7192), + std::make_pair((unsigned)V4_SA1_combinerz, 7432), + std::make_pair((unsigned)V4_SA1_combinezr, 7424), + std::make_pair((unsigned)V4_SA1_dec, 4864), + std::make_pair((unsigned)V4_SA1_inc, 4352), + std::make_pair((unsigned)V4_SA1_seti, 2048), + std::make_pair((unsigned)V4_SA1_setin1, 6656), + std::make_pair((unsigned)V4_SA1_sxtb, 5376), + std::make_pair((unsigned)V4_SA1_sxth, 5120), + std::make_pair((unsigned)V4_SA1_tfr, 4096), + std::make_pair((unsigned)V4_SA1_zxtb, 5888), + std::make_pair((unsigned)V4_SA1_zxth, 5632), + std::make_pair((unsigned)V4_SL1_loadri_io, 0), + std::make_pair((unsigned)V4_SL1_loadrub_io, 4096), + std::make_pair((unsigned)V4_SL2_deallocframe, 7936), + 
std::make_pair((unsigned)V4_SL2_jumpr31, 8128), + std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133), + std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135), + std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132), + std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134), + std::make_pair((unsigned)V4_SL2_loadrb_io, 4096), + std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680), + std::make_pair((unsigned)V4_SL2_loadrh_io, 0), + std::make_pair((unsigned)V4_SL2_loadri_sp, 7168), + std::make_pair((unsigned)V4_SL2_loadruh_io, 2048), + std::make_pair((unsigned)V4_SL2_return, 8000), + std::make_pair((unsigned)V4_SL2_return_f, 8005), + std::make_pair((unsigned)V4_SL2_return_fnew, 8007), + std::make_pair((unsigned)V4_SL2_return_t, 8004), + std::make_pair((unsigned)V4_SL2_return_tnew, 8006), + std::make_pair((unsigned)V4_SS1_storeb_io, 4096), + std::make_pair((unsigned)V4_SS1_storew_io, 0), + std::make_pair((unsigned)V4_SS2_allocframe, 7168), + std::make_pair((unsigned)V4_SS2_storebi0, 4608), + std::make_pair((unsigned)V4_SS2_storebi1, 4864), + std::make_pair((unsigned)V4_SS2_stored_sp, 2560), + std::make_pair((unsigned)V4_SS2_storeh_io, 0), + std::make_pair((unsigned)V4_SS2_storew_sp, 2048), + std::make_pair((unsigned)V4_SS2_storewi0, 4096), + std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; + +static std::map<unsigned, unsigned> + subinstOpcodeMap(opcodeData, + opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0])); + +bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == 
HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + +/// Return the duplex class value for the ordered sub-instruction group pair +/// (Ga, Gb). Every pair without a dedicated case yields 0xFFFFFFFF. +unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + break; + case HexagonII::HSIG_L1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0; + case HexagonII::HSIG_A: + return 0x4; + } + // No class for this Gb: leave the outer switch instead of falling + // through into the next Ga case and returning that case's class. + break; + case HexagonII::HSIG_L2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x1; + case HexagonII::HSIG_L2: + return 0x2; + case HexagonII::HSIG_A: + return 0x5; + } + break; + case HexagonII::HSIG_S1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x8; + case HexagonII::HSIG_L2: + return 0x9; + case HexagonII::HSIG_S1: + return 0xA; + case HexagonII::HSIG_A: + return 0x6; + } + break; + case HexagonII::HSIG_S2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0xC; + case HexagonII::HSIG_L2: + return 0xD; + case HexagonII::HSIG_S1: + return 0xB; + case HexagonII::HSIG_S2: + return 0xE; + case HexagonII::HSIG_A: + return 0x7; + } + break; + case HexagonII::HSIG_A: + switch (Gb) { + default: + break; + case HexagonII::HSIG_A: + return 0x3; + } + break; + case HexagonII::HSIG_Compound: + switch (Gb) { + case HexagonII::HSIG_Compound: + return 0xFFFFFFFF; + } + break; + } + return 0xFFFFFFFF; +} + +unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { + unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg; + + switch (MCI.getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + // Special case this one from Group L2. 
+ // Rd = memw(r29+#u5:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + MCI.getOperand(2).isImm() && + isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + // Rd = memw(Rs+#u4:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(2).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) { + return HexagonII::HSIG_L1; + } + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L1; + } + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && + isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if 
(HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + MCI.getOperand(2).isImm() && + isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::L4_return: + + case Hexagon::L2_deallocframe: + + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. + DstReg = MCI.getOperand(0).getReg(); + if (Hexagon::R31 == DstReg) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::J2_jumprt: + case Hexagon::J2_jumprf: + case Hexagon::J2_jumprtnew: + case Hexagon::J2_jumprfnew: + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnew: + case Hexagon::JMPretfnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt: + DstReg = MCI.getOperand(1).getReg(); + SrcReg = MCI.getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) && + (Hexagon::R31 == DstReg)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L4_return_t: + + case Hexagon::L4_return_f: + + case Hexagon::L4_return_tnew_pnt: + + case Hexagon::L4_return_fnew_pnt: + + case Hexagon::L4_return_tnew_pt: + + case Hexagon::L4_return_fnew_pt: + // [if ([!]p0[.new])] dealloc_return + SrcReg = MCI.getOperand(0).getReg(); + if (Hexagon::P0 == SrcReg) { + return HexagonII::HSIG_L2; + } + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+ // memw(r29+#u5:2) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntReg(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() && + isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + // memw(Rs+#u4:2) = Rt + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S1; + } + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S1; + } + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && + HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && + MCI.getOperand(1).isImm() && + isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = 
MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) && + MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) && + MCI.getOperand(2).isImm() && MCI.getOperand(2).isImm() && + isUInt<1>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_allocframe: + if (MCI.getOperand(0).isImm() && + isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) { + return HexagonII::HSIG_S2; + } + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + MCI.getOperand(2).isImm() && + isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_A; + } + // Rx = add(Rx,#s7) + if (DstReg == SrcReg) { + return HexagonII::HSIG_A; + } + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || + (MCI.getOperand(2).getImm() == -1))) { + return HexagonII::HSIG_A; + } + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MCI.getOperand(0).getReg(); + Src1Reg = 
MCI.getOperand(1).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_andir: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || + (MCI.getOperand(2).getImm() == 255))) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_tfrsi: + DstReg = MCI.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>; + DstReg = MCI.getOperand(0).getReg(); // Rd + PredReg = MCI.getOperand(1).getReg(); // P0 + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() && + MCI.getOperand(2).getImm() == 0) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (Hexagon::P0 == DstReg && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = 
MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + // TODO: Handle Globals/Symbols + (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) && + ((MCI.getOperand(2).isImm() && + isUInt<2>(MCI.getOperand(2).getImm())))) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + } + + return HexagonII::HSIG_None; +} + +bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { + + unsigned DstReg, SrcReg; + + switch (potentialDuplex.getOpcode()) { + case Hexagon::A2_addi: + // testing for case of: Rx = add(Rx,#s7) + DstReg = potentialDuplex.getOperand(0).getReg(); + SrcReg = potentialDuplex.getOperand(1).getReg(); + if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (potentialDuplex.getOperand(2).isExpr()) + return true; + if (potentialDuplex.getOperand(2).isImm() && + !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm()))) + return 
true; + } + break; + case Hexagon::A2_tfrsi: + DstReg = potentialDuplex.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (potentialDuplex.getOperand(1).isExpr()) + return true; + // Check for case of Rd = #-1. + if (potentialDuplex.getOperand(1).isImm() && + (potentialDuplex.getOperand(1).getImm() == -1)) + return false; + // Check for case of Rd = #u6. + if (potentialDuplex.getOperand(1).isImm() && + !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm())) + return true; + } + break; + default: + break; + } + return false; +} + +/// non-Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII, + MCInst const &MIa, bool ExtendedA, + MCInst const &MIb, bool ExtendedB, + bool bisReversable) { + // Slot 1 cannot be extended in duplexes PRM 10.5 + if (ExtendedA) + return false; + // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5 + if (ExtendedB) { + unsigned Opcode = MIb.getOpcode(); + if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi)) + return false; + } + unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa), + MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb); + + // If a duplex contains 2 insns in the same group, the insns must be + // ordered such that the numerically smaller opcode is in slot 1. 
+ if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { + MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa); + MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb); + + unsigned zeroedSubInstS0 = + subinstOpcodeMap.find(SubInst0.getOpcode())->second; + unsigned zeroedSubInstS1 = + subinstOpcodeMap.find(SubInst1.getOpcode())->second; + + if (zeroedSubInstS0 < zeroedSubInstS1) + // subinstS0 (maps to slot 0) must be greater than + // subinstS1 (maps to slot 1) + return false; + } + + // allocframe must always be in slot 0 + if (MIb.getOpcode() == Hexagon::S2_allocframe) + return false; + + if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) { + // Prevent 2 instructions with extenders from duplexing + // Note that MIb (slot1) can be extended and MIa (slot0) + // can never be extended + if (subInstWouldBeExtended(MIa)) + return false; + + // If duplexing produces an extender, but the original did not + // have an extender, do not duplex. + if (subInstWouldBeExtended(MIb) && !ExtendedB) + return false; + } + + // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb). + if (MIbG == HexagonII::HSIG_L2) { + if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() && + (MIb.getOperand(1).getReg() == Hexagon::R31)) + return false; + if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() && + (MIb.getOperand(0).getReg() == Hexagon::R31)) + return false; + } + + // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb); + // therefore, not duplexable if slot 1 is a store, and slot 0 is not. + if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) { + if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2)) + return false; + } + + return (isDuplexPairMatch(MIaG, MIbG)); +} + +/// Symmetrical. See if these two instructions are fit for duplex pair. 
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) { + unsigned MIaG = getDuplexCandidateGroup(MIa), + MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + +inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, + unsigned opNum) { + if (Inst.getOperand(opNum).isReg()) { + switch (Inst.getOperand(opNum).getReg()) { + default: + llvm_unreachable("Not Duplexable Register"); + break; + case Hexagon::R0: + case Hexagon::R1: + case Hexagon::R2: + case Hexagon::R3: + case Hexagon::R4: + case Hexagon::R5: + case Hexagon::R6: + case Hexagon::R7: + case Hexagon::D0: + case Hexagon::D1: + case Hexagon::D2: + case Hexagon::D3: + case Hexagon::R16: + case Hexagon::R17: + case Hexagon::R18: + case Hexagon::R19: + case Hexagon::R20: + case Hexagon::R21: + case Hexagon::R22: + case Hexagon::R23: + case Hexagon::D8: + case Hexagon::D9: + case Hexagon::D10: + case Hexagon::D11: + subInstPtr.addOperand(Inst.getOperand(opNum)); + break; + } + } else + subInstPtr.addOperand(Inst.getOperand(opNum)); +} + +MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { + MCInst Result; + switch (Inst.getOpcode()) { + default: + // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; + llvm_unreachable("Unimplemented subinstruction \n"); + break; + case Hexagon::A2_addi: + if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) { + Result.setOpcode(Hexagon::V4_SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + else if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 1,3 SUBInst $Rd = add(r29, 
#$u6_2) + else { + Result.setOpcode(Hexagon::V4_SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + case Hexagon::A2_add: + Result.setOpcode(Hexagon::V4_SA1_addrx); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) + case Hexagon::S2_allocframe: + Result.setOpcode(Hexagon::V4_SS2_allocframe); + addOps(Result, Inst, 0); + break; // 1 SUBInst allocframe(#$u5_3) + case Hexagon::A2_andir: + if (Inst.getOperand(2).getImm() == 255) { + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + } else { + Result.setOpcode(Hexagon::V4_SA1_and1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = and($Rs, #1) + } + case Hexagon::C2_cmpeqi: + Result.setOpcode(Hexagon::V4_SA1_cmpeqi); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) + case Hexagon::A4_combineii: + case Hexagon::A2_combineii: + if (Inst.getOperand(1).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SA1_combine1i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) + } + + if (Inst.getOperand(1).getImm() == 3) { + Result.setOpcode(Hexagon::V4_SA1_combine3i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) + } + if (Inst.getOperand(1).getImm() == 0) { + Result.setOpcode(Hexagon::V4_SA1_combine0i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) + } + if (Inst.getOperand(1).getImm() == 2) { + Result.setOpcode(Hexagon::V4_SA1_combine2i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) + } + case Hexagon::A4_combineir: + Result.setOpcode(Hexagon::V4_SA1_combinezr); + 
addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) + + case Hexagon::A4_combineri: + Result.setOpcode(Hexagon::V4_SA1_combinerz); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rdd = combine($Rs, #0) + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_tnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_tnew); + break; // none SUBInst if (p0.new) dealloc_return:nt + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_fnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_fnew); + break; // none SUBInst if (!p0.new) dealloc_return:nt + case Hexagon::L4_return_f: + Result.setOpcode(Hexagon::V4_SL2_return_f); + break; // none SUBInst if (!p0) dealloc_return + case Hexagon::L4_return_t: + Result.setOpcode(Hexagon::V4_SL2_return_t); + break; // none SUBInst if (p0) dealloc_return + case Hexagon::L4_return: + Result.setOpcode(Hexagon::V4_SL2_return); + break; // none SUBInst dealloc_return + case Hexagon::L2_deallocframe: + Result.setOpcode(Hexagon::V4_SL2_deallocframe); + break; // none SUBInst deallocframe + case Hexagon::EH_RETURN_JMPR: + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + Result.setOpcode(Hexagon::V4_SL2_jumpr31); + break; // none SUBInst jumpr r31 + case Hexagon::J2_jumprf: + case Hexagon::JMPretf: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_f); + break; // none SUBInst if (!p0) jumpr r31 + case Hexagon::J2_jumprfnew: + case Hexagon::JMPretfnewpt: + case Hexagon::JMPretfnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew); + break; // none SUBInst if (!p0.new) jumpr:nt r31 + case Hexagon::J2_jumprt: + case Hexagon::JMPrett: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_t); + break; // none SUBInst if (p0) jumpr r31 + case Hexagon::J2_jumprtnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPrettnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew); + break; // none SUBInst if (p0.new) jumpr:nt r31 + case Hexagon::L2_loadrb_io: + 
Result.setOpcode(Hexagon::V4_SL2_loadrb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0) + case Hexagon::L2_loadrd_io: + Result.setOpcode(Hexagon::V4_SL2_loadrd_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3) + case Hexagon::L2_loadrh_io: + Result.setOpcode(Hexagon::V4_SL2_loadrh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1) + case Hexagon::L2_loadrub_io: + Result.setOpcode(Hexagon::V4_SL1_loadrub_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0) + case Hexagon::L2_loadruh_io: + Result.setOpcode(Hexagon::V4_SL2_loadruh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1) + case Hexagon::L2_loadri_io: + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SL2_loadri_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2) + } else { + Result.setOpcode(Hexagon::V4_SL1_loadri_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) + } + case Hexagon::S4_storeirb_io: + if (Inst.getOperand(2).getImm() == 0) { + Result.setOpcode(Hexagon::V4_SS2_storebi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 + } else if (Inst.getOperand(2).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SS2_storebi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1 + } + case Hexagon::S2_storerb_io: + Result.setOpcode(Hexagon::V4_SS1_storeb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); 
+ break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S2_storerd_io: + Result.setOpcode(Hexagon::V4_SS2_stored_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt + case Hexagon::S2_storerh_io: + Result.setOpcode(Hexagon::V4_SS2_storeh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt + case Hexagon::S4_storeiri_io: + if (Inst.getOperand(2).getImm() == 0) { + Result.setOpcode(Hexagon::V4_SS2_storewi0); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0 + } else if (Inst.getOperand(2).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SS2_storewi1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1 + } else if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt + } + case Hexagon::S2_storeri_io: + if (Inst.getOperand(0).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SS2_storew_sp); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt + } else { + Result.setOpcode(Hexagon::V4_SS1_storew_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt + } + break; + case Hexagon::A2_sxtb: + Result.setOpcode(Hexagon::V4_SA1_sxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxtb($Rs) + case Hexagon::A2_sxth: + Result.setOpcode(Hexagon::V4_SA1_sxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = sxth($Rs) + case Hexagon::A2_tfr: + Result.setOpcode(Hexagon::V4_SA1_tfr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = $Rs + case 
Hexagon::C2_cmovenewif: + Result.setOpcode(Hexagon::V4_SA1_clrfnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0.new) $Rd = #0 + case Hexagon::C2_cmovenewit: + Result.setOpcode(Hexagon::V4_SA1_clrtnew); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (p0.new) $Rd = #0 + case Hexagon::C2_cmoveif: + Result.setOpcode(Hexagon::V4_SA1_clrf); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (!p0) $Rd = #0 + case Hexagon::C2_cmoveit: + Result.setOpcode(Hexagon::V4_SA1_clrt); + addOps(Result, Inst, 0); + break; // 2 SUBInst if (p0) $Rd = #0 + case Hexagon::A2_tfrsi: + if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) { + Result.setOpcode(Hexagon::V4_SA1_setin1); + addOps(Result, Inst, 0); + break; // 2 1 SUBInst $Rd = #-1 + } else { + Result.setOpcode(Hexagon::V4_SA1_seti); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = #$u6 + } + case Hexagon::A2_zxtb: + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + + case Hexagon::A2_zxth: + Result.setOpcode(Hexagon::V4_SA1_zxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = zxth($Rs) + } + return Result; +} + +static bool isStoreInst(unsigned opCode) { + switch (opCode) { + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerd_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S2_allocframe: + return true; + default: + return false; + } +} + +SmallVector<DuplexCandidate, 8> +HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB) { + assert(isBundle(MCB)); + SmallVector<DuplexCandidate, 8> duplexToTry; + // Use an "order matters" version of isDuplexPair. 
+ unsigned numInstrInPacket = MCB.getNumOperands(); + + for (unsigned distance = 1; distance < numInstrInPacket; ++distance) { + for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset, + k = j + distance; + (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) { + + // Check if reversable. + bool bisReversable = true; + if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && + isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { + DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j + << "\n"); + bisReversable = false; + } + + // Try in order. + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(k).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(j).getInst())); + + // Save off pairs for duplex checking. + duplexToTry.push_back(DuplexCandidate(j, k, iClass)); + DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + continue; + } else { + DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + + // Try reverse. + if (bisReversable) { + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(j).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(k).getInst())); + + // Save off pairs for duplex checking. 
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass)); + DEBUG(dbgs() << "adding pair:" << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } else { + DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + } + } + } + return duplexToTry; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 93c7a0d98bf2..2731278f0e41 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -12,12 +12,53 @@ //===----------------------------------------------------------------------===// #include "HexagonMCInstrInfo.h" + +#include "Hexagon.h" #include "HexagonBaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + namespace llvm { -void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) { - MCI.addOperand(MCOperand::createImm(0)); - MCI.addOperand(MCOperand::createInst(nullptr)); +iterator_range<MCInst::const_iterator> +HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { + assert(isBundle(MCI)); + return iterator_range<MCInst::const_iterator>( + MCI.begin() + bundleInstructionsOffset, MCI.end()); +} + +size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { + if (HexagonMCInstrInfo::isBundle(MCI)) + return (MCI.size() - bundleInstructionsOffset); + else + return (1); +} + +MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, + MCInst const &inst0, + MCInst const &inst1) { + assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf"); + MCInst *duplexInst = new (Context) MCInst; + duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass); + + MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0)); + MCInst *SubInst1 = new 
(Context) MCInst(deriveSubInst(inst1)); + duplexInst->addOperand(MCOperand::createInst(SubInst0)); + duplexInst->addOperand(MCOperand::createInst(SubInst1)); + return duplexInst; +} + +MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, + size_t Index) { + assert(Index <= bundleSize(MCB)); + if (Index == 0) + return nullptr; + MCInst const *Inst = + MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst(); + if (isImmext(*Inst)) + return Inst; + return nullptr; } HexagonII::MemAccessSize @@ -46,6 +87,24 @@ MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, return (MCII.get(MCI.getOpcode())); } +unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCOperand const & +HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI); + MCOperand const &MO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)) && + (MO.isImm() || MO.isExpr())); + return (MO); +} + unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -58,12 +117,6 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); } -std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) { - SanityCheckImplicitOperands(MCI); - std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm()); - return Bits; -} - // Return the max value that a constant extendable operand can have // without being extended. 
int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, @@ -99,9 +152,14 @@ char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, return MCII.getName(MCI.getOpcode()); } -// Return the operand that consumes or produces a new value. -MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, +unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask); +} + +MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; unsigned const O = (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; @@ -113,6 +171,21 @@ MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, return (MCO); } +int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( + (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); + + switch (Target) { + default: + return Hexagon::ArchV4; + case HexagonII::HasV5SubT: + return Hexagon::ArchV5; + } +} + // Return the Hexagon ISA class for the insn. 
unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -121,6 +194,32 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); } +unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &MCI) { + + const InstrItinerary *II = STI.getSchedModel().InstrItineraries; + int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + return ((II[SchedClass].FirstStage + HexagonStages)->getUnits()); +} + +bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { + if (!HexagonMCInstrInfo::isBundle(MCI)) + return false; + + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { + auto MI = I.getInst(); + if (isImmext(*MI)) + return true; + } + + return false; +} + +bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { + return extenderForIndex(MCB, Index) != nullptr; +} + // Return whether the instruction is a legal new-value producer. bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -128,6 +227,18 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); } +MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { + assert(isBundle(MCB)); + assert(Index < HEXAGON_PACKET_SIZE); + return *MCB.getOperand(bundleInstructionsOffset + Index).getInst(); +} + +bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) { + auto Result = Hexagon::BUNDLE == MCI.getOpcode(); + assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm())); + return Result; +} + // Return whether the insn is an actual insn. 
bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && @@ -135,6 +246,15 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || + (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); +} + +bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { + return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI); +} + // Return whether the instruction needs to be constant extended. // 1) Always return true if the instruction has 'isExtended' flag set. // @@ -173,20 +293,44 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, return (ImmValue < MinValue || ImmValue > MaxValue); } -// Return true if the instruction may be extended based on the operand value. bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII, MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; } -// Return whether the instruction must be always extended. 
bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII, MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; } +bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::FPPos) & HexagonII::FPMask); +} + +bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) { + auto Op = MCI.getOpcode(); + return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c || + Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext); +} + +bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & innerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isIntReg(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31); +} + +bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23)); +} + // Return whether the insn is a new-value consumer. 
bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -203,46 +347,103 @@ bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII, OperandNum; } -bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - return Bits.test(packetBeginIndex); +bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & outerLoopMask) != 0; } -bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - return Bits.test(packetEndIndex); +bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ( + !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask)); +} + +bool HexagonMCInstrInfo::isPredReg(unsigned Reg) { + return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0); } -// Return whether the insn is a prefix. bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) { return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX); } -// Return whether the insn is solo, i.e., cannot be in a packet. 
bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); } -void HexagonMCInstrInfo::resetPacket(MCInst &MCI) { - setPacketBegin(MCI, false); - setPacketEnd(MCI, false); +bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask); +} + +bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask); +} + +void HexagonMCInstrInfo::padEndloop(MCInst &MCB) { + MCInst Nop; + Nop.setOpcode(Hexagon::A2_nop); + assert(isBundle(MCB)); + while ((HexagonMCInstrInfo::isInnerLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) || + ((HexagonMCInstrInfo::isOuterLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE)))) + MCB.addOperand(MCOperand::createInst(new MCInst(Nop))); +} + +bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) + return false; + + unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::ALU64_tc_3x_SLOT23: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::M_tc_3x_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_3x_SLOT23: + return true; + } + return false; } -void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) { - SanityCheckImplicitOperands(MCI); - MCI.getOperand(MCI.getNumOperands() - 
2).setImm(Bits.to_ulong()); +void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB, + DuplexCandidate Candidate) { + assert(Candidate.packetIndexI < MCB.size()); + assert(Candidate.packetIndexJ < MCB.size()); + assert(isBundle(MCB)); + MCInst *Duplex = + deriveDuplex(Context, Candidate.iClass, + *MCB.getOperand(Candidate.packetIndexJ).getInst(), + *MCB.getOperand(Candidate.packetIndexI).getInst()); + assert(Duplex != nullptr); + MCB.getOperand(Candidate.packetIndexI).setInst(Duplex); + MCB.erase(MCB.begin() + Candidate.packetIndexJ); } -void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - Bits.set(packetBeginIndex, f); - SetImplicitBits(MCI, Bits); +void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | innerLoopMask); } -void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - Bits.set(packetEndIndex, f); - SetImplicitBits(MCI, Bits); +void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { + assert(isBundle(MCI)); + MCOperand &Operand = MCI.getOperand(0); + Operand.setImm(Operand.getImm() | outerLoopMask); } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 082c80d5ac05..09f305f638e2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -1,4 +1,4 @@ -//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===// +//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===// // // The LLVM Compiler Infrastructure // @@ -15,20 +15,47 @@ #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" - -#include <bitset> namespace llvm { +class MCContext; class MCInstrDesc; class MCInstrInfo; 
class MCInst; class MCOperand; +class MCSubtargetInfo; namespace HexagonII { enum class MemAccessSize; } +class DuplexCandidate { +public: + unsigned packetIndexI, packetIndexJ, iClass; + DuplexCandidate(unsigned i, unsigned j, unsigned iClass) + : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} +}; namespace HexagonMCInstrInfo { -void AppendImplicitOperands(MCInst &MCI); +size_t const innerLoopOffset = 0; +int64_t const innerLoopMask = 1 << innerLoopOffset; + +size_t const outerLoopOffset = 1; +int64_t const outerLoopMask = 1 << outerLoopOffset; + +size_t const bundleInstructionsOffset = 1; + +// Returns the number of instructions in the bundle +size_t bundleSize(MCInst const &MCI); + +// Returns a iterator range of instructions in this bundle +iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI); + +// Return the extender for instruction at Index or nullptr if none +MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); + +// Create a duplex instruction given the two subinsts +MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, + MCInst const &inst1); + +// Convert this instruction in to a duplex subinst +MCInst deriveSubInst(MCInst const &Inst); // Return memory access size HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII, @@ -42,14 +69,26 @@ unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI); MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI); +// Return which duplex group this instruction belongs to +unsigned getDuplexCandidateGroup(MCInst const &MI); + +// Return a list of all possible instruction duplex combinations +SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB); + +// Return the index of the extendable operand +unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return a reference to the extendable operand +MCOperand const &getExtendableOperand(MCInstrInfo 
const &MCII, + MCInst const &MCI); + // Return the implicit alignment of the extendable operand unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI); // Return the number of logical bits of the extendable operand unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI); -std::bitset<16> GetImplicitBits(MCInst const &MCI); - // Return the max value that a constant extendable operand can have // without being extended. int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI); @@ -61,27 +100,77 @@ int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI); // Return instruction name char const *getName(MCInstrInfo const &MCII, MCInst const &MCI); +// Return the operand index for the new value. +unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI); + // Return the operand that consumes or produces a new value. -MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI); +MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); + +int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); // Return the Hexagon ISA class for the insn. unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); +/// Return the slots used by the insn. +unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst const &MCI); + +// Does the packet have an extender for the instruction at Index +bool hasExtenderForIndex(MCInst const &MCB, size_t Index); + +bool hasImmExt(MCInst const &MCI); + // Return whether the instruction is a legal new-value producer. bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); +// Return the instruction at Index +MCInst const &instruction(MCInst const &MCB, size_t Index); + +// Returns whether this MCInst is a wellformed bundle +bool isBundle(MCInst const &MCI); + // Return whether the insn is an actual insn. 
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI); +// Return the duplex iclass given the two duplex classes +unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); + // Return whether the instruction needs to be constant extended. bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI); +// Is this double register suitable for use in a duplex subinst +bool isDblRegForSubInst(unsigned Reg); + +// Is this a duplex instruction +bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI); + +// Can these instructions be duplexed +bool isDuplexPair(MCInst const &MIa, MCInst const &MIb); + +// Can these duplex classes be combine in to a duplex instruction +bool isDuplexPairMatch(unsigned Ga, unsigned Gb); + // Return true if the insn may be extended based on the operand value. bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the instruction must be always extended. bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI); +/// Return whether it is a floating-point insn. +bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI); + +// Returns whether this instruction is an immediate extender +bool isImmext(MCInst const &MCI); + +// Returns whether this bundle is an endloop0 +bool isInnerLoop(MCInst const &MCI); + +// Is this an integer register +bool isIntReg(unsigned Reg); + +// Is this register suitable for use in a duplex subinst +bool isIntRegForSubInst(unsigned Reg); + // Return whether the insn is a new-value consumer. 
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); @@ -89,9 +178,22 @@ bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short OperandNum); -bool isPacketBegin(MCInst const &MCI); +// Can these two instructions be duplexed +bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa, + bool ExtendedA, MCInst const &MIb, bool ExtendedB, + bool bisReversable); + +// Returns whether this bundle is an endloop1 +bool isOuterLoop(MCInst const &MCI); + +// Return whether this instruction is predicated +bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the predicate sense is true +bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI); -bool isPacketEnd(MCInst const &MCI); +// Is this a predicate register +bool isPredReg(unsigned Reg); // Return whether the insn is a prefix. bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); @@ -99,23 +201,31 @@ bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the insn is solo, i.e., cannot be in a packet. bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI); -static const size_t packetBeginIndex = 0; -static const size_t packetEndIndex = 1; +/// Return whether the insn can be packaged only with A and X-type insns. +bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); -void resetPacket(MCInst &MCI); +/// Return whether the insn can be packaged only with an A-type insn in slot #1. 
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); -inline void SanityCheckImplicitOperands(MCInst const &MCI) { - assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands"); - assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() && - "Implicit bits and flags"); - assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() && "Parent pointer"); -} +// Pad the bundle with nops to satisfy endloop requirements +void padEndloop(MCInst &MCI); + +bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); + +// Replace the instructions inside MCB, represented by Candidate +void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate); + +// Marks a bundle as endloop0 +void setInnerLoop(MCInst &MCI); -void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits); +// Marks a bundle as endloop1 +void setOuterLoop(MCInst &MCI); -void setPacketBegin(MCInst &MCI, bool Y); +// Would duplexing this instruction create a requirement to extend +bool subInstWouldBeExtended(MCInst const &potentialDuplex); -void setPacketEnd(MCInst &MCI, bool Y); +// Attempt to find and replace compound pairs +void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp new file mode 100644 index 000000000000..8e70280c1a0d --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -0,0 +1,237 @@ +//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> + DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false), + cl::desc("Disable Hexagon instruction shuffling")); + +void HexagonMCShuffler::init(MCInst &MCB) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI, + bool bInsertAtFront) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + if (bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. 
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + if (!bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::copyTo(MCInst &MCB) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(BundleFlags)); + // Copy the results into the bundle. + for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { + + MCInst const *MI = I->getDesc(); + MCInst const *Extender = I->getExtender(); + if (Extender) + MCB.addOperand(MCOperand::createInst(Extender)); + MCB.addOperand(MCOperand::createInst(MI)); + } +} + +bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { + if (shuffle()) { + // Copy the results into the bundle. + copyTo(MCB); + } else + DEBUG(MCB.dump()); + + return (!getError()); +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(MCII, STI, MCB); + + if (DisableShuffle) + // Ignore if user chose so. + return false; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. + DEBUG(dbgs() << "Skipping empty bundle"); + return false; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return false; + } + + // Reorder the bundle and copy the result. 
+ if (!MCS.reshuffleTo(MCB)) { + // Unless there is any error, which should not happen at this point. + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + llvm_unreachable("unknown error"); + case HexagonShuffler::SHUFFLE_ERROR_INVALID: + llvm_unreachable("invalid packet"); + case HexagonShuffler::SHUFFLE_ERROR_STORES: + llvm_unreachable("too many stores"); + case HexagonShuffler::SHUFFLE_ERROR_LOADS: + llvm_unreachable("too many loads"); + case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: + llvm_unreachable("too many branches"); + case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: + llvm_unreachable("no suitable slot"); + case HexagonShuffler::SHUFFLE_ERROR_SLOTS: + llvm_unreachable("over-subscribed slots"); + case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. + return true; + } + } + + return true; +} + +unsigned +llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + SmallVector<DuplexCandidate, 8> possibleDuplexes) { + + if (DisableShuffle) + return HexagonShuffler::SHUFFLE_SUCCESS; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. 
+ DEBUG(dbgs() << "Skipping empty bundle"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } + + bool doneShuffling = false; + unsigned shuffleError; + while (possibleDuplexes.size() > 0 && (!doneShuffling)) { + // case of Duplex Found + DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); + MCInst Attempt(MCB); + HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); + HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler + if (MCS.size() == 1) { // case of one duplex + // copy the created duplex in the shuffler to the bundle + MCS.copyTo(MCB); + doneShuffling = true; + return HexagonShuffler::SHUFFLE_SUCCESS; + } + // try shuffle with this duplex + doneShuffling = MCS.reshuffleTo(MCB); + shuffleError = MCS.getError(); + + if (doneShuffling) + break; + } + + if (doneShuffling == false) { + HexagonMCShuffler MCS(MCII, STI, MCB); + doneShuffling = MCS.reshuffleTo(MCB); // shuffle + shuffleError = MCS.getError(); + } + if (!doneShuffling) + return shuffleError; + + return HexagonShuffler::SHUFFLE_SUCCESS; +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, MCInst const *AddMI, int fixupCount) { + if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI) + return false; + + // if fixups present, make sure we don't insert too many nops that would + // later prevent an extender from being inserted. 
+ unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if (bundleSize >= HEXAGON_PACKET_SIZE) + return false; + if (fixupCount >= 2) { + return false; + } else { + if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount) + return false; + } + + if (DisableShuffle) + return false; + + HexagonMCShuffler MCS(MCII, STI, MCB, AddMI); + if (!MCS.reshuffleTo(MCB)) { + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + return false; + case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case + return true; + } + } + + return true; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h new file mode 100644 index 000000000000..a21cce1fc240 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -0,0 +1,65 @@ +//=-- HexagonMCShuffler.h ---------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCSHUFFLER_H +#define HEXAGONMCSHUFFLER_H + +#include "MCTargetDesc/HexagonShuffler.h" + +namespace llvm { + +class MCInst; + +// Insn bundle shuffler. 
+class HexagonMCShuffler : public HexagonShuffler { + bool immext_present; + bool duplex_present; + +public: + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) + : HexagonShuffler(MCII, STI) { + init(MCB); + }; + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, const MCInst *AddMI, + bool bInsertAtFront = false) + : HexagonShuffler(MCII, STI) { + init(MCB, AddMI, bInsertAtFront); + }; + + // Copy reordered bundle to another. + void copyTo(MCInst &MCB); + // Reorder and copy result to another. + bool reshuffleTo(MCInst &MCB); + + bool immextPresent() const { return immext_present; }; + bool duplexPresent() const { return duplex_present; }; + +private: + void init(MCInst &MCB); + void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false); +}; + +// Invocation of the shuffler. +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &); +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &, const MCInst *, int); +unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &, + SmallVector<DuplexCandidate, 8>); +} + +#endif // HEXAGONMCSHUFFLER_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 59395e230fa9..43734ed6ca3f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -55,7 +55,7 @@ createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { } static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); // VirtualFP = (R30 + #0). 
@@ -112,11 +112,11 @@ extern "C" void LLVMInitializeHexagonTargetMC() { TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget, createHexagonMCCodeEmitter); - // Register the MC Inst Printer - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - createHexagonMCInstPrinter); - // Register the asm backend TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, createHexagonAsmBackend); + + // Register the MC Inst Printer + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index de63fd271aea..81211cc026db 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -17,6 +17,8 @@ #include <cstdint> namespace llvm { +struct InstrItinerary; +struct InstrStage; class MCAsmBackend; class MCCodeEmitter; class MCContext; @@ -31,6 +33,8 @@ class raw_pwrite_stream; extern Target TheHexagonTarget; +extern const InstrStage HexagonStages[]; + MCInstrInfo *createHexagonMCInstrInfo(); MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp new file mode 100644 index 000000000000..feaaa4f780d5 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -0,0 +1,385 @@ +//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include <algorithm> +#include <utility> +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "HexagonShuffler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Insn shuffling priority. +class HexagonBid { + // The priority is directly proportional to how restricted the insn is based + // on its flexibility to run on the available slots. So, the fewer slots it + // may run on, the higher its priority. + enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15. + unsigned Bid; + +public: + HexagonBid() : Bid(0){}; + HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }; + + // Check if the insn priority is overflowed. + bool isSold() const { return (Bid >= MAX); }; + + HexagonBid &operator+=(const HexagonBid &B) { + Bid += B.Bid; + return *this; + }; +}; + +// Slot shuffling allocation. +class HexagonUnitAuction { + HexagonBid Scores[HEXAGON_PACKET_SIZE]; + // Mask indicating which slot is unavailable. + unsigned isSold : HEXAGON_PACKET_SIZE; + +public: + HexagonUnitAuction() : isSold(0){}; + + // Allocate slots. + bool bid(unsigned B) { + // Exclude already auctioned slots from the bid. + unsigned b = B & ~isSold; + if (b) { + for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i) + if (b & (1 << i)) { + // Request candidate slots. + Scores[i] += HexagonBid(b); + isSold |= Scores[i].isSold() << i; + } + return true; + ; + } else + // Error if the desired slots are already full. 
+ return false; + }; +}; + +unsigned HexagonResource::setWeight(unsigned s) { + const unsigned SlotWeight = 8; + const unsigned MaskWeight = SlotWeight - 1; + bool Key = (1 << s) & getUnits(); + + // Calculate relative weight of the insn for the given slot, weighing it the + // heavier the more restrictive the insn is and the lowest the slots that the + // insn may be executed in. + Weight = + (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits())) + << countTrailingZeros(getUnits())); + return (Weight); +} + +HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI) + : MCII(MCII), STI(STI) { + reset(); +} + +void HexagonShuffler::reset() { + Packet.clear(); + BundleFlags = 0; + Error = SHUFFLE_SUCCESS; +} + +void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, + unsigned S, bool X) { + HexagonInstr PI(ID, Extender, S, X); + + Packet.push_back(PI); +} + +/// Check that the packet is legal and enforce relative insn order. +bool HexagonShuffler::check() { + // Descriptive slot masks. + const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, + slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4, + slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; + // Highest slots for branches and stores used to keep their original order. + unsigned slotJump = slotFirstJump; + unsigned slotLoadStore = slotFirstLoadStore; + // Number of branches, solo branches, indirect branches. + unsigned jumps = 0, jump1 = 0, jumpr = 0; + // Number of memory operations, loads, solo loads, stores, solo stores, single + // stores. + unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of duplex insns, solo insns. + unsigned duplex = 0, solo = 0; + // Number of insns restricting other insns in the packet to A and X types, + // which is neither A or X types. + unsigned onlyAX = 0, neitherAnorX = 0; + // Number of insns restricting other insns in slot #1 to A type. 
+ unsigned onlyAin1 = 0; + // Number of insns restricting any insn in slot #1, except A2_nop. + unsigned onlyNo1 = 0; + unsigned xtypeFloat = 0; + unsigned pSlot3Cnt = 0; + iterator slot3ISJ = end(); + + // Collect information from the insns in the packet. + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (HexagonMCInstrInfo::isSolo(MCII, *ID)) + solo += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID)) + onlyAX += !ISJ->isSoloException(); + else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID)) + onlyAin1 += !ISJ->isSoloException(); + if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 && + HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE) + ++neitherAnorX; + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) { + ++pSlot3Cnt; + slot3ISJ = ISJ; + } + + switch (HexagonMCInstrInfo::getType(MCII, *ID)) { + case HexagonII::TypeXTYPE: + if (HexagonMCInstrInfo::isFloat(MCII, *ID)) + ++xtypeFloat; + break; + case HexagonII::TypeJR: + ++jumpr; + // Fall-through. + case HexagonII::TypeJ: + ++jumps; + break; + case HexagonII::TypeLD: + ++loads; + ++memory; + if (ISJ->Core.getUnits() == slotSingleLoad) + ++load0; + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) + ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + break; + case HexagonII::TypeST: + ++stores; + ++memory; + if (ISJ->Core.getUnits() == slotSingleStore) + ++store0; + break; + case HexagonII::TypeMEMOP: + ++loads; + ++stores; + ++store1; + ++memory; + break; + case HexagonII::TypeNV: + ++memory; // NV insns are memory-like. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch()) + ++jumps, ++jump1; + break; + case HexagonII::TypeCR: + // Legacy conditional branch predicated on a register. + case HexagonII::TypeSYSTEM: + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) + ++loads; + break; + } + } + + // Check if the packet is legal. 
+ if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) || + (solo && size() > 1) || (onlyAX && neitherAnorX > 1) || + (onlyAX && xtypeFloat)) { + Error = SHUFFLE_ERROR_INVALID; + return false; + } + + if (jump1 && jumps > 1) { + // Error if single branch with another branch. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + + // Modify packet accordingly. + // TODO: need to reserve slots #0 and #1 for duplex insns. + bool bOnlySlot3 = false; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_UNKNOWN; + return false; + } + + // Exclude from slot #1 any insn but A2_nop. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop) + if (onlyNo1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Exclude from slot #1 any insn but A-type. + if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32) + if (onlyAin1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); + + // Branches must keep the original order. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, *ID).isCall()) + if (jumps > 1) { + if (jumpr || slotJump < slotLastJump) { + // Error if indirect branch with another branch or + // no more slots available for branches. + Error = SHUFFLE_ERROR_BRANCHES; + return false; + } + // Pin the branch to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump); + // Update next highest slot available to branches. + slotJump >>= 1; + } + + // A single load must use slot #0. + if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) { + if (loads == 1 && loads == memory) + // Pin the load to slot #0. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); + } + + // A single store must use slot #0. 
+ if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) { + if (!store0) { + if (stores == 1) + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); + else if (stores > 1) { + if (slotLoadStore < slotLastLoadStore) { + // Error if no more slots available for stores. + Error = SHUFFLE_ERROR_STORES; + return false; + } + // Pin the store to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore); + // Update the next highest slot available to stores. + slotLoadStore >>= 1; + } + } + if (store1 && stores > 1) { + // Error if a single store with another store. + Error = SHUFFLE_ERROR_STORES; + return false; + } + } + + // flag if an instruction can only be executed in slot 3 + if (ISJ->Core.getUnits() == slotThree) + bOnlySlot3 = true; + + if (!ISJ->Core.getUnits()) { + // Error if insn may not be executed in any slot. + Error = SHUFFLE_ERROR_NOSLOTS; + return false; + } + } + + bool validateSlots = true; + if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { + // save off slot mask of instruction marked with A_PREFER_SLOT3 + // and then pin it to slot #3 + unsigned saveUnits = slot3ISJ->Core.getUnits(); + slot3ISJ->Core.setUnits(saveUnits & slotThree); + + HexagonUnitAuction AuctionCore; + std::sort(begin(), end(), HexagonInstr::lessCore); + + // see if things ok with that instruction being pinned to slot #3 + bool bFail = false; + for (iterator I = begin(); I != end() && bFail != true; ++I) + if (!AuctionCore.bid(I->Core.getUnits())) + bFail = true; + + // if yes, great, if not then restore original slot mask + if (!bFail) + validateSlots = false; // all good, no need to re-do auction + else + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const *ID = ISJ->getDesc(); + if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) + ISJ->Core.setUnits(saveUnits); + } + } + + // Check if any slot, core, is over-subscribed. + // Verify the core slot subscriptions. 
+ if (validateSlots) { + HexagonUnitAuction AuctionCore; + + std::sort(begin(), end(), HexagonInstr::lessCore); + + for (iterator I = begin(); I != end(); ++I) + if (!AuctionCore.bid(I->Core.getUnits())) { + Error = SHUFFLE_ERROR_SLOTS; + return false; + } + } + + Error = SHUFFLE_SUCCESS; + return true; +} + +bool HexagonShuffler::shuffle() { + if (size() > HEXAGON_PACKET_SIZE) { + // Ignore a packet with with more than what a packet can hold + // or with compound or duplex insns for now. + Error = SHUFFLE_ERROR_INVALID; + return false; + } + + // Check and prepare packet. + if (size() > 1 && check()) + // Reorder the handles for each slot. + for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE; + ++nSlot) { + iterator ISJ, ISK; + unsigned slotSkip, slotWeight; + + // Prioritize the handles considering their restrictions. + for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0; + ISK != Packet.end(); ++ISK, ++slotSkip) + if (slotSkip < nSlot - emptySlots) + // Note which handle to begin at. + ++ISJ; + else + // Calculate the weight of the slot. + slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1); + + if (slotWeight) + // Sort the packet, favoring source order, + // beginning after the previous slot. + std::sort(ISJ, Packet.end()); + else + // Skip unused slot. 
+ ++emptySlots; + } + + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) + DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); + dbgs() << ':' + << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc()) + .getOpcode(); + dbgs() << '\n'); + DEBUG(dbgs() << '\n'); + + return (!getError()); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h new file mode 100644 index 000000000000..9218fd3eb070 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -0,0 +1,139 @@ +//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONSHUFFLER_H +#define HEXAGONSHUFFLER_H + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +namespace llvm { +// Insn resources. +class HexagonResource { + // Mask of the slots or units that may execute the insn and + // the weight or priority that the insn requires to be assigned a slot. + unsigned Slots, Weight; + +public: + HexagonResource(unsigned s) { setUnits(s); }; + + void setUnits(unsigned s) { + Slots = s & ~(-1 << HEXAGON_PACKET_SIZE); + setWeight(s); + }; + unsigned setWeight(unsigned s); + + unsigned getUnits() const { return (Slots); }; + unsigned getWeight() const { return (Weight); }; + + // Check if the resources are in ascending slot order. 
+ static bool lessUnits(const HexagonResource &A, const HexagonResource &B) { + return (countPopulation(A.getUnits()) < countPopulation(B.getUnits())); + }; + // Check if the resources are in ascending weight order. + static bool lessWeight(const HexagonResource &A, const HexagonResource &B) { + return (A.getWeight() < B.getWeight()); + }; +}; + +// Handle to an insn used by the shuffling algorithm. +class HexagonInstr { + friend class HexagonShuffler; + + MCInst const *ID; + MCInst const *Extender; + HexagonResource Core; + bool SoloException; + +public: + HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s, + bool x = false) + : ID(id), Extender(Extender), Core(s), SoloException(x){}; + + MCInst const *getDesc() const { return (ID); }; + + MCInst const *getExtender() const { return Extender; } + + unsigned isSoloException() const { return (SoloException); }; + + // Check if the handles are in ascending order for shuffling purposes. + bool operator<(const HexagonInstr &B) const { + return (HexagonResource::lessWeight(B.Core, Core)); + }; + // Check if the handles are in ascending order by core slots. + static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.Core, B.Core)); + }; +}; + +// Bundle shuffler. +class HexagonShuffler { + typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE> + HexagonPacket; + + // Insn handles in a bundle. + HexagonPacket Packet; + + // Shuffling error code. + unsigned Error; + +protected: + int64_t BundleFlags; + MCInstrInfo const &MCII; + MCSubtargetInfo const &STI; + +public: + typedef HexagonPacket::iterator iterator; + + enum { + SHUFFLE_SUCCESS = 0, ///< Successful operation. + SHUFFLE_ERROR_INVALID, ///< Invalid bundle. + SHUFFLE_ERROR_STORES, ///< No free slots for store insns. + SHUFFLE_ERROR_LOADS, ///< No free slots for load insns. + SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. 
+ SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. + SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. + SHUFFLE_ERROR_UNKNOWN ///< Unknown error. + }; + + explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI); + + // Reset to initial state. + void reset(); + // Check if the bundle may be validly shuffled. + bool check(); + // Reorder the insn handles in the bundle. + bool shuffle(); + + unsigned size() const { return (Packet.size()); }; + + iterator begin() { return (Packet.begin()); }; + iterator end() { return (Packet.end()); }; + + // Add insn handle to the bundle . + void append(MCInst const *ID, MCInst const *Extender, unsigned S, + bool X = false); + + // Return the error code for the last check or shuffling of the bundle. + void setError(unsigned Err) { Error = Err; }; + unsigned getError() const { return (Error); }; +}; +} + +#endif // HEXAGONSHUFFLER_H diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index 6c43d97837ca..be6d1a84a377 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -39,7 +39,7 @@ void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo, O << Op.getImm(); else { assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -53,7 +53,8 @@ void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << '#' << *Op.getExpr(); + O << '#'; + Op.getExpr()->print(O, &MAI); } } @@ -75,7 +76,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, O << '&'; if (Disp.isExpr()) - O << *Disp.getExpr(); + Disp.getExpr()->print(O, &MAI); else { assert(Disp.isImm() && "Expected immediate in displacement field"); O << Disp.getImm(); diff --git 
a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index b8f3d02ab4d8..a305b2db8683 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MSP430Desc parent = MSP430 -required_libraries = MC MSP430AsmPrinter MSP430Info +required_libraries = MC MSP430AsmPrinter MSP430Info Support add_to_library_groups = MSP430 diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index df1aa1a41f19..c26b3081dbc3 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "MSP430MCAsmInfo.h" -#include "llvm/ADT/StringRef.h" using namespace llvm; void MSP430MCAsmInfo::anchor() { } -MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) { +MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) { PointerSize = CalleeSaveStackSlotSize = 2; CommentString = ";"; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index 2c9532d321e4..ff5b0b6d858c 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -17,12 +17,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class StringRef; + class Triple; class MSP430MCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit MSP430MCAsmInfo(StringRef TT); + explicit MSP430MCAsmInfo(const Triple &TT); }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index a99c9a3e2374..4342c10a1bf2 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -75,7 +75,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); 
return; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); @@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, if (Offset) O << '(' << Offset << '+'; - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); if (Offset) O << ')'; diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index b039778d96c4..54154a8afac1 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -96,7 +96,7 @@ MCOperand MSP430MCInstLower:: LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); @@ -104,8 +104,8 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { } if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -130,7 +130,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index aade12b3046a..9c054e5ac231 100644 --- 
a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -43,7 +43,7 @@ class MCInstrInfo; namespace { class MipsAssemblerOptions { public: - MipsAssemblerOptions(uint64_t Features_) : + MipsAssemblerOptions(const FeatureBitset &Features_) : ATReg(1), Reorder(true), Macro(true), Features(Features_) {} MipsAssemblerOptions(const MipsAssemblerOptions *Opts) { @@ -70,8 +70,8 @@ public: void setMacro() { Macro = true; } void setNoMacro() { Macro = false; } - uint64_t getFeatures() const { return Features; } - void setFeatures(uint64_t Features_) { Features = Features_; } + const FeatureBitset &getFeatures() const { return Features; } + void setFeatures(const FeatureBitset &Features_) { Features = Features_; } // Set of features that are either architecture features or referenced // by them (e.g.: FeatureNaN2008 implied by FeatureMips32r6). @@ -84,7 +84,7 @@ private: unsigned ATReg; bool Reorder; bool Macro; - uint64_t Features; + FeatureBitset Features; }; } @@ -247,6 +247,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetFpDirective(); bool parseSetPopDirective(); bool parseSetPushDirective(); + bool parseSetSoftFloatDirective(); + bool parseSetHardFloatDirective(); bool parseSetAssignment(); @@ -325,23 +327,23 @@ class MipsAsmParser : public MCTargetAsmParser { STI.setFeatureBits(FeatureBits); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature))); - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } void setFeatureBits(uint64_t Feature, StringRef FeatureString) { if (!(STI.getFeatureBits()[Feature])) { setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); } void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { if 
(STI.getFeatureBits()[Feature]) { setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); } public: @@ -367,11 +369,11 @@ public: // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - make_unique<MipsAssemblerOptions>(getAvailableFeatures())); + llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits())); // Create an assembler options environment for the user to modify. AssemblerOptions.push_back( - make_unique<MipsAssemblerOptions>(getAvailableFeatures())); + llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -1946,10 +1948,10 @@ void MipsAsmParser::expandLoadAddressSym( unsigned RegNo = DstRegOp.getReg(); const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr()); const MCSymbolRefExpr *HiExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext()); const MCSymbolRefExpr *LoExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext()); if (!Is32BitSym) { // If it's a 64-bit architecture, expand to: @@ -1960,10 +1962,10 @@ void MipsAsmParser::expandLoadAddressSym( // dsll d,d,16 // ori d,d,lo16(sym) const MCSymbolRefExpr *HighestExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext()); const MCSymbolRefExpr *HigherExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext()); tmpInst.setOpcode(Mips::LUi); @@ -2102,7 +2104,7 @@ void MipsAsmParser::expandMemInst(MCInst 
&Inst, SMLoc IDLoc, else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { SR = static_cast<const MCSymbolRefExpr *>(ExprOffset); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create( SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext()); TempInst.addOperand(MCOperand::createExpr(HiExpr)); @@ -2133,7 +2135,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, TempInst.addOperand(MCOperand::createImm(LoOffset)); else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create( SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext()); TempInst.addOperand(MCOperand::createExpr(LoExpr)); @@ -2505,7 +2507,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); // Otherwise create a symbol reference. const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); Operands.push_back(MipsOperand::CreateImm(Res, S, E, *this)); return false; @@ -2565,14 +2567,14 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, default: report_fatal_error("unsupported reloc value"); } - return MCConstantExpr::Create(Val, getContext()); + return MCConstantExpr::create(Val, getContext()); } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) { // It's a symbol, create a symbolic expression from the symbol. 
StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr); - Res = MCSymbolRefExpr::Create(Symbol, VK, getContext()); + Res = MCSymbolRefExpr::create(Symbol, VK, getContext()); return Res; } @@ -2581,17 +2583,17 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, // Try to create target expression. if (MipsMCExpr::isSupportedBinaryExpr(VK, BE)) - return MipsMCExpr::Create(VK, Expr, getContext()); + return MipsMCExpr::create(VK, Expr, getContext()); const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr); const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr); - Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext()); + Res = MCBinaryExpr::create(BE->getOpcode(), LExp, RExp, getContext()); return Res; } if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) { const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr); - Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext()); + Res = MCUnaryExpr::create(UN->getOpcode(), UnExp, getContext()); return Res; } // Just return the original expression. @@ -2779,7 +2781,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { Parser.Lex(); // Eat the ')' token. if (!IdVal) - IdVal = MCConstantExpr::Create(0, getContext()); + IdVal = MCConstantExpr::create(0, getContext()); // Replace the register operand with the memory operand. std::unique_ptr<MipsOperand> op( @@ -2790,10 +2792,10 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { // Add the memory operand. 
if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) { int64_t Imm; - if (IdVal->EvaluateAsAbsolute(Imm)) - IdVal = MCConstantExpr::Create(Imm, getContext()); + if (IdVal->evaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::create(Imm, getContext()); else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) - IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), getContext()); } @@ -3010,7 +3012,7 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) { int64_t Val = MCE->getValue(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm( - MCConstantExpr::Create(0 - Val, getContext()), S, E, *this)); + MCConstantExpr::create(0 - Val, getContext()), S, E, *this)); return MatchOperand_Success; } @@ -3034,7 +3036,7 @@ MipsAsmParser::parseLSAImm(OperandVector &Operands) { return MatchOperand_ParseFail; int64_t Val; - if (!Expr->EvaluateAsAbsolute(Val)) { + if (!Expr->evaluateAsAbsolute(Val)) { Error(S, "expected immediate value"); return MatchOperand_ParseFail; } @@ -3601,7 +3603,9 @@ bool MipsAsmParser::parseSetPopDirective() { return reportParseError(Loc, ".set pop with no .set push"); AssemblerOptions.pop_back(); - setAvailableFeatures(AssemblerOptions.back()->getFeatures()); + setAvailableFeatures( + ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures())); + STI.setFeatureBits(AssemblerOptions.back()->getFeatures()); getTargetStreamer().emitDirectiveSetPop(); return false; @@ -3621,6 +3625,28 @@ bool MipsAsmParser::parseSetPushDirective() { return false; } +bool MipsAsmParser::parseSetSoftFloatDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return reportParseError("unexpected token, expected end of statement"); + + setFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + getTargetStreamer().emitDirectiveSetSoftFloat(); + 
return false; +} + +bool MipsAsmParser::parseSetHardFloatDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return reportParseError("unexpected token, expected end of statement"); + + clearFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + getTargetStreamer().emitDirectiveSetHardFloat(); + return false; +} + bool MipsAsmParser::parseSetAssignment() { StringRef Name; const MCExpr *Value; @@ -3649,7 +3675,9 @@ bool MipsAsmParser::parseSetMips0Directive() { return reportParseError("unexpected token, expected end of statement"); // Reset assembler options to their initial values. - setAvailableFeatures(AssemblerOptions.front()->getFeatures()); + setAvailableFeatures( + ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures())); + STI.setFeatureBits(AssemblerOptions.front()->getFeatures()); AssemblerOptions.back()->setFeatures(AssemblerOptions.front()->getFeatures()); getTargetStreamer().emitDirectiveSetMips0(); @@ -3985,6 +4013,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetMsaDirective(); } else if (Tok.getString() == "nomsa") { return parseSetNoMsaDirective(); + } else if (Tok.getString() == "softfloat") { + return parseSetSoftFloatDirective(); + } else if (Tok.getString() == "hardfloat") { + return parseSetHardFloatDirective(); } else { // It is just an identifier, look for an assignment. 
parseSetAssignment(); @@ -4286,7 +4318,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { reportParseError("expected number after comma"); return false; } - if (!DummyNumber->EvaluateAsAbsolute(DummyNumberVal)) { + if (!DummyNumber->evaluateAsAbsolute(DummyNumberVal)) { reportParseError("expected an absolute expression after comma"); return false; } @@ -4366,7 +4398,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!FrameSize->EvaluateAsAbsolute(FrameSizeVal)) { + if (!FrameSize->evaluateAsAbsolute(FrameSizeVal)) { reportParseError("frame size not an absolute expression"); return false; } @@ -4427,7 +4459,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!BitMask->EvaluateAsAbsolute(BitMaskVal)) { + if (!BitMask->evaluateAsAbsolute(BitMaskVal)) { reportParseError("bitmask not an absolute expression"); return false; } @@ -4448,7 +4480,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!FrameOffset->EvaluateAsAbsolute(FrameOffsetVal)) { + if (!FrameOffset->evaluateAsAbsolute(FrameOffsetVal)) { reportParseError("frame offset not an absolute expression"); return false; } diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index eb97c93ac196..c8629b5d7bd2 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -47,6 +47,8 @@ public: bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; } + bool hasCnMips() const { return STI.getFeatureBits()[Mips::FeatureCnMips]; } + bool hasCOP3() const { // Only present in MIPS-I and MIPS-II return !hasMips32() && !hasMips3(); @@ -889,6 +891,16 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, } } + if (hasCnMips()) { + DEBUG(dbgs() << "Trying CnMips table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableCnMips32, Instr, Insn, 
+ Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + if (isGP64()) { DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n"); Result = decodeInstruction(DecoderTableMips6432, Instr, Insn, diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index e80a47b90142..a5637b16b636 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -122,7 +122,8 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } -static void printExpr(const MCExpr *Expr, raw_ostream &OS) { +static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI, + raw_ostream &OS) { int Offset = 0; const MCSymbolRefExpr *SRE; @@ -132,7 +133,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { assert(SRE && CE && "Binary expression must be sym+const."); Offset = CE->getValue(); } else if (const MipsMCExpr *ME = dyn_cast<MipsMCExpr>(Expr)) { - ME->print(OS); + ME->print(OS, MAI); return; } else SRE = cast<MCSymbolRefExpr>(Expr); @@ -170,7 +171,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break; } - OS << SRE->getSymbol(); + SRE->getSymbol().print(OS, MAI); if (Offset) { if (Offset > 0) @@ -199,7 +200,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in printOperand"); - printExpr(Op.getExpr(), O); + printExpr(Op.getExpr(), &MAI, O); } void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum, diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index b1f7c2f22594..bf8f7d12880d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -99,6 +99,10 @@ unsigned MipsABIInfo::GetFramePtr() const { return ArePtrs64bit() ? 
Mips::FP_64 : Mips::FP; } +unsigned MipsABIInfo::GetBasePtr() const { + return ArePtrs64bit() ? Mips::S7_64 : Mips::S7; +} + unsigned MipsABIInfo::GetNullPtr() const { return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index 9a6ba9467659..d20dc9037951 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -65,6 +65,7 @@ public: unsigned GetStackPtr() const; unsigned GetFramePtr() const; + unsigned GetBasePtr() const; unsigned GetNullPtr() const; unsigned GetPtrAdduOp() const; unsigned GetPtrAddiuOp() const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 8d9e3e31105e..982a7f54e825 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -12,10 +12,10 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include <list> @@ -46,7 +46,7 @@ struct MipsRelocationEntry { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; virtual void sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) override; @@ -65,181 +65,134 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { // determine the type of the relocation - unsigned Type = (unsigned)ELF::R_MIPS_NONE; unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { - default: - 
llvm_unreachable("invalid fixup kind!"); case Mips::fixup_Mips_32: case FK_Data_4: - Type = ELF::R_MIPS_32; - break; + return IsPCRel ? ELF::R_MIPS_PC32 : ELF::R_MIPS_32; case Mips::fixup_Mips_64: case FK_Data_8: - Type = ELF::R_MIPS_64; - break; + return ELF::R_MIPS_64; case FK_GPRel_4: if (isN64()) { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type); Type = setRType2((unsigned)ELF::R_MIPS_64, Type); Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type); + return Type; } - else - Type = ELF::R_MIPS_GPREL32; - break; + return ELF::R_MIPS_GPREL32; case Mips::fixup_Mips_GPREL16: - Type = ELF::R_MIPS_GPREL16; - break; + return ELF::R_MIPS_GPREL16; case Mips::fixup_Mips_26: - Type = ELF::R_MIPS_26; - break; + return ELF::R_MIPS_26; case Mips::fixup_Mips_CALL16: - Type = ELF::R_MIPS_CALL16; - break; + return ELF::R_MIPS_CALL16; case Mips::fixup_Mips_GOT_Global: case Mips::fixup_Mips_GOT_Local: - Type = ELF::R_MIPS_GOT16; - break; + return ELF::R_MIPS_GOT16; case Mips::fixup_Mips_HI16: - Type = ELF::R_MIPS_HI16; - break; + return ELF::R_MIPS_HI16; case Mips::fixup_Mips_LO16: - Type = ELF::R_MIPS_LO16; - break; + return ELF::R_MIPS_LO16; case Mips::fixup_Mips_TLSGD: - Type = ELF::R_MIPS_TLS_GD; - break; + return ELF::R_MIPS_TLS_GD; case Mips::fixup_Mips_GOTTPREL: - Type = ELF::R_MIPS_TLS_GOTTPREL; - break; + return ELF::R_MIPS_TLS_GOTTPREL; case Mips::fixup_Mips_TPREL_HI: - Type = ELF::R_MIPS_TLS_TPREL_HI16; - break; + return ELF::R_MIPS_TLS_TPREL_HI16; case Mips::fixup_Mips_TPREL_LO: - Type = ELF::R_MIPS_TLS_TPREL_LO16; - break; + return ELF::R_MIPS_TLS_TPREL_LO16; case Mips::fixup_Mips_TLSLDM: - Type = ELF::R_MIPS_TLS_LDM; - break; + return ELF::R_MIPS_TLS_LDM; case Mips::fixup_Mips_DTPREL_HI: - Type = ELF::R_MIPS_TLS_DTPREL_HI16; - break; + return ELF::R_MIPS_TLS_DTPREL_HI16; case Mips::fixup_Mips_DTPREL_LO: - Type = ELF::R_MIPS_TLS_DTPREL_LO16; - break; + return ELF::R_MIPS_TLS_DTPREL_LO16; case 
Mips::fixup_Mips_Branch_PCRel: case Mips::fixup_Mips_PC16: - Type = ELF::R_MIPS_PC16; - break; + return ELF::R_MIPS_PC16; case Mips::fixup_Mips_GOT_PAGE: - Type = ELF::R_MIPS_GOT_PAGE; - break; + return ELF::R_MIPS_GOT_PAGE; case Mips::fixup_Mips_GOT_OFST: - Type = ELF::R_MIPS_GOT_OFST; - break; + return ELF::R_MIPS_GOT_OFST; case Mips::fixup_Mips_GOT_DISP: - Type = ELF::R_MIPS_GOT_DISP; - break; - case Mips::fixup_Mips_GPOFF_HI: + return ELF::R_MIPS_GOT_DISP; + case Mips::fixup_Mips_GPOFF_HI: { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type); - break; - case Mips::fixup_Mips_GPOFF_LO: + return Type; + } + case Mips::fixup_Mips_GPOFF_LO: { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); - break; + return Type; + } case Mips::fixup_Mips_HIGHER: - Type = ELF::R_MIPS_HIGHER; - break; + return ELF::R_MIPS_HIGHER; case Mips::fixup_Mips_HIGHEST: - Type = ELF::R_MIPS_HIGHEST; - break; + return ELF::R_MIPS_HIGHEST; case Mips::fixup_Mips_GOT_HI16: - Type = ELF::R_MIPS_GOT_HI16; - break; + return ELF::R_MIPS_GOT_HI16; case Mips::fixup_Mips_GOT_LO16: - Type = ELF::R_MIPS_GOT_LO16; - break; + return ELF::R_MIPS_GOT_LO16; case Mips::fixup_Mips_CALL_HI16: - Type = ELF::R_MIPS_CALL_HI16; - break; + return ELF::R_MIPS_CALL_HI16; case Mips::fixup_Mips_CALL_LO16: - Type = ELF::R_MIPS_CALL_LO16; - break; + return ELF::R_MIPS_CALL_LO16; case Mips::fixup_MICROMIPS_26_S1: - Type = ELF::R_MICROMIPS_26_S1; - break; + return ELF::R_MICROMIPS_26_S1; case Mips::fixup_MICROMIPS_HI16: - Type = ELF::R_MICROMIPS_HI16; - break; + return ELF::R_MICROMIPS_HI16; case Mips::fixup_MICROMIPS_LO16: - Type = ELF::R_MICROMIPS_LO16; - break; + return ELF::R_MICROMIPS_LO16; case 
Mips::fixup_MICROMIPS_GOT16: - Type = ELF::R_MICROMIPS_GOT16; - break; + return ELF::R_MICROMIPS_GOT16; case Mips::fixup_MICROMIPS_PC7_S1: - Type = ELF::R_MICROMIPS_PC7_S1; - break; + return ELF::R_MICROMIPS_PC7_S1; case Mips::fixup_MICROMIPS_PC10_S1: - Type = ELF::R_MICROMIPS_PC10_S1; - break; + return ELF::R_MICROMIPS_PC10_S1; case Mips::fixup_MICROMIPS_PC16_S1: - Type = ELF::R_MICROMIPS_PC16_S1; - break; + return ELF::R_MICROMIPS_PC16_S1; case Mips::fixup_MICROMIPS_CALL16: - Type = ELF::R_MICROMIPS_CALL16; - break; + return ELF::R_MICROMIPS_CALL16; case Mips::fixup_MICROMIPS_GOT_DISP: - Type = ELF::R_MICROMIPS_GOT_DISP; - break; + return ELF::R_MICROMIPS_GOT_DISP; case Mips::fixup_MICROMIPS_GOT_PAGE: - Type = ELF::R_MICROMIPS_GOT_PAGE; - break; + return ELF::R_MICROMIPS_GOT_PAGE; case Mips::fixup_MICROMIPS_GOT_OFST: - Type = ELF::R_MICROMIPS_GOT_OFST; - break; + return ELF::R_MICROMIPS_GOT_OFST; case Mips::fixup_MICROMIPS_TLS_GD: - Type = ELF::R_MICROMIPS_TLS_GD; - break; + return ELF::R_MICROMIPS_TLS_GD; case Mips::fixup_MICROMIPS_TLS_LDM: - Type = ELF::R_MICROMIPS_TLS_LDM; - break; + return ELF::R_MICROMIPS_TLS_LDM; case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16: - Type = ELF::R_MICROMIPS_TLS_DTPREL_HI16; - break; + return ELF::R_MICROMIPS_TLS_DTPREL_HI16; case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16: - Type = ELF::R_MICROMIPS_TLS_DTPREL_LO16; - break; + return ELF::R_MICROMIPS_TLS_DTPREL_LO16; case Mips::fixup_MICROMIPS_TLS_TPREL_HI16: - Type = ELF::R_MICROMIPS_TLS_TPREL_HI16; - break; + return ELF::R_MICROMIPS_TLS_TPREL_HI16; case Mips::fixup_MICROMIPS_TLS_TPREL_LO16: - Type = ELF::R_MICROMIPS_TLS_TPREL_LO16; - break; + return ELF::R_MICROMIPS_TLS_TPREL_LO16; case Mips::fixup_MIPS_PC19_S2: - Type = ELF::R_MIPS_PC19_S2; - break; + return ELF::R_MIPS_PC19_S2; case Mips::fixup_MIPS_PC18_S3: - Type = ELF::R_MIPS_PC18_S3; - break; + return ELF::R_MIPS_PC18_S3; case Mips::fixup_MIPS_PC21_S2: - Type = ELF::R_MIPS_PC21_S2; - break; + return ELF::R_MIPS_PC21_S2; case 
Mips::fixup_MIPS_PC26_S2: - Type = ELF::R_MIPS_PC26_S2; - break; + return ELF::R_MIPS_PC26_S2; case Mips::fixup_MIPS_PCHI16: - Type = ELF::R_MIPS_PCHI16; - break; + return ELF::R_MIPS_PCHI16; case Mips::fixup_MIPS_PCLO16: - Type = ELF::R_MIPS_PCLO16; - break; + return ELF::R_MIPS_PCLO16; } - return Type; + llvm_unreachable("invalid fixup kind!"); } // Sort entries by SortOffset in descending order. @@ -271,9 +224,7 @@ static unsigned getMatchingLoType(const MCAssembler &Asm, if (Type == ELF::R_MIPS16_HI16) return ELF::R_MIPS16_LO16; - const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol); - - if (MCELF::GetBinding(SD) != ELF::STB_LOCAL) + if (Reloc.Symbol->getBinding() != ELF::STB_LOCAL) return ELF::R_MIPS_NONE; if (Type == ELF::R_MIPS_GOT16) @@ -405,9 +356,8 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, Relocs[I] = MipsRelocs[I].R; } -bool -MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, - unsigned Type) const { +bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const { // FIXME: This is extremely conservative. This really needs to use a // whitelist with a clear explanation for why each realocation needs to // point to the symbol, not to the section. 
@@ -434,7 +384,7 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, return true; case ELF::R_MIPS_32: - if (MCELF::getOther(SD) & (ELF::STO_MIPS_MICROMIPS >> 2)) + if (cast<MCSymbolELF>(Sym).getOther() & ELF::STO_MIPS_MICROMIPS) return true; // falltrough case ELF::R_MIPS_26: diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index d2b51831245a..b45d9cf621d7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -9,8 +9,8 @@ #include "MipsELFStreamer.h" #include "MipsTargetStreamer.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ELF.h" using namespace llvm; @@ -41,12 +41,10 @@ void MipsELFStreamer::createPendingLabelRelocs() { // FIXME: Also mark labels when in MIPS16 mode. if (ELFTargetStreamer->isMicroMipsEnabled()) { - for (auto Label : Labels) { - MCSymbolData &Data = getOrCreateSymbolData(Label); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. 
- MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2); + for (auto *L : Labels) { + auto *Label = cast<MCSymbolELF>(L); + getAssembler().registerSymbol(*Label); + Label->setOther(ELF::STO_MIPS_MICROMIPS); } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index e2bd5a815ab1..4d554583dc78 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -18,8 +18,7 @@ using namespace llvm; void MipsMCAsmInfo::anchor() { } -MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::mips) || (TheTriple.getArch() == Triple::mips64)) IsLittleEndian = false; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 59ff1c41ed6e..5d23fcbd7a44 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -17,12 +17,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class StringRef; + class Triple; class MipsMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit MipsMCAsmInfo(StringRef TT); + explicit MipsMCAsmInfo(const Triple &TheTriple); }; } // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index a0d9e1540515..93925bf8ca03 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -467,7 +467,7 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { int64_t Res; - if (Expr->EvaluateAsAbsolute(Res)) + if (Expr->evaluateAsAbsolute(Res)) return Res; MCExpr::ExprKind Kind = Expr->getKind(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index 74490f334b37..c85fc4816b08 100644 --- 
a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -41,7 +41,7 @@ bool MipsMCExpr::isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK, } const MipsMCExpr* -MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, +MipsMCExpr::create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, MCContext &Ctx) { VariantKind Kind; switch (VK) { @@ -64,7 +64,7 @@ MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, return new (Ctx) MipsMCExpr(Kind, Expr); } -void MipsMCExpr::PrintImpl(raw_ostream &OS) const { +void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_Mips_LO: OS << "%lo"; break; @@ -74,15 +74,15 @@ void MipsMCExpr::PrintImpl(raw_ostream &OS) const { } OS << '('; - Expr->print(OS); + Expr->print(OS, MAI); OS << ')'; } bool -MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup); + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); } void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index ee11461ef174..fd2ed17ee785 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -37,7 +37,7 @@ public: static bool isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK, const MCBinaryExpr *BE); - static const MipsMCExpr *Create(MCSymbolRefExpr::VariantKind VK, + static const MipsMCExpr *create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, MCContext &Ctx); /// getOpcode - Get the kind of this expression. @@ -46,13 +46,13 @@ public: /// getSubExpr - Get the child of this expression. 
const MCExpr *getSubExpr() const { return Expr; } - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS MipsMCExprs at the moment. diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 2e3179ac28d9..54d88632abdb 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -75,7 +75,8 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { +static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 8e6f047450e3..a051f4c123fc 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -17,10 +17,9 @@ #include "MipsTargetObjectFile.h" #include "MipsTargetStreamer.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -81,6 +80,12 @@ void 
MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetSoftFloat() { + forbidModuleDirective(); +} +void MipsTargetStreamer::emitDirectiveSetHardFloat() { + forbidModuleDirective(); +} void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} @@ -308,6 +313,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetPush() { MipsTargetStreamer::emitDirectiveSetPush(); } +void MipsTargetAsmStreamer::emitDirectiveSetSoftFloat() { + OS << "\t.set\tsoftfloat\n"; + MipsTargetStreamer::emitDirectiveSetSoftFloat(); +} + +void MipsTargetAsmStreamer::emitDirectiveSetHardFloat() { + OS << "\t.set\thardfloat\n"; + MipsTargetStreamer::emitDirectiveSetHardFloat(); +} + // Print a 32 bit hex number with all numbers. 
static void printHex32(unsigned Value, raw_ostream &OS) { OS << "0x"; @@ -358,7 +373,6 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP( MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) { MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI); - StringRef ModuleValue; OS << "\t.module\tfp="; OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } @@ -367,7 +381,6 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { MipsTargetStreamer::emitDirectiveSetFp(Value); - StringRef ModuleValue; OS << "\t.set\tfp="; OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } @@ -440,18 +453,16 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, MCA.setELFHeaderEFlags(EFlags); } -void MipsTargetELFStreamer::emitLabel(MCSymbol *Symbol) { +void MipsTargetELFStreamer::emitLabel(MCSymbol *S) { + auto *Symbol = cast<MCSymbolELF>(S); if (!isMicroMipsEnabled()) return; - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(Symbol); - uint8_t Type = MCELF::GetType(Data); + getStreamer().getAssembler().registerSymbol(*Symbol); + uint8_t Type = Symbol->getType(); if (Type != ELF::STT_FUNC) return; - // The "other" values are stored in the last 6 bits of the second byte - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2); + Symbol->setOther(ELF::STO_MIPS_MICROMIPS); } void MipsTargetELFStreamer::finish() { @@ -505,23 +516,18 @@ void MipsTargetELFStreamer::finish() { emitMipsAbiFlags(); } -void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, - const MCExpr *Value) { +void MipsTargetELFStreamer::emitAssignment(MCSymbol *S, const MCExpr *Value) { + auto *Symbol = cast<MCSymbolELF>(S); // If on rhs is micromips symbol then mark Symbol as microMips. 
if (Value->getKind() != MCExpr::SymbolRef) return; - const MCSymbol &RhsSym = - static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); + const auto &RhsSym = cast<MCSymbolELF>( + static_cast<const MCSymbolRefExpr *>(Value)->getSymbol()); - if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) + if (!(RhsSym.getOther() & ELF::STO_MIPS_MICROMIPS)) return; - MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - MCELF::setOther(SymbolData, ELF::STO_MIPS_MICROMIPS >> 2); + Symbol->setOther(ELF::STO_MIPS_MICROMIPS); } MCELFStreamer &MipsTargetELFStreamer::getStreamer() { @@ -568,7 +574,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { ELF::SHF_ALLOC | ELF::SHT_REL); const MCSymbolRefExpr *ExprRef = - MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context); + MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); Sec->setAlignment(4); @@ -693,12 +699,12 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { StringRef SymName("_gp_disp"); MCAssembler &MCA = getStreamer().getAssembler(); MCSymbol *GP_Disp = MCA.getContext().getOrCreateSymbol(SymName); - MCA.getOrCreateSymbolData(*GP_Disp); + MCA.registerSymbol(*GP_Disp); MCInst TmpInst; TmpInst.setOpcode(Mips::LUi); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::create( "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(HiSym)); getStreamer().EmitInstruction(TmpInst, STI); @@ -708,7 +714,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { TmpInst.setOpcode(Mips::ADDiu); 
TmpInst.addOperand(MCOperand::createReg(Mips::GP)); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::create( "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(LoSym)); getStreamer().EmitInstruction(TmpInst, STI); @@ -752,9 +758,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, getStreamer().EmitInstruction(Inst, STI); Inst.clear(); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create( &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext()); - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create( &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext()); // lui $gp, %hi(%neg(%gp_rel(funcSym))) diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 272933f7285e..8a27874a37ce 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -305,8 +305,9 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; -// Cavium Octeon cmMIPS instructions -let EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug +// Cavium Octeon cnMIPS instructions +let DecoderNamespace = "CnMips", + EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug AdditionalPredicates = [HasCnMips] in { class Count1s<string opstr, RegisterOperand RO>: @@ -353,6 +354,10 @@ class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op, let Defs = [AT]; } +class MFC2OP<string asmstr, RegisterOperand RO> : + InstSE<(outs RO:$rt, uimm16:$imm16), (ins), + !strconcat(asmstr, "\t$rt, $imm16"), [], NoItinerary, FrmFR>; + // Unsigned Byte Add let Pattern = [(set 
GPR64Opnd:$rd, (and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in @@ -415,6 +420,9 @@ let Defs = [MPL1, MPL2, P0, P1, P2] in def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x0f>; +// Move between CPU and coprocessor registers +def DMFC2_OCTEON : MFC2OP<"dmfc2", GPR64Opnd>, MFC2OP_FM<0x12, 1>; +def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>; } } diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a3995b8ceb99..f84666b6229e 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -41,7 +41,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -594,11 +594,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_BlockAddress: { @@ -778,7 +778,7 @@ void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) { MCInst I; I.setOpcode(Mips::JAL); I.addOperand( - MCOperand::createExpr(MCSymbolRefExpr::Create(Symbol, OutContext))); + MCOperand::createExpr(MCSymbolRefExpr::create(Symbol, OutContext))); OutStreamer->EmitInstruction(I, STI); } @@ -983,7 +983,8 @@ void MipsAsmPrinter::EmitFPCallStub( // __call_stub_fp_xxxx: // std::string x = "__call_stub_fp_" + std::string(Symbol); - MCSymbol *Stub = OutContext.getOrCreateSymbol(StringRef(x)); + MCSymbolELF *Stub = + cast<MCSymbolELF>(OutContext.getOrCreateSymbol(StringRef(x))); TS.emitDirectiveEnt(*Stub); MCSymbol *MType = OutContext.getOrCreateSymbol("__call_stub_fp_" + 
Twine(Symbol)); @@ -1028,10 +1029,10 @@ void MipsAsmPrinter::EmitFPCallStub( MCSymbol *Tmp = OutContext.createTempSymbol(); OutStreamer->EmitLabel(Tmp); - const MCSymbolRefExpr *E = MCSymbolRefExpr::Create(Stub, OutContext); - const MCSymbolRefExpr *T = MCSymbolRefExpr::Create(Tmp, OutContext); - const MCExpr *T_min_E = MCBinaryExpr::CreateSub(T, E, OutContext); - OutStreamer->EmitELFSize(Stub, T_min_E); + const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Stub, OutContext); + const MCSymbolRefExpr *T = MCSymbolRefExpr::create(Tmp, OutContext); + const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext); + OutStreamer->emitELFSize(Stub, T_min_E); TS.emitDirectiveEnd(x); OutStreamer->PopSection(); } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 4faee10744b3..3d020abe2704 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -82,6 +82,7 @@ class MipsFastISel final : public FastISel { LLVMContext *Context; bool fastLowerCall(CallLoweringInfo &CLI) override; + bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; bool TargetSupported; bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle @@ -94,6 +95,7 @@ private: bool selectLoad(const Instruction *I); bool selectStore(const Instruction *I); bool selectBranch(const Instruction *I); + bool selectSelect(const Instruction *I); bool selectCmp(const Instruction *I); bool selectFPExt(const Instruction *I); bool selectFPTrunc(const Instruction *I); @@ -102,6 +104,7 @@ private: bool selectTrunc(const Instruction *I); bool selectIntExt(const Instruction *I); bool selectShift(const Instruction *I); + bool selectDivRem(const Instruction *I, unsigned ISDOpcode); // Utility helper routines. 
bool isTypeLegal(Type *Ty, MVT &VT); @@ -140,6 +143,7 @@ private: unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned materializeExternalCallSym(const char *SynName); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); @@ -156,6 +160,12 @@ private: unsigned MemReg, int64_t MemOffset) { return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset); } + + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + // for some reason, this default is not generated by tablegen // so we explicitly generate it here. // @@ -359,6 +369,15 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } +unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) { + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + unsigned DestReg = createResultReg(RC); + emitInst(Mips::LW, DestReg) + .addReg(MFI->getGlobalBaseReg()) + .addExternalSymbol(SymName, MipsII::MO_GOT); + return DestReg; +} + // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { @@ -463,15 +482,51 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { } bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { - const GlobalValue *GV = dyn_cast<GlobalValue>(V); - if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) - return false; - if (!GV) - return false; + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + + if (const auto *I = dyn_cast<Instruction>(V)) { + // Check if the value is defined in the same basic block. 
This information + // is crucial to know whether or not folding an operand is valid. + if (I->getParent() == FuncInfo.MBB->getBasicBlock()) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: + // Look past bitcasts if its operand is in the same BB. + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::IntToPtr: + // Look past no-op inttoptrs if its operand is in the same BB. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::PtrToInt: + // Look past no-op ptrtoints if its operand is in the same BB. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + } + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { Addr.setGlobalValue(GV); return true; } + + // If all else fails, try to materialize the value in a register. 
+ if (!Addr.getGlobalValue()) { + Addr.setReg(getRegForValue(V)); + return Addr.getReg() != 0; + } + return false; } @@ -893,6 +948,50 @@ bool MipsFastISel::selectFPExt(const Instruction *I) { return true; } +bool MipsFastISel::selectSelect(const Instruction *I) { + assert(isa<SelectInst>(I) && "Expected a select instruction."); + + MVT VT; + if (!isTypeSupported(I->getType(), VT)) + return false; + + unsigned CondMovOpc; + const TargetRegisterClass *RC; + + if (VT.isInteger() && !VT.isVector() && VT.getSizeInBits() <= 32) { + CondMovOpc = Mips::MOVN_I_I; + RC = &Mips::GPR32RegClass; + } else if (VT == MVT::f32) { + CondMovOpc = Mips::MOVN_I_S; + RC = &Mips::FGR32RegClass; + } else if (VT == MVT::f64) { + CondMovOpc = Mips::MOVN_I_D32; + RC = &Mips::AFGR64RegClass; + } else + return false; + + const SelectInst *SI = cast<SelectInst>(I); + const Value *Cond = SI->getCondition(); + unsigned Src1Reg = getRegForValue(SI->getTrueValue()); + unsigned Src2Reg = getRegForValue(SI->getFalseValue()); + unsigned CondReg = getRegForValue(Cond); + + if (!Src1Reg || !Src2Reg || !CondReg) + return false; + + unsigned ResultReg = createResultReg(RC); + unsigned TempReg = createResultReg(RC); + + if (!ResultReg || !TempReg) + return false; + + emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg); + emitInst(CondMovOpc, ResultReg) + .addReg(Src1Reg).addReg(CondReg).addReg(TempReg); + updateValueMap(I, ResultReg); + return true; +} + // Attempt to fast-select a floating-point truncate instruction. bool MipsFastISel::selectFPTrunc(const Instruction *I) { if (UnsupportedFPMode) @@ -1135,7 +1234,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - // const char *SymName = CLI.SymName; + const char *SymName = CLI.SymName; // Allow SelectionDAG isel to handle tail calls. 
if (IsTailCall) @@ -1182,8 +1281,15 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; + if (!Addr.getGlobalValue()) + return false; + // Issue the call. - unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); + unsigned DestAddress; + if (SymName) + DestAddress = materializeExternalCallSym(SymName); + else + DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), @@ -1203,6 +1309,98 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { return finishCall(CLI, RetVT, NumBytes); } +bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::bswap: { + Type *RetTy = II->getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeSupported(RetTy, VT)) + return false; + + unsigned SrcReg = getRegForValue(II->getOperand(0)); + if (SrcReg == 0) + return false; + unsigned DestReg = createResultReg(&Mips::GPR32RegClass); + if (DestReg == 0) + return false; + if (VT == MVT::i16) { + if (Subtarget->hasMips32r2()) { + emitInst(Mips::WSBH, DestReg).addReg(SrcReg); + updateValueMap(II, DestReg); + return true; + } else { + unsigned TempReg[3]; + for (int i = 0; i < 3; i++) { + TempReg[i] = createResultReg(&Mips::GPR32RegClass); + if (TempReg[i] == 0) + return false; + } + emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); + emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); + emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); + emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); + updateValueMap(II, DestReg); + return true; + } + } else if (VT == MVT::i32) { + if (Subtarget->hasMips32r2()) { + unsigned TempReg = createResultReg(&Mips::GPR32RegClass); + emitInst(Mips::WSBH, 
TempReg).addReg(SrcReg); + emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16); + updateValueMap(II, DestReg); + return true; + } else { + unsigned TempReg[8]; + for (int i = 0; i < 8; i++) { + TempReg[i] = createResultReg(&Mips::GPR32RegClass); + if (TempReg[i] == 0) + return false; + } + + emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8); + emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24); + emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00); + emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]); + + emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00); + emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8); + + emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24); + emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]); + emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]); + updateValueMap(II, DestReg); + return true; + } + } + return false; + } + case Intrinsic::memcpy: + case Intrinsic::memmove: { + const auto *MTI = cast<MemTransferInst>(II); + // Don't handle volatile. + if (MTI->isVolatile()) + return false; + if (!MTI->getLength()->getType()->isIntegerTy(32)) + return false; + const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + } + case Intrinsic::memset: { + const MemSetInst *MSI = cast<MemSetInst>(II); + // Don't handle volatile. + if (MSI->isVolatile()) + return false; + if (!MSI->getLength()->getType()->isIntegerTy(32)) + return false; + return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + } + } + return false; +} + bool MipsFastISel::selectRet(const Instruction *I) { const Function &F = *I->getParent()->getParent(); const ReturnInst *Ret = cast<ReturnInst>(I); @@ -1420,6 +1618,50 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return Success ? 
DestReg : 0; } +bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { + EVT DestEVT = TLI.getValueType(I->getType(), true); + if (!DestEVT.isSimple()) + return false; + + MVT DestVT = DestEVT.getSimpleVT(); + if (DestVT != MVT::i32) + return false; + + unsigned DivOpc; + switch (ISDOpcode) { + default: + return false; + case ISD::SDIV: + case ISD::SREM: + DivOpc = Mips::SDIV; + break; + case ISD::UDIV: + case ISD::UREM: + DivOpc = Mips::UDIV; + break; + } + + unsigned Src0Reg = getRegForValue(I->getOperand(0)); + unsigned Src1Reg = getRegForValue(I->getOperand(1)); + if (!Src0Reg || !Src1Reg) + return false; + + emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg); + emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7); + + unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); + if (!ResultReg) + return false; + + unsigned MFOpc = (ISDOpcode == ISD::SREM || ISDOpcode == ISD::UREM) + ? Mips::MFHI + : Mips::MFLO; + emitInst(MFOpc, ResultReg); + + updateValueMap(I, ResultReg); + return true; +} + bool MipsFastISel::selectShift(const Instruction *I) { MVT RetVT; @@ -1505,6 +1747,22 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) { return selectLoad(I); case Instruction::Store: return selectStore(I); + case Instruction::SDiv: + if (!selectBinaryOp(I, ISD::SDIV)) + return selectDivRem(I, ISD::SDIV); + return true; + case Instruction::UDiv: + if (!selectBinaryOp(I, ISD::UDIV)) + return selectDivRem(I, ISD::UDIV); + return true; + case Instruction::SRem: + if (!selectBinaryOp(I, ISD::SREM)) + return selectDivRem(I, ISD::SREM); + return true; + case Instruction::URem: + if (!selectBinaryOp(I, ISD::UREM)) + return selectDivRem(I, ISD::UREM); + return true; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: @@ -1533,6 +1791,8 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::ICmp: case Instruction::FCmp: return selectCmp(I); + case Instruction::Select: + return 
selectSelect(I); } return false; } @@ -1563,6 +1823,33 @@ void MipsFastISel::simplifyAddress(Address &Addr) { } } +unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + // We treat the MUL instruction in a special way because it clobbers + // the HI0 & LO0 registers. The TableGen definition of this instruction can + // mark these registers only as implicitly defined. As a result, the + // register allocator runs out of registers when this instruction is + // followed by another instruction that defines the same registers too. + // We can fix this by explicitly marking those registers as dead. + if (MachineInstOpcode == Mips::MUL) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead) + .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead); + return ResultReg; + } + + return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1, + Op1IsKill); +} + namespace llvm { FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 826fbaf4d00a..a74c8abd2e2d 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -90,12 +90,23 @@ const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) { } // hasFP - Return true if the specified function should have a dedicated frame -// pointer register. 
This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. +// pointer register. This is true if the function has variable sized allocas, +// if it needs dynamic stack realignment, if frame pointer elimination is +// disabled, or if the frame address is taken. bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + TRI->needsStackRealignment(MF); +} + +bool MipsFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI->hasVarSizedObjects() && TRI->needsStackRealignment(MF); } uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 0b5183081e09..5eabd58e8686 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,6 +32,8 @@ public: bool hasFP(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; + bool isFPCloseToIncomingSP() const override { return false; } void diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 6c7f0895b426..67ddcc4dacb9 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3547,7 +3547,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, } bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // No global is ever allowed as a base. 
if (AM.BaseGV) return false; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 6ea14b53a57f..bc9a1ce64097 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -514,7 +514,8 @@ namespace llvm { return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 02ecf32d3e47..5f4fcc354616 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -226,6 +226,18 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt> let Inst{2-0} = sel; } +class MFC2OP_FM<bits<6> op, bits<5> mfmt> : StdArch { + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = mfmt; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + class ADD_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rd; bits<5> rs; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 9e611804376b..6b2a44d7a893 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -101,7 +101,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); if (!Offset) return MCOperand::createExpr(MCSym); @@ -109,8 +109,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. 
assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::createExpr(Add); } @@ -155,11 +155,11 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, MCSymbolRefExpr::VariantKind Kind) const { - const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::Create(BB1->getSymbol(), *Ctx); - const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx); - const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx); + const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx); + const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx); + const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx); - return MCOperand::createExpr(MipsMCExpr::Create(Kind, Sub, *Ctx)); + return MCOperand::createExpr(MipsMCExpr::create(Kind, Sub, *Ctx)); } void MipsMCInstLower:: diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f72fb4d622ec..f6647e6a8468 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Constants.h" @@ -178,6 +179,15 @@ getReservedRegs(const MachineFunction &MF) const { else { Reserved.set(Mips::FP); Reserved.set(Mips::FP_64); + + // Reserve the base register if we need to both realign the stack and + // allocate variable-sized objects at runtime. 
This should test the + // same conditions as MipsFrameLowering::hasBP(). + if (needsStackRealignment(MF) && + MF.getFrameInfo()->hasVarSizedObjects()) { + Reserved.set(Mips::S7); + Reserved.set(Mips::S7_64); + } } } @@ -271,6 +281,67 @@ getFrameRegister(const MachineFunction &MF) const { else return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : (IsN64 ? Mips::SP_64 : Mips::SP); +} +bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); + unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64; + unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64; + + // Support dynamic stack realignment only for targets with standard encoding. + if (!Subtarget.hasStandardEncoding()) + return false; + + // We can't perform dynamic stack realignment if we can't reserve the + // frame pointer register. + if (!MF.getRegInfo().canReserveReg(FP)) + return false; + + // We can realign the stack if we know the maximum call frame size and we + // don't have variable sized objects. + if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) + return true; + + // We have to reserve the base pointer register in the presence of variable + // sized objects. + return MF.getRegInfo().canReserveReg(BP); } +bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + bool CanRealign = canRealignStack(MF); + + // Avoid realigning functions that explicitly do not want to be realigned. + // Normally, we should report an error when a function should be dynamically + // realigned but also has the attribute no-realign-stack. Unfortunately, + // with this attribute, MachineFrameInfo clamps each new object's alignment + // to that of the stack's alignment as specified by the ABI. 
As a result, + // the information of whether we have objects with larger alignment + // requirement than the stack's alignment is already lost at this point. + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + const Function *F = MF.getFunction(); + if (F->hasFnAttribute(Attribute::StackAlignment)) { +#ifdef DEBUG + if (!CanRealign) + DEBUG(dbgs() << "It's not possible to realign the stack of the function: " + << F->getName() << "\n"); +#endif + return CanRealign; + } + + unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment(); + if (MFI->getMaxAlignment() > StackAlignment) { +#ifdef DEBUG + if (!CanRealign) + DEBUG(dbgs() << "It's not possible to realign the stack of the function: " + << F->getName() << "\n"); +#endif + return CanRealign; + } + + return false; +} diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 76e84bd142b9..ee1f6bcd7390 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -57,6 +57,14 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const; + + // Stack realignment queries. + bool canRealignStack(const MachineFunction &MF) const; + + bool needsStackRealignment(const MachineFunction &MF) const override; + /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 19efa59e1fdf..ec7bf314c641 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -382,6 +382,11 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned ADDiu = ABI.GetPtrAddiuOp(); + unsigned AND = ABI.IsN64() ? 
Mips::AND64 : Mips::AND; + + const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? + &Mips::GPR64RegClass : &Mips::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); @@ -464,15 +469,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, } if (MipsFI->callsEhReturn()) { - const TargetRegisterClass *PtrRC = - ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, - MipsFI->getEhDataRegFI(I), PtrRC, &RegInfo); + MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. @@ -497,6 +499,26 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + if (RegInfo.needsStackRealignment(MF)) { + // addiu $Reg, $zero, -MaxAlignment + // andi $sp, $sp, $Reg + unsigned VR = MF.getRegInfo().createVirtualRegister(RC); + assert(isInt<16>(MFI->getMaxAlignment()) && + "Function's alignment size requirement is not supported."); + int MaxAlign = - (signed) MFI->getMaxAlignment(); + + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign); + BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); + + if (hasBP(MF)) { + // move $s7, $sp + unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7; + BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP) + .addReg(SP) + .addReg(ZERO); + } + } } } @@ -606,10 +628,14 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); + unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; // Mark $fp as used if function has dedicated frame pointer. 
if (hasFP(MF)) MRI.setPhysRegUsed(FP); + // Mark $s7 as used if function has dedicated base pointer. + if (hasBP(MF)) + MRI.setPhysRegUsed(BP); // Create spill slots for eh data registers if function calls eh_return. if (MipsFI->callsEhReturn()) diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 8c74a98ecca6..132c3a1001ad 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -110,8 +110,11 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + MipsABIInfo ABI = static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -135,7 +138,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) FrameReg = ABI.GetStackPtr(); - else + else if (RegInfo->needsStackRealignment(MF)) { + if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex)) + FrameReg = ABI.GetBasePtr(); + else if (MFI->isFixedObjectIndex(FrameIndex)) + FrameReg = getFrameRegister(MF); + else + FrameReg = ABI.GetStackPtr(); + } else FrameReg = getFrameRegister(MF); // Calculate final offset. 
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 22b0c6c6685d..fed06005e9c8 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -72,6 +72,8 @@ public: virtual void emitDirectiveSetNoDsp(); virtual void emitDirectiveSetPop(); virtual void emitDirectiveSetPush(); + virtual void emitDirectiveSetSoftFloat(); + virtual void emitDirectiveSetHardFloat(); // PIC support virtual void emitDirectiveCpLoad(unsigned RegNo); @@ -188,6 +190,8 @@ public: void emitDirectiveSetNoDsp() override; void emitDirectiveSetPop() override; void emitDirectiveSetPush() override; + void emitDirectiveSetSoftFloat() override; + void emitDirectiveSetHardFloat() override; // PIC support void emitDirectiveCpLoad(unsigned RegNo) override; diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index 3615c146a527..6a65943515bb 100644 --- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -23,9 +23,9 @@ extern "C" void LLVMInitializeMipsTargetInfo() { /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel"); RegisterTarget<Triple::mips64, - /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]"); + /*HasJIT=*/true> A(TheMips64Target, "mips64", "Mips64 [experimental]"); RegisterTarget<Triple::mips64el, - /*HasJIT=*/false> B(TheMips64elTarget, + /*HasJIT=*/true> B(TheMips64elTarget, "mips64el", "Mips64el [experimental]"); } diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index cdd2f1f5944f..d48a7a9b1fcc 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -20,7 +20,7 @@ set(NVPTXCodeGen_sources NVPTXImageOptimizer.cpp NVPTXInstrInfo.cpp NVPTXLowerAggrCopies.cpp - NVPTXLowerStructArgs.cpp + NVPTXLowerKernelArgs.cpp NVPTXMCExpr.cpp NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp diff --git 
a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index ac92df901243..4594c22b8701 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -85,7 +85,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << markup("<imm:") << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "Unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index b9df3d18f941..ef36c13b49f1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -25,8 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile", void NVPTXMCAsmInfo::anchor() {} -NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) { if (TheTriple.getArch() == Triple::nvptx64) { PointerSize = CalleeSaveStackSlotSize = 8; } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index c3242866b177..b432e065c2f4 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -18,12 +18,12 @@ namespace llvm { class Target; -class StringRef; +class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit NVPTXMCAsmInfo(StringRef TT); + explicit NVPTXMCAsmInfo(const Triple &TheTriple); }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 382525d27a25..477b0bac6ca8 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass 
*createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); -FunctionPass *createNVPTXLowerStructArgsPass(); +FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3bbea400e53e..298b992b241f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -266,7 +266,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_ExternalSymbol: @@ -283,11 +283,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, default: report_fatal_error("Unsupported FP type"); break; case Type::FloatTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); + NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); break; case Type::DoubleTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); + NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); break; } break; @@ -334,7 +334,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { const MCExpr *Expr; - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); return MCOperand::createExpr(Expr); } @@ -418,9 +418,8 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( const MachineBasicBlock &MBB) const { MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - // TODO: isLoopHeader() should take "const 
MachineBasicBlock *". // We insert .pragma "nounroll" only to the loop header. - if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB))) + if (!LI.isLoopHeader(&MBB)) return false; // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, @@ -468,7 +467,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { printReturnValStr(*MF, O); } - O << *CurrentFnSym; + CurrentFnSym->print(O, MAI); emitFunctionParamList(*MF, O); @@ -625,7 +624,8 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { else O << ".func "; printReturnValStr(F, O); - O << *getSymbol(F) << "\n"; + getSymbol(F)->print(O, MAI); + O << "\n"; emitFunctionParamList(F, O); O << ";\n"; } @@ -1172,7 +1172,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << getPTXFundamentalTypeStr(ETy, false); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); // Ptx allows variable initilization only for constant and global state // spaces. @@ -1189,11 +1189,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds zero-initializer to variables that don't have an // initial value, so skip warning for this case. 
if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = - ("initial value of '" + GVar->getName() + - "' is not allowed in addrspace(" + - Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); - report_fatal_error(warnMsg.c_str()); + report_fatal_error("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(PTy->getAddressSpace()) + ")"); } } } @@ -1220,15 +1218,21 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { - O << " .u64 " << *getSymbol(GVar) << "["; + O << " .u64 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 8; } else { - O << " .u32 " << *getSymbol(GVar) << "["; + O << " .u32 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 4; } O << "]"; } else { - O << " .b8 " << *getSymbol(GVar) << "["; + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize; O << "]"; } @@ -1236,7 +1240,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, aggBuffer.print(); O << "}"; } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1244,7 +1249,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } } } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1351,7 +1357,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); return; } @@ -1366,9 +1372,11 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *getSymbol(GVar) << "["; + O 
<< " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; if (ElementSize) { - O << itostr(ElementSize); + O << ElementSize; } O << "]"; break; @@ -1408,11 +1416,13 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { - O << *getSymbol(I->getParent()) << "_param_" << paramIndex; + getSymbol(I->getParent())->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { @@ -1446,21 +1456,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << "\t.param .u64 .ptr .surfref "; else O << "\t.param .surfref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } else { // Default image is read_only if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .texref "; else O << "\t.param .texref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } } else { if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .samplerref "; else O << "\t.param .samplerref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } continue; } @@ -1716,10 +1729,10 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { } if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } @@ -1733,20 +1746,44 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { if (const 
GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } else { - O << *lowerConstant(CPV); + lowerConstant(CPV)->print(O, MAI); return; } } llvm_unreachable("Not scalar type found in printScalarConstant()"); } +// These utility functions assure we get the right sequence of bytes for a given +// type even for big-endian machines +template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { + int64_t vp = (int64_t)val; + for (unsigned i = 0; i < sizeof(T); ++i) { + p[i] = (unsigned char)vp; + vp >>= 8; + } +} +static void ConvertFloatToBytes(unsigned char *p, float val) { + int32_t *vp = (int32_t *)&val; + for (unsigned i = 0; i < sizeof(int32_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} +static void ConvertDoubleToBytes(unsigned char *p, double val) { + int64_t *vp = (int64_t *)&val; + for (unsigned i = 0; i < sizeof(int64_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} + void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1760,30 +1797,30 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, return; } - unsigned char *ptr; + unsigned char ptr[8]; switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = &c; + ConvertIntToBytes<>(ptr, c); aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = (unsigned char *)&int16; + ConvertIntToBytes<>(ptr, int16); aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (const ConstantInt *constInt 
= dyn_cast<ConstantInt>(CPV)) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1798,14 +1835,14 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { long long int64 = (long long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1827,11 +1864,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char *)&float32; + ConvertFloatToBytes(ptr, float32); aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char *)&float64; + ConvertDoubleToBytes(ptr, float64); aggBuffer->addBytes(ptr, 8, Bytes); } else { llvm_unreachable("unsupported fp const type"); @@ -1993,16 +2030,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) MCContext &Ctx = OutContext; if 
(CV->isNullValue() || isa<UndefValue>(CV)) - return MCConstantExpr::Create(0, Ctx); + return MCConstantExpr::create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) - return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + return MCConstantExpr::create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { const MCSymbolRefExpr *Expr = - MCSymbolRefExpr::Create(getSymbol(GV), Ctx); + MCSymbolRefExpr::create(getSymbol(GV), Ctx); if (ProcessingGeneric) { - return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx); + return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); } else { return Expr; } @@ -2059,7 +2096,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return Base; int64_t Offset = OffsetAI.getSExtValue(); - return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -2102,8 +2139,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); - return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); } // The MC library also has a right-shift operator, but it isn't consistently @@ -2113,7 +2150,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } } @@ -2123,7 +2160,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { switch (Expr.getKind()) { case MCExpr::Target: - return cast<MCTargetExpr>(&Expr)->PrintImpl(OS); + return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); case MCExpr::Constant: OS << cast<MCConstantExpr>(Expr).getValue(); return; @@ -2131,7 +2168,7 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); const MCSymbol &Sym = SRE.getSymbol(); - OS << Sym; + Sym.print(OS, MAI); return; } @@ -2256,11 +2293,11 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; default: diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 301c68609a29..f6f7685e76f9 100644 --- 
a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -165,10 +165,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *Name; + Name->print(O, AP.MAI); O << ")"; } else { - O << *Name; + Name->print(O, AP.MAI); } } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { const MCExpr *Expr = diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index ae63caec1320..cfff0019b8d9 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -10,34 +10,54 @@ // When a load/store accesses the generic address space, checks whether the // address is casted from a non-generic address space. If so, remove this // addrspacecast because accessing non-generic address spaces is typically -// faster. Besides seeking addrspacecasts, this optimization also traces into -// the base pointer of a GEP. +// faster. Besides removing addrspacecasts directly used by loads/stores, this +// optimization also recursively traces into a GEP's pointer operand and a +// bitcast's source to find more eliminable addrspacecasts. // // For instance, the code below loads a float from an array allocated in // addrspace(3). 
// -// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* -// %1 = gep [10 x float]* %0, i64 0, i64 %i -// %2 = load float* %1 ; emits ld.f32 +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = bitcast float* %1 to i32* +// %3 = load i32* %2 ; emits ld.u32 // -// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast -// and the GEP to expose more optimization opportunities to function +// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP, +// and the bitcast to expose more optimization opportunities to function // optimizeMemoryInst. The intermediate code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %1 = addrspacecast float addrspace(3)* %0 to float* -// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %2 = addrspacecast i32 addrspace(3)* %1 to i32* +// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly // // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed // generic pointers, and folds the load and the addrspacecast into a load from // the original address space. The final code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32 // // This pass may remove an addrspacecast in a different BB. Therefore, we // implement it as a FunctionPass. // +// TODO: +// The current implementation doesn't handle PHINodes. Eliminating +// addrspacecasts used by PHINodes is trickier because PHINodes can introduce +// loops in data flow. 
For example, +// +// %generic.input = addrspacecast float addrspace(3)* %input to float* +// loop: +// %y = phi [ %generic.input, %y2 ] +// %y2 = getelementptr %y, 1 +// %v = load %y2 +// br ..., label %loop, ... +// +// Marking %y2 shared depends on marking %y shared, but %y also data-flow +// depends on %y2. We probably need an iterative fix-point algorithm on handle +// this case. +// //===----------------------------------------------------------------------===// #include "NVPTX.h" @@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { public: static char ID; NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override; +private: /// Optimizes load/store instructions. Idx is the index of the pointer operand /// (0 for load, and 1 for store). Returns true if it changes anything. bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); + /// Recursively traces into a GEP's pointer operand or a bitcast's source to + /// find an eliminable addrspacecast, and hoists that addrspacecast to the + /// outermost level. For example, this function transforms + /// bitcast(gep(gep(addrspacecast(X)))) + /// to + /// addrspacecast(bitcast(gep(gep(X)))). + /// + /// This reordering exposes to optimizeMemoryInstruction more + /// optimization opportunities on loads and stores. + /// + /// Returns true if this function succesfully hoists an eliminable + /// addrspacecast or V is already such an addrspacecast. /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, - /// indices)". This reordering exposes to optimizeMemoryInstruction more - /// optimization opportunities on loads and stores. Returns true if it changes - /// the program. - bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); + /// indices)". + bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0); + /// Helper function for GEPs. + bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); + /// Helper function for bitcasts. 
+ bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; } @@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", "Remove unnecessary non-generic-to-generic addrspacecasts", false, false) -// Decides whether removing Cast is valid and beneficial. Cast can be an -// instruction or a constant expression. -static bool IsEliminableAddrSpaceCast(Operator *Cast) { - // Returns false if not even an addrspacecast. - if (Cast->getOpcode() != Instruction::AddrSpaceCast) +// Decides whether V is an addrspacecast and shortcutting V in load/store is +// valid and beneficial. +static bool isEliminableAddrSpaceCast(Value *V) { + // Returns false if V is not even an addrspacecast. + Operator *Cast = dyn_cast<Operator>(V); + if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast) return false; Value *Src = Cast->getOperand(0); @@ -108,67 +143,119 @@ static bool IsEliminableAddrSpaceCast(Operator *Cast) { DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( - GEPOperator *GEP) { - Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); - if (!Cast) +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP, + int Depth) { + if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1)) return false; - if (!IsEliminableAddrSpaceCast(Cast)) - return false; + // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now + // an eliminable addrspacecast. 
+ assert(isEliminableAddrSpaceCast(GEP->getPointerOperand())); + Operator *Cast = cast<Operator>(GEP->getPointerOperand()); SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { - // %1 = gep (addrspacecast X), indices + // GEP = gep (addrspacecast X), indices // => - // %0 = gep X, indices - // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create( + // NewGEP = gep X, indices + // NewASC = addrspacecast NewGEP + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP->getSourceElementType(), Cast->getOperand(0), Indices, - GEP->getName(), GEPI); - NewGEPI->setIsInBounds(GEP->isInBounds()); - GEP->replaceAllUsesWith( - new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); + "", GEPI); + NewGEP->setIsInBounds(GEP->isInBounds()); + Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); + NewASC->takeName(GEP); + GEP->replaceAllUsesWith(NewASC); } else { // GEP is a constant expression. - Constant *NewGEPCE = ConstantExpr::getGetElementPtr( + Constant *NewGEP = ConstantExpr::getGetElementPtr( GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( - ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); + ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType())); } return true; } -bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, - unsigned Idx) { - // If the pointer operand is a GEP, hoist the addrspacecast if any from the - // GEP to expose more optimization opportunites. 
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { - hoistAddrSpaceCastFromGEP(GEP); - } +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( + BitCastOperator *BC, int Depth) { + if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1)) + return false; - // load/store (addrspacecast X) => load/store X if shortcutting the - // addrspacecast is valid and can improve performance. - // - // e.g., - // %1 = addrspacecast float addrspace(3)* %0 to float* - // %2 = load float* %1 - // -> - // %2 = load float addrspace(3)* %0 - // - // Note: the addrspacecast can also be a constant expression. - if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { - if (IsEliminableAddrSpaceCast(Cast)) { - MI->setOperand(Idx, Cast->getOperand(0)); - return true; - } + // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now + // an eliminable addrspacecast. + assert(isEliminableAddrSpaceCast(BC->getOperand(0))); + Operator *Cast = cast<Operator>(BC->getOperand(0)); + + // Cast = addrspacecast Src + // BC = bitcast Cast + // => + // Cast' = bitcast Src + // BC' = addrspacecast Cast' + Value *Src = Cast->getOperand(0); + Type *TypeOfNewCast = + PointerType::get(BC->getType()->getPointerElementType(), + Src->getType()->getPointerAddressSpace()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) { + Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); + Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); + NewBC->takeName(BC); + BC->replaceAllUsesWith(NewBC); + } else { + // BC is a constant expression. + Constant *NewCast = + ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast); + Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); + BC->replaceAllUsesWith(NewBC); } + return true; +} + +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, + int Depth) { + // Returns true if V is already an eliminable addrspacecast. 
+ if (isEliminableAddrSpaceCast(V)) + return true; + + // Limit the depth to prevent this recursive function from running too long. + const int MaxDepth = 20; + if (Depth >= MaxDepth) + return false; + + // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer + // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts + // that are not directly used by the load/store. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return hoistAddrSpaceCastFromGEP(GEP, Depth); + + if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) + return hoistAddrSpaceCastFromBitCast(BC, Depth); return false; } +bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, + unsigned Idx) { + if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) { + // load/store (addrspacecast X) => load/store X if shortcutting the + // addrspacecast is valid and can improve performance. + // + // e.g., + // %1 = addrspacecast float addrspace(3)* %0 to float* + // %2 = load float* %1 + // -> + // %2 = load float addrspace(3)* %0 + // + // Note: the addrspacecast can also be a constant expression. + assert(isEliminableAddrSpaceCast(MI->getOperand(Idx))); + Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx)); + MI->setOperand(Idx, ASC->getOperand(0)); + return true; + } + return false; +} + bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { if (DisableFavorNonGeneric) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index fa38a686fcbf..232a611d1760 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -613,6 +613,10 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; break; + case ADDRESS_SPACE_PARAM: + Opc = TM.is64Bit() ? 
NVPTX::nvvm_ptr_gen_to_param_64 + : NVPTX::nvvm_ptr_gen_to_param; + break; } return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 805847a581fa..b5af72ab855a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3725,7 +3725,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 5142ae3cd88f..ed94775b3002 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,7 +456,8 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; @@ -497,12 +498,6 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. 
- return InlineAsm::Constraint_m; - } - const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp new file mode 100644 index 000000000000..24dcb122b94e --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -0,0 +1,170 @@ +//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pointer arguments to kernel functions need to be lowered specially. +// +// 1. Copy byval struct args to local memory. This is a preparation for handling +// cases like +// +// kernel void foo(struct A arg, ...) +// { +// struct A *p = &arg; +// ... +// ... = p->filed1 ... (this is no generic address for .param) +// p->filed2 = ... (this is no write access to .param) +// } +// +// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the +// global address space. This allows later optimizations to emit +// ld.global.*/st.global.* for accessing these pointer arguments. For +// example, +// +// define void @foo(float* %input) { +// %v = load float, float* %input, align 4 +// ... +// } +// +// becomes +// +// define void @foo(float* %input) { +// %input2 = addrspacecast float* %input to float addrspace(1)* +// %input3 = addrspacecast float addrspace(1)* %input2 to float* +// %v = load float, float* %input3, align 4 +// ... +// } +// +// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to +// +// define void @foo(float* %input) { +// %input2 = addrspacecast float* %input to float addrspace(1)* +// %v = load float, float addrspace(1)* %input2, align 4 +// ... 
+// } +// +// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes +// don't cancel the addrspacecast pair this pass emits. +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "NVPTXTargetMachine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); +} + +namespace { +class NVPTXLowerKernelArgs : public FunctionPass { + bool runOnFunction(Function &F) override; + + // handle byval parameters + void handleByValParam(Argument *); + // handle non-byval pointer parameters + void handlePointerParam(Argument *); + +public: + static char ID; // Pass identification, replacement for typeid + NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) {} + const char *getPassName() const override { + return "Lower pointer arguments of CUDA kernels"; + } + +private: + const NVPTXTargetMachine *TM; +}; +} // namespace + +char NVPTXLowerKernelArgs::ID = 1; + +INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args", + "Lower kernel arguments (NVPTX)", false, false) + +// ============================================================================= +// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d), +// then add the following instructions to the first basic block: +// +// %temp = alloca %struct.x, align 8 +// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)* +// %tv = load %struct.x addrspace(101)* %tempd +// store %struct.x %tv, %struct.x* %temp, align 8 +// +// The above code allocates some space in the stack and copies the incoming +// struct from param space to local space. +// Then replace all occurences of %d by %temp. 
+// ============================================================================= +void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { + Function *Func = Arg->getParent(); + Instruction *FirstInst = &(Func->getEntryBlock().front()); + PointerType *PType = dyn_cast<PointerType>(Arg->getType()); + + assert(PType && "Expecting pointer type in handleByValParam"); + + Type *StructType = PType->getElementType(); + AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); + // Set the alignment to alignment of the byval parameter. This is because, + // later load/stores assume that alignment, and we are going to replace + // the use of the byval parameter with this alloca instruction. + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + Arg->replaceAllUsesWith(AllocA); + + Value *ArgInParam = new AddrSpaceCastInst( + Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(), + FirstInst); + LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst); + new StoreInst(LI, AllocA, FirstInst); +} + +void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) { + assert(!Arg->hasByValAttr() && + "byval params should be handled by handleByValParam"); + + Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin(); + Instruction *ArgInGlobal = new AddrSpaceCastInst( + Arg, PointerType::get(Arg->getType()->getPointerElementType(), + ADDRESS_SPACE_GLOBAL), + Arg->getName(), FirstInst); + Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(), + Arg->getName(), FirstInst); + // Replace with ArgInGeneric all uses of Args except ArgInGlobal. + Arg->replaceAllUsesWith(ArgInGeneric); + ArgInGlobal->setOperand(0, Arg); +} + + +// ============================================================================= +// Main function for this pass. +// ============================================================================= +bool NVPTXLowerKernelArgs::runOnFunction(Function &F) { + // Skip non-kernels. 
See the comments at the top of this file. + if (!isKernelFunction(F)) + return false; + + for (Argument &Arg : F.args()) { + if (Arg.getType()->isPointerTy()) { + if (Arg.hasByValAttr()) + handleByValParam(&Arg); + else if (TM && TM->getDrvInterface() == NVPTX::CUDA) + handlePointerParam(&Arg); + } + } + return true; +} + +FunctionPass * +llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) { + return new NVPTXLowerKernelArgs(TM); +} diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp deleted file mode 100644 index 68dfbb716139..000000000000 --- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Copy struct args to local memory. This is needed for kernel functions only. -// This is a preparation for handling cases like -// -// kernel void foo(struct A arg, ...) -// { -// struct A *p = &arg; -// ... -// ... = p->filed1 ... (this is no generic address for .param) -// p->filed2 = ... 
(this is no write access to .param) -// } -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "NVPTXUtilities.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" - -using namespace llvm; - -namespace llvm { -void initializeNVPTXLowerStructArgsPass(PassRegistry &); -} - -namespace { -class NVPTXLowerStructArgs : public FunctionPass { - bool runOnFunction(Function &F) override; - - void handleStructPtrArgs(Function &); - void handleParam(Argument *); - -public: - static char ID; // Pass identification, replacement for typeid - NVPTXLowerStructArgs() : FunctionPass(ID) {} - const char *getPassName() const override { - return "Copy structure (byval *) arguments to stack"; - } -}; -} // namespace - -char NVPTXLowerStructArgs::ID = 1; - -INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args", - "Lower structure arguments (NVPTX)", false, false) - -void NVPTXLowerStructArgs::handleParam(Argument *Arg) { - Function *Func = Arg->getParent(); - Instruction *FirstInst = &(Func->getEntryBlock().front()); - PointerType *PType = dyn_cast<PointerType>(Arg->getType()); - - assert(PType && "Expecting pointer type in handleParam"); - - Type *StructType = PType->getElementType(); - AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); - - /* Set the alignment to alignment of the byval parameter. This is because, - * later load/stores assume that alignment, and we are going to replace - * the use of the byval parameter with this alloca instruction. 
- */ - AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); - - Arg->replaceAllUsesWith(AllocA); - - // Get the cvt.gen.to.param intrinsic - Type *CvtTypes[] = { - Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM), - Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC)}; - Function *CvtFunc = Intrinsic::getDeclaration( - Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes); - - Value *BitcastArgs[] = { - new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC), - Arg->getName(), FirstInst)}; - CallInst *CallCVT = - CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst); - - BitCastInst *BitCast = new BitCastInst( - CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM), - Arg->getName(), FirstInst); - LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst); - new StoreInst(LI, AllocA, FirstInst); -} - -// ============================================================================= -// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then -// add the following instructions to the first basic block : -// -// %temp = alloca %struct.x, align 8 -// %tt1 = bitcast %struct.x * %d to i8 * -// %tt2 = llvm.nvvm.cvt.gen.to.param %tt2 -// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) * -// %tv = load %struct.x addrspace(101) * %tempd -// store %struct.x %tv, %struct.x * %temp, align 8 -// -// The above code allocates some space in the stack and copies the incoming -// struct from param space to local space. -// Then replace all occurences of %d by %temp. 
-// ============================================================================= -void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) { - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { - handleParam(&Arg); - } - } -} - -// ============================================================================= -// Main function for this pass. -// ============================================================================= -bool NVPTXLowerStructArgs::runOnFunction(Function &F) { - // Skip non-kernels. See the comments at the top of this file. - if (!isKernelFunction(F)) - return false; - - handleStructPtrArgs(F); - return true; -} - -FunctionPass *llvm::createNVPTXLowerStructArgsPass() { - return new NVPTXLowerStructArgs(); -} diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index 779b65ecc39f..3c98b9febf85 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -16,11 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "nvptx-mcexpr" const NVPTXFloatMCExpr* -NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { +NVPTXFloatMCExpr::create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { return new (Ctx) NVPTXFloatMCExpr(Kind, Flt); } -void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { +void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool Ignored; unsigned NumHex; APFloat APF = getAPFloat(); @@ -47,11 +47,14 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { } const NVPTXGenericMCSymbolRefExpr* -NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr, +NVPTXGenericMCSymbolRefExpr::create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx) { return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr); } -void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { - OS << "generic(" << *SymExpr << ")"; +void NVPTXGenericMCSymbolRefExpr::printImpl(raw_ostream &OS, + const MCAsmInfo *MAI) 
const { + OS << "generic("; + SymExpr->print(OS, MAI); + OS << ")"; } diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 8c6b219abd13..46b4b33e7e40 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -36,17 +36,17 @@ public: /// @name Construction /// @{ - static const NVPTXFloatMCExpr *Create(VariantKind Kind, APFloat Flt, + static const NVPTXFloatMCExpr *create(VariantKind Kind, APFloat Flt, MCContext &Ctx); - static const NVPTXFloatMCExpr *CreateConstantFPSingle(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPSingle(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); } - static const NVPTXFloatMCExpr *CreateConstantFPDouble(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPDouble(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); } /// @} @@ -61,14 +61,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. 
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -92,7 +92,7 @@ public: /// @{ static const NVPTXGenericMCSymbolRefExpr - *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); + *create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); /// @} /// @name Accessors @@ -103,14 +103,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ac27c30aabab..a6466687bc7b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); -void initializeNVPTXLowerStructArgsPass(PassRegistry &); +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); - initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); + initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); } static std::string computeDataLayout(bool 
is64Bit) { @@ -163,7 +163,13 @@ void NVPTXPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs + // because we plan to merge NVPTXLowerKernelArgs and + // NVPTXFavorNonGenericAddrSpaces into one pass. + addPass(createSROAPass()); // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. @@ -181,6 +187,9 @@ void NVPTXPassConfig::addIRPasses() { addPass(createEarlyCSEPass()); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. 
+ addPass(createEarlyCSEPass()); } bool NVPTXPassConfig::addInstSelector() { diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 83de4d996993..1736d03961f7 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" @@ -772,7 +773,7 @@ public: if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) { int64_t Res; - if (TE->EvaluateAsConstant(Res)) + if (TE->evaluateAsConstant(Res)) return CreateContextImm(Res, S, E, IsPPC64); } @@ -814,13 +815,13 @@ addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) { } } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) { if (BinExpr->getOpcode() == MCBinaryExpr::Sub) { - const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(), + const MCExpr *NE = MCBinaryExpr::createSub(BinExpr->getRHS(), BinExpr->getLHS(), Ctx); Inst.addOperand(MCOperand::createExpr(NE)); return; } } - Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::CreateMinus(Expr, Ctx))); + Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx))); } void PPCAsmParser::ProcessInstruction(MCInst &Inst, @@ -1330,7 +1331,7 @@ ExtractModifierFromExpr(const MCExpr *E, return nullptr; } - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context); + return MCSymbolRefExpr::create(&SRE->getSymbol(), Context); } case MCExpr::Unary: { @@ -1338,7 +1339,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); if (!Sub) return nullptr; - return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); } case MCExpr::Binary: { @@ -1362,7 
+1363,7 @@ ExtractModifierFromExpr(const MCExpr *E, else return nullptr; - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); } } @@ -1396,7 +1397,7 @@ FixupVariantKind(const MCExpr *E) { default: return E; } - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context); + return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, Context); } case MCExpr::Unary: { @@ -1404,7 +1405,7 @@ FixupVariantKind(const MCExpr *E) { const MCExpr *Sub = FixupVariantKind(UE->getSubExpr()); if (Sub == UE->getSubExpr()) return E; - return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); } case MCExpr::Binary: { @@ -1413,7 +1414,7 @@ FixupVariantKind(const MCExpr *E) { const MCExpr *RHS = FixupVariantKind(BE->getRHS()); if (LHS == BE->getLHS() && RHS == BE->getRHS()) return E; - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); } } @@ -1438,7 +1439,7 @@ ParseExpression(const MCExpr *&EVal) { PPCMCExpr::VariantKind Variant; const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); if (E) - EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext()); + EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext()); return false; } @@ -1485,7 +1486,7 @@ ParseDarwinExpression(const MCExpr *&EVal) { if (getLexer().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); Parser.Lex(); // Eat the ')' - EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext()); + EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext()); } return false; } @@ -1863,7 +1864,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { Error(L, "expected identifier in directive"); return false; } - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbolELF *Sym = 
cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) { Error(L, "unexpected token in directive"); @@ -1936,19 +1937,19 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E, MCContext &Ctx) { switch (Variant) { case MCSymbolRefExpr::VK_PPC_LO: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HI: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHER: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHERA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHEST: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHESTA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); default: return nullptr; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 1a130e87bf3e..5e1d22789056 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -445,6 +445,6 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } diff --git 
a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 86885e111dd1..72742dc3ee20 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -11,12 +11,12 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -142,13 +142,14 @@ public: // to resolve the fixup directly. Emit a relocation and leave // resolution of the final target address to the linker. if (const MCSymbolRefExpr *A = Target.getSymA()) { - const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol()); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - unsigned Other = MCELF::getOther(Data) << 2; - if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) - IsResolved = false; + if (const auto *S = dyn_cast<MCSymbolELF>(&A->getSymbol())) { + // The "other" values are stored in the last 6 bits of the second + // byte. The traditional defines for STO values assume the full byte + // and thus the shift to pack it. 
+ unsigned Other = S->getOther() << 2; + if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) + IsResolved = false; + } } break; } @@ -176,7 +177,7 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0x60000000); + OW->write32(0x60000000); OW->WriteZeros(Count % 4); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 3e3489fc46aa..992be5b966c1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -11,9 +11,9 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -28,7 +28,7 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; } @@ -395,7 +395,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } -bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, +bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { switch (Type) { default: @@ -407,7 +407,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, // The "other" values are stored in the last 6 bits of the second byte. // The traditional defines for STO values assume the full byte and thus // the shift to pack it. 
- unsigned Other = MCELF::getOther(SD) << 2; + unsigned Other = cast<MCSymbolELF>(Sym).getOther() << 2; return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0; } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 17f4cd421641..95379246f301 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOpcodes.h" @@ -116,38 +117,19 @@ public: switch (Size) { case 4: if (IsLittleEndian) { - OS << (char)(Bits); - OS << (char)(Bits >> 8); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 24); + support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); } else { - OS << (char)(Bits >> 24); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 8); - OS << (char)(Bits); + support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); } break; case 8: // If we emit a pair of instructions, the first one is // always in the top 32 bits, even on little-endian. 
if (IsLittleEndian) { - OS << (char)(Bits >> 32); - OS << (char)(Bits >> 40); - OS << (char)(Bits >> 48); - OS << (char)(Bits >> 56); - OS << (char)(Bits); - OS << (char)(Bits >> 8); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 24); + uint64_t Swapped = (Bits << 32) | (Bits >> 32); + support::endian::Writer<support::little>(OS).write<uint64_t>(Swapped); } else { - OS << (char)(Bits >> 56); - OS << (char)(Bits >> 48); - OS << (char)(Bits >> 40); - OS << (char)(Bits >> 32); - OS << (char)(Bits >> 24); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 8); - OS << (char)(Bits); + support::endian::Writer<support::big>(OS).write<uint64_t>(Bits); } break; default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 7204befe15ee..6b97d4c1456b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -19,12 +19,12 @@ using namespace llvm; #define DEBUG_TYPE "ppcmcexpr" const PPCMCExpr* -PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); } -void PPCMCExpr::PrintImpl(raw_ostream &OS) const { +void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if (isDarwinSyntax()) { switch (Kind) { default: llvm_unreachable("Invalid kind!"); @@ -34,10 +34,10 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { } OS << '('; - getSubExpr()->print(OS); + getSubExpr()->print(OS, MAI); OS << ')'; } else { - getSubExpr()->print(OS); + getSubExpr()->print(OS, MAI); switch (Kind) { default: llvm_unreachable("Invalid kind!"); @@ -53,21 +53,21 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { } bool -PPCMCExpr::EvaluateAsConstant(int64_t &Res) const { +PPCMCExpr::evaluateAsConstant(int64_t &Res) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr)) + if 
(!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr)) return false; if (!Value.isAbsolute()) return false; - Res = EvaluateAsInt64(Value.getConstant()); + Res = evaluateAsInt64(Value.getConstant()); return true; } int64_t -PPCMCExpr::EvaluateAsInt64(int64_t Value) const { +PPCMCExpr::evaluateAsInt64(int64_t Value) const { switch (Kind) { case VK_PPC_LO: return Value & 0xffff; @@ -90,16 +90,16 @@ PPCMCExpr::EvaluateAsInt64(int64_t Value) const { } bool -PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup)) + if (!getSubExpr()->evaluateAsRelocatable(Value, Layout, Fixup)) return false; if (Value.isAbsolute()) { - int64_t Result = EvaluateAsInt64(Value.getConstant()); + int64_t Result = evaluateAsInt64(Value.getConstant()); if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) && (Result >= 0x8000)) return false; @@ -138,7 +138,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA; break; } - Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); + Sym = MCSymbolRefExpr::create(&Sym->getSymbol(), Modifier, Context); Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index ca72ccf0f76e..a641780516b3 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -34,7 +34,7 @@ private: const MCExpr *Expr; bool IsDarwin; - int64_t EvaluateAsInt64(int64_t Value) const; + int64_t evaluateAsInt64(int64_t Value) const; explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin) : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {} @@ -43,22 +43,22 @@ public: /// @name Construction /// @{ - static const PPCMCExpr *Create(VariantKind 
Kind, const MCExpr *Expr, + static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx); - static const PPCMCExpr *CreateLo(const MCExpr *Expr, + static const PPCMCExpr *createLo(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_LO, Expr, isDarwin, Ctx); + return create(VK_PPC_LO, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHi(const MCExpr *Expr, + static const PPCMCExpr *createHi(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_HI, Expr, isDarwin, Ctx); + return create(VK_PPC_HI, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHa(const MCExpr *Expr, + static const PPCMCExpr *createHa(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_HA, Expr, isDarwin, Ctx); + return create(VK_PPC_HA, Expr, isDarwin, Ctx); } /// @} @@ -77,19 +77,19 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS PPCMCExprs at the moment. 
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} - bool EvaluateAsConstant(int64_t &Res) const; + bool evaluateAsConstant(int64_t &Res) const; static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 847437611a5f..1e8e8046669d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -16,14 +16,14 @@ #include "PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -70,8 +70,8 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); @@ -132,8 +132,14 @@ public: void emitAbiVersion(int AbiVersion) override { OS << "\t.abiversion " << AbiVersion << '\n'; } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { - OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n'; + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + + OS << "\t.localentry\t"; + S->print(OS, MAI); + OS << ", "; + LocalOffset->print(OS, MAI); + OS << '\n'; } }; @@ -159,25 +165,21 @@ 
public: Flags |= (AbiVersion & ELF::EF_PPC64_ABI); MCA.setELFHeaderEFlags(Flags); } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S); int64_t Res; - if (!LocalOffset->EvaluateAsAbsolute(Res, MCA)) + if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) report_fatal_error(".localentry expression must be absolute."); unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) report_fatal_error(".localentry expression cannot be encoded."); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - unsigned Other = MCELF::getOther(Data) << 2; + unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= Encoded; - MCELF::setOther(Data, Other >> 2); + S->setOther(Other); // For GAS compatibility, unless we already saw a .abiversion directive, // set e_flags to indicate ELFv2 ABI. @@ -185,22 +187,18 @@ public: if ((Flags & ELF::EF_PPC64_ABI) == 0) MCA.setELFHeaderEFlags(Flags | 2); } - void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override { + void emitAssignment(MCSymbol *S, const MCExpr *Value) override { + auto *Symbol = cast<MCSymbolELF>(S); // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. if (Value->getKind() != MCExpr::SymbolRef) return; - const MCSymbol &RhsSym = - static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); - MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); - // The "other" values are stored in the last 6 bits of the second byte. 
- // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - unsigned Other = MCELF::getOther(SymbolData) << 2; + const auto &RhsSym = cast<MCSymbolELF>( + static_cast<const MCSymbolRefExpr *>(Value)->getSymbol()); + unsigned Other = Symbol->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; - Other |= (MCELF::getOther(Data) << 2) & ELF::STO_PPC64_LOCAL_MASK; - MCELF::setOther(SymbolData, Other >> 2); + Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; + Symbol->setOther(Other); } }; @@ -217,7 +215,7 @@ public: void emitAbiVersion(int AbiVersion) override { llvm_unreachable("Unknown pseudo-op: .abiversion"); } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { llvm_unreachable("Unknown pseudo-op: .localentry"); } }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 3c906d2a51e3..9d7289658f0f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -24,7 +24,7 @@ using namespace llvm; namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { - bool RecordScatteredRelocation(MachObjectWriter *Writer, + bool recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -38,10 +38,9 @@ class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/Is64Bit) {} + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const 
MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override { @@ -187,9 +186,9 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout, /// \return false if falling back to using non-scattered relocation, /// otherwise true for normal scattered relocation. -/// based on X86MachObjectWriter::RecordScatteredRelocation -/// and ARMMachObjectWriter::RecordScatteredRelocation -bool PPCMachObjectWriter::RecordScatteredRelocation( +/// based on X86MachObjectWriter::recordScatteredRelocation +/// and ARMMachObjectWriter::recordScatteredRelocation +bool PPCMachObjectWriter::recordScatteredRelocation( MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, unsigned Log2Size, uint64_t &FixedValue) { @@ -206,28 +205,26 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // FIXME: is Type correct? 
see include/llvm/Support/MachO.h Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // FIXME: does FixedValue get used?? @@ -253,7 +250,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( } // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? - // see PPCMCExpr::EvaluateAsRelocatableImpl() + // see PPCMCExpr::evaluateAsRelocatableImpl() uint32_t other_half = 0; switch (Type) { case MachO::PPC_RELOC_LO16_SECTDIFF: @@ -317,7 +314,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // Q: are branch targets ever scattered? RelocType != MachO::PPC_RELOC_BR24 && RelocType != MachO::PPC_RELOC_BR14) { - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; } @@ -346,7 +343,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // Resolve constant variables. 
if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4f1c3c73e710..b42b0f9ef478 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -49,7 +49,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -181,14 +181,14 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, return; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_ConstantPoolIndex: O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); + GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. 
@@ -222,8 +222,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } else { SymToPrint = getSymbol(GV); } - - O << *SymToPrint; + + SymToPrint->print(O, MAI); printOffset(MO.getOffset(), O); return; @@ -422,11 +422,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, TM.getRelocationModel() == Reloc::PIC_) Kind = MCSymbolRefExpr::VK_PLT; const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext); + const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::BL8_NOP_TLS : PPC::BL_TLS) @@ -464,10 +464,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, + MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LOCAL, OutContext), - MCConstantExpr::Create(4, OutContext), + MCConstantExpr::create(4, OutContext), OutContext); // Emit the 'bl'. @@ -486,7 +486,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); // Emit the label. 
OutStreamer->EmitLabel(PICBase); @@ -502,9 +502,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); TmpInst.setOpcode(PPC::LWZ); const MCExpr *Exp = - MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); const MCExpr *PB = - MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), MCSymbolRefExpr::VK_None, OutContext); const MCOperand TR = TmpInst.getOperand(1); @@ -512,7 +512,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri) TmpInst.getOperand(1) = - MCOperand::createExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext)); + MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext)); TmpInst.getOperand(0) = TR; TmpInst.getOperand(2) = PICR; EmitToStreamer(*OutStreamer, TmpInst); @@ -547,19 +547,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (PL == PICLevel::Small) { const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); } else { MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None, + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext); const MCExpr *PB = - MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")), + MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext); - Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext); + Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); } EmitToStreamer(*OutStreamer, TmpInst); @@ -592,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *TOCEntry = 
lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -639,7 +639,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -681,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -715,7 +715,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -729,7 +729,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -748,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = 
getSymbol(GValue); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -763,10 +763,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext))); + .addExpr(MCSymbolRefExpr::create(NextInstr, OutContext))); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext), - MCSymbolRefExpr::Create(GOTRef, OutContext), + MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext), + MCSymbolRefExpr::create(GOTRef, OutContext), OutContext); OutStreamer->EmitLabel(GOTRef); OutStreamer->EmitValue(OffsExpr, 4); @@ -786,10 +786,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::PPC32GOT: { MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *SymGotTlsL = - MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, + MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext); const MCExpr *SymGotTlsHA = - MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, + MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI) .addReg(MI->getOperand(0).getReg()) @@ -808,7 +808,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); 
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -825,7 +825,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create( + const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create( MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); @@ -853,7 +853,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -870,7 +870,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create( + const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create( MOSymbol, Subtarget->isPPC64() ? 
MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); @@ -900,7 +900,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); EmitToStreamer( *OutStreamer, @@ -920,7 +920,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) @@ -1012,8 +1012,8 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { // The GOT pointer points to the middle of the GOT, in order to reference the // entire 64kB range. 0x8000 is the midpoint. 
const MCExpr *tocExpr = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext), - MCConstantExpr::Create(0x8000, OutContext), + MCBinaryExpr::createAdd(MCSymbolRefExpr::create(CurrentPos, OutContext), + MCConstantExpr::create(0x8000, OutContext), OutContext); OutStreamer->EmitAssignment(TOCSym, tocExpr); @@ -1036,10 +1036,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->EmitLabel(RelocSymbol); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub( - MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")), + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext), - MCSymbolRefExpr::Create(PICBase, OutContext), + MCSymbolRefExpr::create(PICBase, OutContext), OutContext); OutStreamer->EmitValue(OffsExpr, 4); OutStreamer->EmitLabel(CurrentFnSym); @@ -1062,12 +1062,12 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { MCSymbol *Symbol1 = CurrentFnSymForSize; // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. - OutStreamer->EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), + OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext), 8 /*size*/); MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer->EmitValue( - MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext), + MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext), 8/*size*/); // Emit a null environment pointer. 
OutStreamer->EmitIntValue(0, 8 /* size */); @@ -1133,22 +1133,22 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol(); OutStreamer->EmitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = - MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext); + MCSymbolRefExpr::create(GlobalEntryLabel, OutContext); MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); const MCExpr *TOCDeltaExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext), + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), GlobalEntryLabelExp, OutContext); const MCExpr *TOCDeltaHi = - PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext); + PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::X2) .addReg(PPC::X12) .addExpr(TOCDeltaHi)); const MCExpr *TOCDeltaLo = - PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext); + PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) .addReg(PPC::X2) .addReg(PPC::X2) @@ -1157,16 +1157,16 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { MCSymbol *LocalEntryLabel = OutContext.createTempSymbol(); OutStreamer->EmitLabel(LocalEntryLabel); const MCSymbolRefExpr *LocalEntryLabelExp = - MCSymbolRefExpr::Create(LocalEntryLabel, OutContext); + MCSymbolRefExpr::create(LocalEntryLabel, OutContext); const MCExpr *LocalOffsetExp = - MCBinaryExpr::CreateSub(LocalEntryLabelExp, + MCBinaryExpr::createSub(LocalEntryLabelExp, GlobalEntryLabelExp, OutContext); PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); if (TS) - TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp); + TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp); } } @@ -1305,10 +1305,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer->EmitLabel(Stub); 
OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); + const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); const MCExpr *Sub = - MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext); + MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext); // mflr r0 EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); @@ -1318,7 +1318,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // mflr r11 EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, true, OutContext); + const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) @@ -1328,7 +1328,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) - const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, true, OutContext); + const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) @@ -1364,7 +1364,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); OutStreamer->SwitchSection(StubSection); EmitAlignment(4); @@ -1373,7 +1373,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // lis r11, ha16(LazyPtr) const MCExpr *LazyPtrHa16 = - PPCMCExpr::CreateHa(LazyPtrExpr, true, OutContext); + PPCMCExpr::createHa(LazyPtrExpr, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); @@ -1381,7 +1381,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr)(r11) // lwzu r12, lo16(LazyPtr)(r11) const MCExpr *LazyPtrLo16 = - PPCMCExpr::CreateLo(LazyPtrExpr, true, OutContext); + PPCMCExpr::createLo(LazyPtrExpr, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) @@ -1465,7 +1465,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // need to be indirect and pc-rel. We accomplish this by using NLPs. // However, sometimes the types are local to the file. So we need to // fill in the value for the NLP in those cases. - OutStreamer->EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), + OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(), OutContext), isPPC64 ? 
8 : 4/*size*/); } @@ -1484,7 +1484,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer->EmitLabel(Stubs[i].first); // .long _foo OutStreamer->EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), + create(Stubs[i].second.getPointer(), OutContext), isPPC64 ? 8 : 4/*size*/); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 0b8e23c4ebf8..a561d5b1190a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -148,6 +148,9 @@ class PPCFastISel final : public FastISel { bool isVSFRCRegister(unsigned Register) const { return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID; } + bool isVSSRCRegister(unsigned Register) const { + return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID; + } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, @@ -503,8 +506,11 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // If this is a potential VSX load with an offset of 0, a VSX indexed load can // be used. + bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg); bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg); - if (IsVSFRC && (Opc == PPC::LFD) && + bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; + bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD; + if ((Is32VSXLoad || Is64VSXLoad) && (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; @@ -518,7 +524,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed load. 
- if (IsVSFRC && Opc == PPC::LFD) return false; + if (Is32VSXLoad || Is64VSXLoad) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( @@ -532,7 +538,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed load. - if (IsVSFRC && Opc == PPC::LFD) return false; + if (Is32VSXLoad || Is64VSXLoad) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -555,7 +561,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LWA: Opc = PPC::LWAX; break; case PPC::LWA_32: Opc = PPC::LWAX_32; break; case PPC::LD: Opc = PPC::LDX; break; - case PPC::LFS: Opc = PPC::LFSX; break; + case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) @@ -636,9 +642,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // If this is a potential VSX store with an offset of 0, a VSX indexed store // can be used. + bool IsVSSRC = isVSSRCRegister(SrcReg); bool IsVSFRC = isVSFRCRegister(SrcReg); - if (IsVSFRC && (Opc == PPC::STFD) && - (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; + bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; + if ((Is32VSXStore || Is64VSXStore) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; } @@ -648,7 +657,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed store. 
- if (IsVSFRC && Opc == PPC::STFD) return false; + if (Is32VSXStore || Is64VSXStore) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( @@ -665,7 +674,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed store. - if (IsVSFRC && Opc == PPC::STFD) return false; + if (Is32VSXStore || Is64VSXStore) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -684,7 +693,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STH8: Opc = PPC::STHX8; break; case PPC::STW8: Opc = PPC::STWX8; break; case PPC::STD: Opc = PPC::STDX; break; - case PPC::STFS: Opc = PPC::STFSX; break; + case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bb9315e9520e..2600ee5db179 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10825,7 +10825,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! 
if (Ty->isVectorTy() && AM.BaseOffs != 0) return false; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index c93de430fd05..7fd3f9c3de3d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -556,7 +556,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9685bac2aebb..d08b80871f3e 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1078,6 +1078,82 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xssubsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; + + // FMA Instructions + let BaseName = "XSMADDASP" in { + let isCommutable = 1 in + def XSMADDASP : XX3Form<60, 1, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMSP : XX3Form<60, 9, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBASP" in { + let isCommutable = 1 in + def XSMSUBASP : XX3Form<60, 17, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, + (fneg f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMSP : XX3Form<60, 25, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDASP" in { + let isCommutable = 1 in + def XSNMADDASP : XX3Form<60, 129, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMSP : XX3Form<60, 137, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBASP" in { + let isCommutable = 1 in + def XSNMSUBASP : XX3Form<60, 145, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + 
"xsnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + (fneg f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMSP : XX3Form<60, 153, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } } // AddedComplexity = 400 } // HasP8Vector diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f1e28651aea2..05cb6e11db67 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -142,28 +142,28 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) RefKind = MCSymbolRefExpr::VK_PLT; - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); // Subtract off the PIC base if required. if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); - Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); + const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); + Expr = MCBinaryExpr::createSub(Expr, PB, Ctx); } // Add ha16() / lo16() markers if required. 
switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); break; } @@ -193,7 +193,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), AP.OutContext)); break; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp index 9ad134070082..9ee5db938b67 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -55,9 +55,9 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal( const MCExpr *PPC64LinuxTargetObjectFile:: getDebugThreadLocalSymbol(const MCSymbol *Sym) const { const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); - return MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(0x8000, getContext()), + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); + return MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(0x8000, getContext()), getContext()); } diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 8aaf5e188907..dbe7617d3542 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -20,7 +20,7 @@ public: virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; - virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0; + virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0; 
}; } diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 9b3606372035..0a05d25189b0 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -105,7 +105,7 @@ namespace ShaderType { /// a separate piece of memory that is unique from other /// memory locations. namespace AMDGPUAS { -enum AddressSpaces { +enum AddressSpaces : unsigned { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory @@ -137,7 +137,10 @@ enum AddressSpaces { CONSTANT_BUFFER_14 = 22, CONSTANT_BUFFER_15 = 23, ADDRESS_NONE = 24, ///< Address space for unknown memory. - LAST_ADDRESS = ADDRESS_NONE + LAST_ADDRESS = ADDRESS_NONE, + + // Some places use this if the address space can't be determined. + UNKNOWN_ADDRESS_SPACE = ~0u }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d00ae78c99b0..d56838ec2019 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -257,9 +257,22 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); + setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + 
setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { @@ -301,6 +314,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SMIN, MVT::i32, Legal); + setOperationAction(ISD::UMIN, MVT::i32, Legal); + setOperationAction(ISD::SMAX, MVT::i32, Legal); + setOperationAction(ISD::UMAX, MVT::i32, Legal); + if (!Subtarget->hasFFBH()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); @@ -962,17 +980,17 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imax: - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umax: - return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imin: - return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umin: - return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umul24: return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, @@ -1050,7 +1068,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(1)); - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); + return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1)); } /// Linear Interpolation @@ -1149,7 
+1167,7 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } -/// \brief Generate Min/Max node +// FIXME: Remove this when combines added to DAGCombiner. SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, @@ -1165,22 +1183,22 @@ SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, switch (CCOpcode) { case ISD::SETULE: case ISD::SETULT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX; + unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX; + unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN; + unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETUGE: case ISD::SETUGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN; + unsigned Opc = (LHS == True) ? 
ISD::UMAX : ISD::UMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } default: @@ -2644,11 +2662,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(COS_HW) NODE_NAME_CASE(SIN_HW) NODE_NAME_CASE(FMAX_LEGACY) - NODE_NAME_CASE(SMAX) - NODE_NAME_CASE(UMAX) NODE_NAME_CASE(FMIN_LEGACY) - NODE_NAME_CASE(SMIN) - NODE_NAME_CASE(UMIN) NODE_NAME_CASE(FMAX3) NODE_NAME_CASE(SMAX3) NODE_NAME_CASE(UMAX3) @@ -2794,14 +2808,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; } - case AMDGPUISD::SMAX: - case AMDGPUISD::UMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMIN: - computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), - KnownZero, KnownOne, DAG, Depth); - break; - case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { KnownZero = APInt::getHighBitsSet(32, 31); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c9f198129efc..fbb7d3c88437 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -228,11 +228,7 @@ enum NodeType : unsigned { COS_HW, SIN_HW, FMAX_LEGACY, - SMAX, - UMAX, FMIN_LEGACY, - SMIN, - UMIN, FMAX3, SMAX3, UMAX3, diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index f0f10ca59723..64e295f1144c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -152,17 +152,15 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const return true; } -MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // TODO: Implement this function return nullptr; } -MachineInstr * -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - 
ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { // TODO: Implement this function return nullptr; } diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 07042b59be7b..8fd27a17638b 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -87,9 +87,11 @@ public: protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; public: diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 790f34cea8cd..b413897d9d23 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -94,16 +94,6 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, [] >; -// out = min(a, b) a and b are signed ints -def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a and b are unsigned ints -def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - // FIXME: TableGen doesn't like commutative instructions with more // than 2 operands. 
// out = max(a, b, c) a, b and c are floats diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 9565e3fd5fa6..20831460b933 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -64,25 +64,25 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName())); - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } case MachineOperand::MO_TargetIndex: { assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index b262cdf57712..a5a901c739d4 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -272,7 +272,7 @@ public: } bool enableSubRegLiveness() const override { - return false; + return true; } }; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 19bffd575117..95025a6e29f1 100644 --- 
a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -1084,7 +1084,7 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { case AsmToken::Identifier: Operands.push_back(AMDGPUOperand::CreateExpr( - MCSymbolRefExpr::Create(getContext().getOrCreateSymbol( + MCSymbolRefExpr::create(getContext().getOrCreateSymbol( Parser.getTok().getString()), getContext()), S)); Parser.Lex(); return MatchOperand_Success; diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 279c3eb1912f..f70676943bb3 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -337,7 +337,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } else if (Op.isExpr()) { const MCExpr *Exp = Op.getExpr(); - Exp->print(O); + Exp->print(O, &MAI); } else { llvm_unreachable("unknown operand type in printOperand"); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 2605ca52dfde..3713223697ed 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -25,18 +25,18 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} - void ExecutePostLayoutBinding(MCAssembler &Asm, + void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. 
} - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override { assert(!"Not implemented"); } - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; @@ -64,7 +64,7 @@ public: } //End anonymous namespace -void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, +void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { Asm.writeSectionData(&*I, Layout); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 19d89fb27caa..028a86dfc7ad 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -11,7 +11,7 @@ #include "AMDGPUMCAsmInfo.h" using namespace llvm; -AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() { +AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MaxInstLength = 16; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h index 8f75c76c4257..a5bac51e356f 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -17,7 +17,7 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class StringRef; +class Triple; // If you need to create another MCAsmInfo class, which inherits from MCAsmInfo, // you will need to make sure your new class sets PrivateGlobalPrefix to @@ -26,7 +26,7 @@ class StringRef; // with 'L' as a local symbol. 
class AMDGPUMCAsmInfo : public MCAsmInfoELF { public: - explicit AMDGPUMCAsmInfo(StringRef &TT); + explicit AMDGPUMCAsmInfo(const Triple &TT); }; } // namespace llvm #endif diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index a809564e3be0..e683498d52a5 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -148,15 +149,11 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { } void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 4; i++) { - OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); - } + support::endian::Writer<support::little>(OS).write(Value); } void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 8; i++) { - EmitByte((Value >> (8 * i)) & 0xff, OS); - } + support::endian::Writer<support::little>(OS).write(Value); } unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7126c82c0331..7beed092b3f7 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -781,10 +781,10 @@ def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; -def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; -def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; -def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; -def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; +def MAX_INT : 
R600_2OP_Helper <0x36, "MAX_INT", smax>; +def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>; +def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>; +def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>; def SETE_INT : R600_2OP < 0x3A, "SETE_INT", diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 52bf2aeb87de..12d08cf4c7f5 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -155,7 +155,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -211,6 +210,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); setTargetDAGCombine(ISD::FMAXNUM); + setTargetDAGCombine(ISD::SMIN); + setTargetDAGCombine(ISD::SMAX); + setTargetDAGCombine(ISD::UMIN); + setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); @@ -251,47 +254,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, return false; } -// FIXME: This really needs an address space argument. The immediate offset -// size is different for different sets of memory instruction sets. - -// The single offset DS instructions have a 16-bit unsigned byte offset. -// -// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r + -// r + i with addr64. 32-bit has more addressing mode options. Depending on the -// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i). -// -// SMRD instructions have an 8-bit, dword offset. 
-// bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; - // Allow a 16-bit unsigned immediate field, since this is what DS instructions - // use. - if (!isUInt<16>(AM.BaseOffs)) - return false; + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? + case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { + // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and + // additionally can do r + r + i with addr64. 32-bit has more addressing + // mode options. Depending on the resource constant, it can also do + // (i64 r0) + (i32 r1) * (i14 i). + // + // SMRD instructions have an 8-bit, dword offset. + // + // Assume nonunifom access, since the address space isn't enough to know + // what instruction we will use, and since we don't know if this is a load + // or store and scalar stores are only available on VI. + // + // We also know if we are doing an extload, we can't do a scalar load. + // + // Private arrays end up using a scratch buffer most of the time, so also + // assume those use MUBUF instructions. Scratch loads / stores are currently + // implemented as mubuf instructions with offen bit set, so slightly + // different than the normal addr64. + if (!isUInt<12>(AM.BaseOffs)) + return false; - // Only support r+r, - switch (AM.Scale) { - case 0: // "r+i" or just "i", depending on HasBaseReg. - break; - case 1: - if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + // FIXME: Since we can split immediate into soffset and immediate offset, + // would it make sense to allow any immediate? + + switch (AM.Scale) { + case 0: // r + i or just i, depending on HasBaseReg. + return true; + case 1: + return true; // We have r + r or r + i. + case 2: + if (AM.HasBaseReg) { + // Reject 2 * r + r. 
+ return false; + } + + // Allow 2 * r as r + r + // Or 2 * r + i is allowed as r + r + i. + return true; + default: // Don't allow n * r return false; - // Otherwise we have r+r or r+i. - break; - case 2: - if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + } + } + case AMDGPUAS::LOCAL_ADDRESS: + case AMDGPUAS::REGION_ADDRESS: { + // Basic, single offset DS instructions allow a 16-bit unsigned immediate + // field. + // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have + // an 8-bit dword offset but we don't know the alignment here. + if (!isUInt<16>(AM.BaseOffs)) return false; - // Allow 2*r as r+r. - break; - default: // Don't allow n * r + + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. + return true; + + if (AM.Scale == 1 && AM.HasBaseReg) + return true; + return false; } - - return true; + case AMDGPUAS::FLAT_ADDRESS: { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + } + default: + llvm_unreachable("unhandled address space"); + } } bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, @@ -368,6 +407,12 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } +static EVT toIntegerVT(EVT VT) { + if (VT.isVector()) + return VT.changeVectorElementTypeToInteger(); + return MVT::getIntegerVT(VT.getSizeInBits()); +} + SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -380,20 +425,42 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); - SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, - 
MRI.getLiveInVirtReg(InputPtrReg), MVT::i64); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr, - DAG.getConstant(Offset, SL, MVT::i64)); + SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, + MRI.getLiveInVirtReg(InputPtrReg), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Offset, SL, PtrVT)); SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, + unsigned Align = DL->getABITypeAlignment(Ty); + + if (VT != MemVT && VT.isFloatingPoint()) { + // Do an integer load and convert. + // FIXME: This is mostly because load legalization after type legalization + // doesn't handle FP extloads. + assert(VT.getScalarType() == MVT::f32 && + MemVT.getScalarType() == MVT::f16); + + EVT IVT = toIntegerVT(VT); + EVT MemIVT = toIntegerVT(MemVT); + SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, + IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, + false, // isVolatile + true, // isNonTemporal + true, // isInvariant + Align); // Alignment + return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + } + + ISD::LoadExtType ExtTy = Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile true, // isNonTemporal true, // isInvariant - DL->getABITypeAlignment(Ty)); // Alignment + Align); // Alignment } SDValue SITargetLowering::LowerFormalArguments( @@ -1570,15 +1637,15 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { switch (Opc) { case ISD::FMAXNUM: return AMDGPUISD::FMAX3; - case AMDGPUISD::SMAX: + case ISD::SMAX: return AMDGPUISD::SMAX3; - case AMDGPUISD::UMAX: + case ISD::UMAX: return AMDGPUISD::UMAX3; case ISD::FMINNUM: return AMDGPUISD::FMIN3; - case AMDGPUISD::SMIN: + case ISD::SMIN: return AMDGPUISD::SMIN3; - case AMDGPUISD::UMIN: + case ISD::UMIN: return AMDGPUISD::UMIN3; default: llvm_unreachable("Not a min/max opcode"); @@ -1664,10 +1731,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performSetCCCombine(N, DCI); case ISD::FMAXNUM: // TODO: What about fmax_legacy? case ISD::FMINNUM: - case AMDGPUISD::SMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMAX: - case AMDGPUISD::UMIN: { + case ISD::SMAX: + case ISD::SMIN: + case ISD::UMAX: + case ISD::UMIN: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index a95354c38816..a956b013bdb1 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -63,7 +63,7 @@ public: EVT /*VT*/) const override; bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const override; + Type *Ty, unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 839c2e9ecdd2..2f39074802b7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -224,16 +224,16 @@ defm S_SUBB_U32 : SOP2_32 
<sop2<0x05>, "s_subb_u32", } // End Uses = [SCC] defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32", - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] + [(set i32:$dst, (smin i32:$src0, i32:$src1))] >; defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32", - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] + [(set i32:$dst, (umin i32:$src0, i32:$src1))] >; defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32", - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] + [(set i32:$dst, (smax i32:$src0, i32:$src1))] >; defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] + [(set i32:$dst, (umax i32:$src0, i32:$src1))] >; } // End Defs = [SCC] diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 388cb65c99cb..6b3b51afb4bd 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -408,7 +408,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, uint64_t ImmValue = IsImm ? 
MCValOp.getImm() : 0; const MCExpr *ValExpr; if (IsImm) - ValExpr = MCConstantExpr::Create(ImmValue, getContext()); + ValExpr = MCConstantExpr::create(ImmValue, getContext()); else ValExpr = MCValOp.getExpr(); @@ -417,7 +417,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, if (!IsImm || (ImmValue & ~0x1fff)) { MCInst TmpInst; const MCExpr *Expr = - SparcMCExpr::Create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext()); + SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext()); TmpInst.setLoc(IDLoc); TmpInst.setOpcode(SP::SETHIi); TmpInst.addOperand(MCRegOp); @@ -429,7 +429,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) { MCInst TmpInst; const MCExpr *Expr = - SparcMCExpr::Create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext()); + SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext()); TmpInst.setLoc(IDLoc); TmpInst.setOpcode(SP::ORri); TmpInst.addOperand(MCRegOp); @@ -774,11 +774,11 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op, E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); - const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + const MCExpr *Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); if (isCall && getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) - Res = SparcMCExpr::Create(SparcMCExpr::VK_Sparc_WPLT30, Res, + Res = SparcMCExpr::create(SparcMCExpr::VK_Sparc_WPLT30, Res, getContext()); Op = SparcOperand::CreateImm(Res, S, E); } @@ -1010,7 +1010,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal, break; } - EVal = SparcMCExpr::Create(VK, subExpr, getContext()); + EVal = SparcMCExpr::create(VK, subExpr, getContext()); return true; } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 
bac2617b0f3e..5d714fe4da92 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -120,7 +120,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, } assert(MO.isExpr() && "Unknown operand kind in printOperand"); - MO.getExpr()->print(O); + MO.getExpr()->print(O, &MAI); } void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 3792a596a6b8..9388527004f5 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -260,7 +260,7 @@ namespace { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0x01000000); + OW->write32(0x01000000); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 124cb3b4b98b..280c6d7937b2 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -20,8 +20,7 @@ using namespace llvm; void SparcELFMCAsmInfo::anchor() {} -SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) { bool isV9 = (TheTriple.getArch() == Triple::sparcv9); IsLittleEndian = (TheTriple.getArch() == Triple::sparcel); @@ -51,8 +50,8 @@ SparcELFMCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym, MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_pcrel) { MCContext &Ctx = Streamer.getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(Sym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); } return MCAsmInfo::getExprForPersonalitySymbol(Sym, Encoding, Streamer); @@ -64,8 +63,8 @@ SparcELFMCAsmInfo::getExprForFDESymbol(const 
MCSymbol *Sym, MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_pcrel) { MCContext &Ctx = Streamer.getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(Sym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); } return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 84de55145b65..12386f14443e 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -17,12 +17,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class StringRef; +class Triple; class SparcELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit SparcELFMCAsmInfo(StringRef TT); + explicit SparcELFMCAsmInfo(const Triple &TheTriple); const MCExpr* getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 34079eea7885..9171d4dc9c00 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -86,16 +86,10 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, if (Ctx.getAsmInfo()->isLittleEndian()) { // Output the bits in little-endian byte order. - for (unsigned i = 0; i != 4; ++i) { - OS << (char)Bits; - Bits >>= 8; - } + support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); } else { // Output the bits in big-endian byte order. 
- for (unsigned i = 0; i != 4; ++i) { - OS << (char)(Bits >> 24); - Bits <<= 8; - } + support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); } unsigned tlsOpNo = 0; switch (MI.getOpcode()) { @@ -137,7 +131,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } int64_t Res; - if (Expr->EvaluateAsAbsolute(Res)) + if (Expr->evaluateAsAbsolute(Res)) return Res; llvm_unreachable("Unhandled expression!"); diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index d97e3a25c5a7..e85a8cd5e339 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -15,9 +15,8 @@ #include "SparcMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" @@ -26,20 +25,17 @@ using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" const SparcMCExpr* -SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +SparcMCExpr::create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { return new (Ctx) SparcMCExpr(Kind, Expr); } - - -void SparcMCExpr::PrintImpl(raw_ostream &OS) const -{ +void SparcMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool closeParen = printVariantKind(OS, Kind); const MCExpr *Expr = getSubExpr(); - Expr->print(OS); + Expr->print(OS, MAI); if (closeParen) OS << ')'; @@ -160,10 +156,10 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { } bool -SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +SparcMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup); + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); } static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { @@ -184,8 +180,7 
@@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); break; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index 116e10406a7c..d08ad86dbe04 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -69,7 +69,7 @@ public: /// @name Construction /// @{ - static const SparcMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + static const SparcMCExpr *create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx); /// @} /// @name Accessors @@ -85,13 +85,13 @@ public: Sparc::Fixups getFixupKind() const { return getFixupKind(Kind); } /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 4d5672e29550..d34c87977168 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -34,7 +34,7 @@ using namespace llvm; #include "SparcGenRegisterInfo.inc" static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple 
&TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); @@ -43,7 +43,7 @@ static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, } static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047); diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 9903bc5799da..c5f046bfc5bb 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -78,9 +78,9 @@ namespace { static MCOperand createSparcMCOperand(SparcMCExpr::VariantKind Kind, MCSymbol *Sym, MCContext &OutContext) { - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Sym, + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext); - const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, OutContext); + const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym, OutContext); return MCOperand::createExpr(expr); } @@ -94,15 +94,15 @@ static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind, MCSymbol *CurLabel, MCContext &OutContext) { - const MCSymbolRefExpr *GOT = MCSymbolRefExpr::Create(GOTLabel, OutContext); - const MCSymbolRefExpr *Start = MCSymbolRefExpr::Create(StartLabel, + const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext); + const MCSymbolRefExpr *Start = MCSymbolRefExpr::create(StartLabel, OutContext); - const MCSymbolRefExpr *Cur = MCSymbolRefExpr::Create(CurLabel, + const MCSymbolRefExpr *Cur = MCSymbolRefExpr::create(CurLabel, OutContext); - const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Cur, Start, OutContext); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(GOT, Sub, OutContext); - const SparcMCExpr *expr = 
SparcMCExpr::Create(Kind, + const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Cur, Start, OutContext); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(GOT, Sub, OutContext); + const SparcMCExpr *expr = SparcMCExpr::create(Kind, Add, OutContext); return MCOperand::createExpr(expr); } @@ -199,7 +199,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, EmitHiLo(*OutStreamer, GOTLabel, SparcMCExpr::VK_Sparc_H44, SparcMCExpr::VK_Sparc_M44, MCRegOP, OutContext, STI); - MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(12, + MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(12, OutContext)); EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI); MCOperand lo = createSparcMCOperand(SparcMCExpr::VK_Sparc_L44, @@ -211,7 +211,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, EmitHiLo(*OutStreamer, GOTLabel, SparcMCExpr::VK_Sparc_HH, SparcMCExpr::VK_Sparc_HM, MCRegOP, OutContext, STI); - MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(32, + MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(32, OutContext)); EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI); // Use register %o7 to load the lower 32 bits. 
@@ -361,10 +361,10 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << (int)MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_BlockAddress: O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index 9388d594973c..b084d0021ba0 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -59,9 +59,9 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI, break; } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext); - const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, + const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym, AP.OutContext); return MCOperand::createExpr(expr); } diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 32b2240f87ea..412e124f9a26 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -34,8 +34,8 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference( } MCContext &Ctx = getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(SSym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(SSym, Ctx), Ctx); } return TargetLoweringObjectFileELF::getTTypeGlobalReference( diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index b721def54e12..3aa4c6bd32d6 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ 
-865,9 +865,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, } MCSymbol *Sym = Ctx.createTempSymbol(); Out.EmitLabel(Sym); - const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); - Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); } // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. @@ -904,7 +904,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, } StringRef Identifier = Parser.getTok().getString(); - Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier), + Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier), Kind, Ctx); Parser.Lex(); } diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 373ddfa7e257..059ae3f7fb09 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -37,13 +37,14 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, } } -void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { +void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O) { if (MO.isReg()) O << '%' << getRegisterName(MO.getReg()); else if (MO.isImm()) O << MO.getImm(); else if (MO.isExpr()) - O << *MO.getExpr(); + MO.getExpr()->print(O, MAI); else llvm_unreachable("Invalid operand"); } @@ -147,7 +148,7 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, O << "0x"; O.write_hex(MO.getImm()); } else - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, @@ -175,7 +176,7 @@ void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, void 
SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - printOperand(MI->getOperand(OpNum), O); + printOperand(MI->getOperand(OpNum), &MAI, O); } void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 847b6962e6f2..ba55e686f3ef 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -35,7 +35,8 @@ public: raw_ostream &O); // Print the given operand. - static void printOperand(const MCOperand &MO, raw_ostream &O); + static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O); // Override MCInstPrinter. void printRegName(raw_ostream &O, unsigned RegNo) const override; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 1c3887ab5456..0e8a680d4dd4 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -105,7 +105,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, bool SystemZMCAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { for (uint64_t I = 0; I != Count; ++I) - OW->Write8(7); + OW->write8(7); return true; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 0161d6263e7d..b17977d41be1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -13,7 +13,7 @@ using namespace llvm; -SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) { +SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { PointerSize = 8; CalleeSaveStackSlotSize = 8; IsLittleEndian = false; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h 
b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h index 19b5b4b09724..800f89232063 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -14,11 +14,11 @@ #include "llvm/Support/Compiler.h" namespace llvm { -class StringRef; +class Triple; class SystemZMCAsmInfo : public MCAsmInfoELF { public: - explicit SystemZMCAsmInfo(StringRef TT); + explicit SystemZMCAsmInfo(const Triple &TT); }; } // end namespace llvm diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index c9290c1922d3..fd52a2ebf2fd 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -217,7 +217,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, const MCOperand &MO = MI.getOperand(OpNum); const MCExpr *Expr; if (MO.isImm()) - Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx); else { Expr = MO.getExpr(); if (Offset) { @@ -225,8 +225,8 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, // is relative to the operand field itself, which is Offset bytes // into MI. Add Offset to the relocation value to cancel out // this difference. 
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); } } Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind)); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 8c2075afe505..92681cf6e44b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -132,7 +132,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { } static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index a0d079fcc359..3dca7bd89f05 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -68,14 +68,14 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { StringRef Name = "__tls_get_offset"; - return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), MCSymbolRefExpr::VK_PLT, Context); } static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { StringRef Name = "_GLOBAL_OFFSET_TABLE_"; - return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), MCSymbolRefExpr::VK_None, Context); } @@ -285,7 +285,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV); const MCExpr *Expr = - MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()), + 
MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); @@ -305,7 +305,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, } else { SystemZMCInstLower Lower(MF->getContext(), *this); MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); - SystemZInstPrinter::printOperand(MO, OS); + SystemZInstPrinter::printOperand(MO, MAI, OS); } return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 24b5a41d7f67..91e12c2d9d7e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -506,9 +506,10 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, *Fast = true; return true; } - + bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. 
if (AM.BaseGV) diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index b001abc693d6..2f7617bbdac3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -369,7 +369,8 @@ public: bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 90598852b5ed..4346850e0ac5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -752,10 +752,9 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; } -MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Opcode = MI->getOpcode(); @@ -765,9 +764,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isInt<8>(MI->getOperand(2).getImm()) && !MI->getOperand(3).getReg()) { // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI)) - .addFrameIndex(FrameIndex).addImm(0) - .addImm(MI->getOperand(2).getImm()); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) 
+ .addImm(MI->getOperand(2).getImm()); } return nullptr; } @@ -786,9 +787,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isInt<8>(MI->getOperand(2).getImm())) { // A(G)HI %reg, CONST -> A(G)SI %mem, CONST Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI); - return BuildMI(MF, MI->getDebugLoc(), get(Opcode)) - .addFrameIndex(FrameIndex).addImm(0) - .addImm(MI->getOperand(2).getImm()); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI->getOperand(2).getImm()); } if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { @@ -798,17 +801,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // source register instead. if (OpNum == 0) { unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; - return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) - .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) - .addImm(0).addReg(0); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(StoreOpcode)) + .addOperand(MI->getOperand(1)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); } // If we're spilling the source of an LDGR or LGDR, load the // destination register instead. if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; unsigned Dest = MI->getOperand(0).getReg(); - return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) - .addFrameIndex(FrameIndex).addImm(0).addReg(0); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); } } @@ -830,17 +839,25 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. 
if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) - .addFrameIndex(FrameIndex).addImm(0).addImm(Size) - .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addMemOperand(MMO); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(Size) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addMemOperand(MMO); } // Handle conversion of stores. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) - .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addImm(Size).addFrameIndex(FrameIndex).addImm(0) - .addMemOperand(MMO); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addImm(Size) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); } } } @@ -856,7 +873,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, assert(AccessBytes != 0 && "Size of access should be known"); assert(AccessBytes <= Size && "Access outside the frame index"); uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI->getDebugLoc(), get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) MIB.addOperand(MI->getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); @@ -869,10 +887,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return nullptr; } -MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + 
MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { return nullptr; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index b55810b253f1..e47f2ee9d0b6 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -187,9 +187,11 @@ public: LiveVariables *LV) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index a1dcedab54e7..2655e4866b20 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -68,11 +68,11 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO, default: llvm_unreachable("unknown operand type"); } - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); if (HasOffset) if (int64_t Offset = MO.getOffset()) { - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); } return Expr; } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a184b92d3c9f..d498bb104ef8 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -313,7 +313,7 @@ const MCExpr *TargetLoweringObjectFile::getTTypeGlobalReference( const 
TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { const MCSymbolRefExpr *Ref = - MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), getContext()); + MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), getContext()); return getTTypeReference(Ref, Encoding, Streamer); } @@ -332,8 +332,8 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, // .-foo addressing. MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Sym, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Sym, PC, getContext()); } } } @@ -341,7 +341,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { // FIXME: It's not clear what, if any, default this should have - perhaps a // null return could mean 'no location' & we should just do that here. - return MCSymbolRefExpr::Create(Sym, *Ctx); + return MCSymbolRefExpr::create(Sym, *Ctx); } void TargetLoweringObjectFile::getNameWithPrefix( diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp new file mode 100644 index 000000000000..42bc487fe6d8 --- /dev/null +++ b/lib/Target/TargetRecip.cpp @@ -0,0 +1,225 @@ +//===-------------------------- TargetRecip.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class is used to customize machine-specific reciprocal estimate code +// generation in a target-independent way. +// If a target does not support operations in this specification, then code +// generation will default to using supported operations. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRecip.h"
+#include <map>
+
+using namespace llvm;
+
+// These are the names of the individual reciprocal operations. These are
+// the key strings for queries and command-line inputs.
+// In addition, the command-line interface recognizes the global parameters
+// "all", "none", and "default".
+static const char *RecipOps[] = {
+  "divd",
+  "divf",
+  "vec-divd",
+  "vec-divf",
+  "sqrtd",
+  "sqrtf",
+  "vec-sqrtd",
+  "vec-sqrtf",
+};
+
+// The uninitialized state is needed for the enabled settings and refinement
+// steps because custom settings may arrive via the command-line before target
+// defaults are set.
+TargetRecip::TargetRecip() {
+  // Seed the map with one default-constructed entry per known operation.
+  for (const char *Op : RecipOps)
+    RecipMap.insert(std::make_pair(Op, RecipParams()));
+}
+
+/// Look for a ":<digit>" refinement-step suffix in \p In.
+///
+/// \param In        The option string to scan.
+/// \param Position  Receives the index of the first ':' in \p In, or
+///                  StringRef::npos when there is none.
+/// \param Value     Receives the parsed step count; written only when a valid
+///                  suffix is found.
+/// \returns true if a valid suffix was parsed, false if \p In contains no ':'.
+///          Anything other than exactly one decimal digit after the ':' is
+///          reported as a fatal error.
+static bool parseRefinementStep(const StringRef &In, size_t &Position,
+                                uint8_t &Value) {
+  const char RefStepToken = ':';
+  Position = In.find(RefStepToken);
+  if (Position == StringRef::npos)
+    return false;
+
+  StringRef RefStepString = In.substr(Position + 1);
+  // Allow exactly one numeric character for the additional refinement
+  // step parameter.
+  if (RefStepString.size() == 1) {
+    char RefStepChar = RefStepString[0];
+    if (RefStepChar >= '0' && RefStepChar <= '9') {
+      Value = RefStepChar - '0';
+      return true;
+    }
+  }
+  report_fatal_error("Invalid refinement step for -recip.");
+}
+
+/// Handle the global settings "all", "none", and "default", each optionally
+/// followed by a ":<digit>" refinement-step count.
+///
+/// \returns true if \p Arg was one of the global settings and has been applied
+/// to every entry in the map; false if it is any other string, in which case
+/// the map is left untouched. A malformed refinement-step suffix is a fatal
+/// error (see parseRefinementStep).
+bool TargetRecip::parseGlobalParams(const std::string &Arg) {
+  StringRef ArgSub = Arg;
+
+  // Look for an optional setting of the number of refinement steps needed
+  // for this type of reciprocal operation.
+  size_t RefPos;
+  uint8_t RefSteps;
+  StringRef RefStepString;
+  if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
+    // Split the string for further processing.
+    RefStepString = ArgSub.substr(RefPos + 1);
+    ArgSub = ArgSub.substr(0, RefPos);
+  }
+
+  // Enable is only read when UseDefaults is false; it is still given a value
+  // on every path so that it is never left indeterminate.
+  bool Enable = false;
+  bool UseDefaults;
+  if (ArgSub == "all") {
+    UseDefaults = false;
+    Enable = true;
+  } else if (ArgSub == "none") {
+    UseDefaults = false;
+    Enable = false;
+  } else if (ArgSub == "default") {
+    UseDefaults = true;
+  } else {
+    // Any other string is invalid or an individual setting.
+    return false;
+  }
+
+  // All enable values will be initialized to target defaults if 'default' was
+  // specified.
+  if (!UseDefaults)
+    for (auto &KV : RecipMap)
+      KV.second.Enabled = Enable;
+
+  // Custom refinement count was specified with all, none, or default.
+  if (!RefStepString.empty())
+    for (auto &KV : RecipMap)
+      KV.second.RefinementSteps = RefSteps;
+
+  return true;
+}
+
+/// Apply a list of per-operation settings of the form
+/// "[!]<name>[:<digit>]".
+///
+/// A leading '!' disables the operation. A name given without its 'f' or 'd'
+/// suffix applies to both the float and the double entry. Unknown names and
+/// names that were already set are reported as fatal errors.
+void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
+  static const char DisabledPrefix = '!';
+
+  for (const std::string &Arg : Args) {
+    StringRef Val = Arg;
+
+    // An empty argument has no first character to inspect; let the map lookup
+    // below reject it as an invalid option instead of indexing past the end.
+    bool IsDisabled = !Val.empty() && Val[0] == DisabledPrefix;
+    // Ignore the disablement token for string matching.
+    if (IsDisabled)
+      Val = Val.substr(1);
+
+    size_t RefPos;
+    uint8_t RefSteps;
+    StringRef RefStepString;
+    if (parseRefinementStep(Val, RefPos, RefSteps)) {
+      // Split the string for further processing.
+      RefStepString = Val.substr(RefPos + 1);
+      Val = Val.substr(0, RefPos);
+    }
+
+    // Points at the double entry only when the option was given without a
+    // float or double suffix.
+    // NOTE(review): RecipMap is declared in TargetRecip.h, which is not shown
+    // here. If its key type is a non-owning StringRef, operator[] with a
+    // temporary std::string would insert a key that dangles, so the map is
+    // only ever searched with find() in this function.
+    RecipIter DoubleIter = RecipMap.end();
+
+    RecipIter Iter = RecipMap.find(Val);
+    if (Iter == RecipMap.end()) {
+      // Try again specifying float suffix.
+      Iter = RecipMap.find(Val.str() + 'f');
+      DoubleIter = RecipMap.find(Val.str() + 'd');
+      if (Iter == RecipMap.end()) {
+        assert(DoubleIter == RecipMap.end() &&
+               "Float entry missing from map");
+        report_fatal_error("Invalid option for -recip.");
+      }
+      assert(DoubleIter != RecipMap.end() && "Double entry missing from map");
+
+      // The option was specified without a float or double suffix.
+      // Make sure that the double entry was not already specified.
+      // The float entry will be checked below.
+      if (DoubleIter != RecipMap.end() &&
+          DoubleIter->second.Enabled != Uninitialized)
+        report_fatal_error("Duplicate option for -recip.");
+    }
+
+    if (Iter->second.Enabled != Uninitialized)
+      report_fatal_error("Duplicate option for -recip.");
+
+    // Mark the matched option as found. Do not allow duplicate specifiers.
+    Iter->second.Enabled = !IsDisabled;
+    if (!RefStepString.empty())
+      Iter->second.RefinementSteps = RefSteps;
+
+    // If the precision was not specified, the double entry is also initialized.
+    if (DoubleIter != RecipMap.end()) {
+      DoubleIter->second.Enabled = !IsDisabled;
+      if (!RefStepString.empty())
+        DoubleIter->second.RefinementSteps = RefSteps;
+    }
+  }
+}
+
+/// Build the map from command-line arguments. A single argument that names a
+/// global setting is handled by parseGlobalParams; anything else is treated
+/// as a list of individual settings.
+TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
+  TargetRecip() {
+  unsigned NumArgs = Args.size();
+
+  // Check if "all", "default", or "none" was specified.
+  if (NumArgs == 1 && parseGlobalParams(Args[0]))
+    return;
+
+  parseIndividualParams(Args);
+}
+
+/// Return the enabled setting stored for \p Key. Asserts that \p Key is a
+/// known operation name and that its setting has been initialized.
+bool TargetRecip::isEnabled(const StringRef &Key) const {
+  ConstRecipIter Iter = RecipMap.find(Key);
+  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+  assert(Iter->second.Enabled != Uninitialized &&
+         "Enablement setting was not initialized");
+  return Iter->second.Enabled;
+}
+
+/// Return the refinement-step count stored for \p Key. Asserts that \p Key is
+/// a known operation name and that its count has been initialized.
+unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
+  ConstRecipIter Iter = RecipMap.find(Key);
+  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+  assert(Iter->second.RefinementSteps != Uninitialized &&
+         "Refinement step setting was not initialized");
+  return Iter->second.RefinementSteps;
+}
+
+/// Custom settings (previously initialized values) override target defaults.
+void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
+                              unsigned RefSteps) {
+  // Fill in only the fields that have not been given a value yet.
+  auto ApplyDefaults = [&](RecipParams &RP) {
+    if (RP.Enabled == Uninitialized)
+      RP.Enabled = Enable;
+    if (RP.RefinementSteps == Uninitialized)
+      RP.RefinementSteps = RefSteps;
+  };
+
+  if (Key == "all") {
+    for (auto &KV : RecipMap)
+      ApplyDefaults(KV.second);
+  } else {
+    // operator[] is kept on purpose: a key that is not in the map yet is
+    // still inserted, exactly as before.
+    ApplyDefaults(RecipMap[Key]);
+  }
+}
+
+/// Two objects are equal when they hold the same set of operation names and
+/// every name has the same enabled setting and refinement-step count.
+bool TargetRecip::operator==(const TargetRecip &Other) const {
+  // Maps with a different number of entries can never match; checking this
+  // first also covers names that exist only in Other.
+  if (RecipMap.size() != Other.RecipMap.size())
+    return false;
+
+  for (const auto &KV : RecipMap) {
+    ConstRecipIter OtherIter = Other.RecipMap.find(KV.first);
+    // A name missing from Other means the objects differ; never dereference
+    // the end iterator.
+    if (OtherIter == Other.RecipMap.end())
+      return false;
+
+    const RecipParams &RP = KV.second;
+    const RecipParams &OtherRP = OtherIter->second;
+    if (RP.RefinementSteps != OtherRP.RefinementSteps)
+      return false;
+    if (RP.Enabled != OtherRP.Enabled)
+      return false;
+  }
+  return true;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index a21f8c723503..9eee4a0f3d82 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -315,7 +315,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
 
   // Test (%SrcReg)
   {
-    const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+    const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
     std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
         getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
     InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
@@ -324,7 +324,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
 
   // Test -1(%SrcReg, %CntReg, AccessSize)
   {
-    const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+    const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
     std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
         getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
         SMLoc()));
@@ -334,7 +334,7 @@ void
X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test (%DstReg) { - const MCExpr *Disp = MCConstantExpr::Create(0, Ctx); + const MCExpr *Disp = MCConstantExpr::create(0, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc())); InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out); @@ -342,7 +342,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test -1(%DstReg, %CntReg, AccessSize) { - const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(-1, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(), SMLoc())); @@ -461,7 +461,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op, while (Residue != 0) { const MCConstantExpr *Disp = - MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx); + MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx); std::unique_ptr<X86Operand> DispOp = X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(), SMLoc()); @@ -493,7 +493,7 @@ X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement, CheckDisplacementBounds(NewDisplacement); *Residue = Displacement - NewDisplacement; - const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx); + const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx); return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp, Op.getMemBaseReg(), Op.getMemIndexReg(), Op.getMemScale(), SMLoc(), SMLoc()); @@ -615,7 +615,7 @@ private: const std::string &Fn = FuncName(AccessSize, IsWrite); MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); const MCSymbolRefExpr *FnExpr = - MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, 
MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr)); } }; @@ -643,7 +643,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( MCInst Inst; Inst.setOpcode(X86::MOV8rm); Inst.addOperand(MCOperand::createReg(ShadowRegI8)); - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc())); @@ -654,7 +654,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( EmitInstruction( Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8)); MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg( @@ -669,7 +669,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( case 1: break; case 2: { - const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(1, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); @@ -720,7 +720,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge( Inst.setOpcode(X86::CMP16mi); break; } - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc())); @@ -729,7 +729,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge( EmitInstruction(Out, Inst); } MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, 
MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx); @@ -743,7 +743,7 @@ void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize, // No need to test when ECX is equals to zero. MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction( Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX)); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); @@ -860,7 +860,7 @@ public: private: void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) { - const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); @@ -885,7 +885,7 @@ private: const std::string &Fn = FuncName(AccessSize, IsWrite); MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); const MCSymbolRefExpr *FnExpr = - MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr)); } }; @@ -914,7 +914,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( MCInst Inst; Inst.setOpcode(X86::MOV8rm); Inst.addOperand(MCOperand::createReg(ShadowRegI8)); - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc())); @@ -925,7 +925,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( EmitInstruction( Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8)); MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const 
MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg( @@ -940,7 +940,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( case 1: break; case 2: { - const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(1, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); @@ -991,7 +991,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge( Inst.setOpcode(X86::CMP16mi); break; } - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc())); @@ -1001,7 +1001,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge( } MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx); @@ -1015,7 +1015,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize, // No need to test when RCX is equals to zero. 
MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction( Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX)); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 3047fd1078a9..e8965710f022 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -973,7 +973,7 @@ void X86AsmParser::SetFrameRegister(unsigned RegNo) { std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); @@ -982,7 +982,7 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RDI : (is32BitMode() ? 
X86::EDI : X86::DI); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); @@ -1195,7 +1195,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; const MCExpr *Val = - MCSymbolRefExpr::Create(Sym, Variant, getContext()); + MCSymbolRefExpr::create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); StringRef Identifier = Sym->getName(); @@ -1265,9 +1265,9 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, } if (SM.getImm() || !Disp) { - const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext()); + const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); if (Disp) - Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext()); + Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); else Disp = Imm; // An immediate displacement only. } @@ -1354,7 +1354,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // Create the symbol reference. MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); + Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); return false; } @@ -1382,7 +1382,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, // An immediate following a 'segment register', 'colon' token sequence can // be followed by a bracketed expression. If it isn't we know we have our // final segment override. 
- const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext()); + const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, Start, ImmDispToken.getEndLoc(), Size); @@ -1435,7 +1435,7 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { return ErrorOperand(Tok.getLoc(), "Expected } at this point"); Parser.Lex(); // Eat "}" const MCExpr *RndModeOp = - MCConstantExpr::Create(rndMode, Parser.getContext()); + MCConstantExpr::create(rndMode, Parser.getContext()); return X86Operand::CreateImm(RndModeOp, Start, End); } if(Tok.getIdentifier().equals("sae")){ @@ -1499,7 +1499,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, return nullptr; } - const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext()); + const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext()); // BaseReg is non-zero to avoid assertions. In the context of inline asm, // we're pointing to a local variable in memory, so the base register is // really the frame or stack pointer. 
@@ -1549,7 +1549,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, Val)); } - NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); return false; } @@ -1623,7 +1623,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { unsigned Len = End.getPointer() - TypeLoc.getPointer(); InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); - const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); + const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End); } @@ -1683,7 +1683,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, Size); - const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); + const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); return X86Operand::CreateImm(ImmExpr, Start, End); } @@ -1841,7 +1841,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is // after it. 
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; @@ -2061,7 +2061,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx), NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2088,7 +2088,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) { Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2115,7 +2115,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (ComparisonCode != ~0U) { Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2375,7 +2375,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { X86Operand &Op = static_cast<X86Operand &>(*Ops[1]); assert(Op.isImm() && "expected immediate"); int64_t Res; - if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) { + if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) { Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]"); return false; } diff --git 
a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 3469d19f4fd2..6e99c37c2bc7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -546,6 +546,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_XMM512: mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); return; + case TYPE_BNDR: + mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4))); case TYPE_REL8: isBranch = true; pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; @@ -827,6 +829,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_VK16: case TYPE_DEBUGREG: case TYPE_CONTROLREG: + case TYPE_BNDR: return translateRMRegister(mcInst, insn); case TYPE_M: case TYPE_M8: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 9e6505001393..301db72feafb 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -460,6 +460,7 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ + ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \ \ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index af4399a41a06..ea727e6e82fb 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -150,11 +150,11 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, // that address in hex. 
const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << formatHex((uint64_t)Address); } else { // Otherwise, just print the expression. - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } } @@ -178,7 +178,9 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("<imm:") << '$' << *Op.getExpr() << markup(">"); + O << markup("<imm:") << '$'; + Op.getExpr()->print(O, &MAI); + O << markup(">"); } } @@ -203,7 +205,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << formatImm(DispVal); } else { assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } if (IndexReg.getReg() || BaseReg.getReg()) { @@ -273,7 +275,7 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, O << formatImm(DispSpec.getImm()); } else { assert(DispSpec.isExpr() && "non-immediate displacement?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } O << markup(">"); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 4d92dafa938a..879378fc7a97 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -131,12 +131,12 @@ void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, // that address in hex. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << formatHex((uint64_t)Address); } else { // Otherwise, just print the expression. 
- O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } } @@ -150,7 +150,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << formatImm((int64_t)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -187,7 +187,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } else { int64_t DispVal = DispSpec.getImm(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { @@ -245,7 +245,7 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, O << formatImm(DispSpec.getImm()); } else { assert(DispSpec.isExpr() && "non-immediate displacement?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } O << ']'; diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 6d4284dc518b..1ac656d4614b 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -326,7 +326,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // FIXME: We could generated something better than plain 0x90. if (!HasNopl) { for (uint64_t i = 0; i < Count; ++i) - OW->Write8(0x90); + OW->write8(0x90); return true; } @@ -336,10 +336,10 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); const uint8_t Prefixes = ThisNopLength <= 10 ? 
0 : ThisNopLength - 10; for (uint8_t i = 0; i < Prefixes; i++) - OW->Write8(0x66); + OW->write8(0x66); const uint8_t Rest = ThisNopLength - Prefixes; for (uint8_t i = 0; i < Rest; i++) - OW->Write8(Nops[Rest - 1][i]); + OW->write8(Nops[Rest - 1][i]); Count -= ThisNopLength; } while (Count != 0); diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 45088835cfb9..a33468dc4769 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -66,6 +66,7 @@ static X86_64RelType getType64(unsigned Kind, case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: return RT64_32; + case FK_PCRel_2: case FK_Data_2: return RT64_16; case FK_PCRel_1: diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index a39def98e48e..2943dd383efa 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -31,13 +31,13 @@ public: StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; SymI->getAddress(SymAddr); - uint64_t SymSize; SymI->getSize(SymSize); + uint64_t SymSize = SymI->getSize(); int64_t Addend; getELFRelocationAddend(Rel, Addend); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; // If hasAddend is true, then we need to add Addend (r_addend) to Expr. 
@@ -76,7 +76,7 @@ public: case R_X86_64_PC64: // S + A - P (P/pcrel is implicit) hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; case R_X86_64_GOT32: case R_X86_64_GOT64: @@ -85,27 +85,27 @@ public: case R_X86_64_GOTPLT64: // G + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Ctx); break; case R_X86_64_PLT32: // L + A - P -> S@PLT + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PLT, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx); break; case R_X86_64_GLOB_DAT: case R_X86_64_JUMP_SLOT: // S - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; case R_X86_64_GOTPCREL: case R_X86_64_GOTPCREL64: // G + GOT + A - P -> S@GOTPCREL + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; case R_X86_64_GOTOFF64: // S + A - GOT - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx); break; case R_X86_64_PLTOFF64: // L + A - GOT @@ -113,15 +113,15 @@ public: case R_X86_64_SIZE32: case R_X86_64_SIZE64: // Z + A - Expr = MCConstantExpr::Create(SymSize, Ctx); + Expr = MCConstantExpr::create(SymSize, Ctx); break; default: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; } if (Expr && hasAddend && Addend != 0) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(Addend, Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(Addend, Ctx), Ctx); return Expr; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index bda35f2b9726..fc0b0f89e23d 100644 --- 
a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -119,9 +119,9 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, MCStreamer &Streamer) const { MCContext &Context = Streamer.getContext(); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); - const MCExpr *Four = MCConstantExpr::Create(4, Context); - return MCBinaryExpr::CreateAdd(Res, Four, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::create(4, Context); + return MCBinaryExpr::createAdd(Res, Four, Context); } void X86MCAsmInfoMicrosoft::anchor() { } @@ -132,6 +132,11 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { PrivateLabelPrefix = ".L"; PointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; + } else { + // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just + // a place holder that the Windows EHStreamer looks for to suppress CFI + // output. In particular, usesWindowsCFI() returns false. 
+ WinEHEncodingType = WinEH::EncodingType::X86; } ExceptionsType = ExceptionHandling::WinEH; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 8aed7a4d9eb9..10c434c8b1b4 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -304,7 +304,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } - Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx); + Expr = MCConstantExpr::create(DispOp.getImm(), Ctx); } else { Expr = DispOp.getExpr(); } @@ -351,7 +351,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, ImmOffset -= 1; if (ImmOffset) - Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(ImmOffset, Ctx), Ctx); // Emit a symbolic constant as a fixup and 4 zeros. diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 8e3c72158fc0..cc98e55dc695 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -115,8 +115,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { return X; } -static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); +static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { bool is64Bit = TheTriple.getArch() == Triple::x86_64; MCAsmInfo *MAI; diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 6cf5af7217f9..a5aadd6a385e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -39,33 +39,33 @@ public: MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the 
value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; switch(RelType) { case X86_64_RELOC_TLV: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); break; case X86_64_RELOC_SIGNED_4: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(4, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(4, Ctx), Ctx); break; case X86_64_RELOC_SIGNED_2: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(2, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(2, Ctx), Ctx); break; case X86_64_RELOC_SIGNED_1: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(1, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(1, Ctx), Ctx); break; case X86_64_RELOC_GOT_LOAD: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; case X86_64_RELOC_GOT: - Expr = MCSymbolRefExpr::Create(Sym, isPCRel ? + Expr = MCSymbolRefExpr::create(Sym, isPCRel ? 
MCSymbolRefExpr::VK_GOTPCREL : MCSymbolRefExpr::VK_GOT, Ctx); @@ -84,7 +84,7 @@ public: report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); - const MCExpr *LHS = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx); symbol_iterator RSymI = Rel.getSymbol(); uint64_t RSymAddr; @@ -94,15 +94,15 @@ public: MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName); if (!RSym->isVariable()) - RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx)); + RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx)); - const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx); + const MCExpr *RHS = MCSymbolRefExpr::create(RSym, Ctx); - Expr = MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + Expr = MCBinaryExpr::createSub(LHS, RHS, Ctx); break; } default: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; } return Expr; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 9da3e1fc36bf..95acc07192da 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -25,7 +25,7 @@ using namespace llvm; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - bool RecordScatteredRelocation(MachObjectWriter *Writer, + bool recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -33,7 +33,7 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, unsigned Log2Size, uint64_t &FixedValue); - void RecordTLVPRelocation(MachObjectWriter *Writer, + void recordTLVPRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -54,12 +54,10 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, uint64_t &FixedValue); public: - X86MachObjectWriter(bool 
Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/Is64Bit) {} + X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override { @@ -142,13 +140,11 @@ void X86MachObjectWriter::RecordX86_64Relocation( const MCSymbol *A = &Target.getSymA()->getSymbol(); if (A->isTemporary()) A = &Writer->findAliasedSymbol(*A); - const MCSymbolData &A_SD = Asm.getSymbolData(*A); const MCSymbol *A_Base = Asm.getAtom(*A); const MCSymbol *B = &Target.getSymB()->getSymbol(); if (B->isTemporary()) B = &Writer->findAliasedSymbol(*B); - const MCSymbolData &B_SD = Asm.getSymbolData(*B); const MCSymbol *B_Base = Asm.getAtom(*B); // Neither symbol can be modified. @@ -190,7 +186,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( (!B_Base ? 
0 : Writer->getSymbolAddress(*B_Base, Layout)); if (!A_Base) - Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + Index = A->getFragment()->getParent()->getOrdinal() + 1; Type = MachO::X86_64_RELOC_UNSIGNED; MachO::any_relocation_info MRE; @@ -202,7 +198,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( if (B_Base) RelSymbol = B_Base; else - Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + Index = B->getFragment()->getParent()->getOrdinal() + 1; Type = MachO::X86_64_RELOC_SUBTRACTOR; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); @@ -211,7 +207,6 @@ void X86MachObjectWriter::RecordX86_64Relocation( if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec)) Asm.addLocalUsedInReloc(*Symbol); } - const MCSymbolData &SD = Asm.getSymbolData(*Symbol); RelSymbol = Asm.getAtom(*Symbol); // Relocations inside debug sections always use local relocations when @@ -235,7 +230,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( Layout.getSymbolOffset(*RelSymbol); } else if (Symbol->isInSection() && !Symbol->isVariable()) { // The index is the section ordinal (1-based). 
- Index = SD.getFragment()->getParent()->getOrdinal() + 1; + Index = Symbol->getFragment()->getParent()->getOrdinal() + 1; Value += Writer->getSymbolAddress(*Symbol, Layout); if (IsPCRel) @@ -243,7 +238,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Symbol->isVariable()) { const MCExpr *Value = Symbol->getVariableValue(); int64_t Res; - bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, + bool isAbs = Value->evaluateAsAbsolute(Res, Layout, Writer->getSectionAddressMap()); if (isAbs) { FixedValue = Res; @@ -339,7 +334,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -354,23 +349,21 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression", false); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression", false); @@ -380,10 +373,10 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Note that there is no longer any semantic difference between these two // relocation types from the linkers point of view, this is done solely for // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : - (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; + Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF + : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. 
@@ -435,7 +428,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, return true; } -void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, +void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -490,7 +483,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a 32-bit TLVP reloc it's handled a bit differently. if (Target.getSymA() && Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { - RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return; } @@ -499,7 +492,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; } @@ -515,10 +508,10 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, if (IsPCRel) Offset += 1 << Log2Size; // Try to record the scattered relocation if needed. Fall back to non - // scattered if necessary (see comments in RecordScatteredRelocation() + // scattered if necessary (see comments in recordScatteredRelocation() // for details). if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) && - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue)) return; @@ -538,7 +531,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // Resolve constant variables. 
if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index c70e2e954631..852267400bba 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -168,6 +168,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", + "Support MPX instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", @@ -188,10 +190,6 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; -def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true", - "Use RSQRT* to optimize square root calculations">; -def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst", - "true", "Use RCP* to optimize division calculations">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; @@ -380,7 +378,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, - FeatureSlowIncDec]>; + FeatureSlowIncDec, FeatureMPX]>; def : KnightsLandingProc<"knl">; // FIXME: define SKX model @@ -391,7 +389,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, FeatureAES, 
FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, - FeatureSlowIncDec]>; + FeatureSlowIncDec, FeatureMPX]>; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. @@ -444,7 +442,7 @@ def : ProcessorModel<"btver2", BtVer2Model, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, - FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>; + FeatureSlowSHLD]>; // TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f97557e5c609..64fc6d0d7e5c 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -78,7 +78,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); case MachineOperand::MO_ConstantPoolIndex: - O << *P.GetCPISymbol(MO.getIndex()); + P.GetCPISymbol(MO.getIndex())->print(O, P.MAI); P.printOffset(MO.getOffset(), O); break; case MachineOperand::MO_GlobalAddress: { @@ -127,9 +127,12 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. if (GVSym->getName()[0] != '$') - O << *GVSym; - else - O << '(' << *GVSym << ')'; + GVSym->print(O, P.MAI); + else { + O << '('; + GVSym->print(O, P.MAI); + O << ')'; + } P.printOffset(MO.getOffset(), O); break; } @@ -146,12 +149,15 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, // These affect the name of the symbol, not any suffix. 
break; case X86II::MO_GOT_ABSOLUTE_ADDRESS: - O << " + [.-" << *P.MF->getPICBaseSymbol() << ']'; + O << " + [.-"; + P.MF->getPICBaseSymbol()->print(O, P.MAI); + O << ']'; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - O << '-' << *P.MF->getPICBaseSymbol(); + O << '-'; + P.MF->getPICBaseSymbol()->print(O, P.MAI); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; @@ -168,7 +174,8 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, case X86II::MO_PLT: O << "@PLT"; break; case X86II::MO_TLVP: O << "@TLVP"; break; case X86II::MO_TLVP_PIC_BASE: - O << "@TLVP" << '-' << *P.MF->getPICBaseSymbol(); + O << "@TLVP" << '-'; + P.MF->getPICBaseSymbol()->print(O, P.MAI); break; case X86II::MO_SECREL: O << "@SECREL32"; break; } @@ -525,7 +532,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // register any SEH handlers, so its object files should be safe. OutStreamer->EmitSymbolAttribute(S, MCSA_Global); OutStreamer->EmitAssignment( - S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); + S, MCConstantExpr::create(int64_t(1), MMI->getContext())); } } } @@ -549,7 +556,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, // using NLPs; however, sometimes the types are local to the file. // We need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue( - MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()), 4 /*size*/); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9af0aebea232..3dc75d76cee3 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3530,9 +3530,9 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, SmallVector<MachineOperand, 8> AddrOps; AM.getFullAddress(AddrOps); - MachineInstr *Result = - XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, - Size, Alignment, /*AllowCommute=*/true); + MachineInstr *Result = XII.foldMemoryOperandImpl( + *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment, + /*AllowCommute=*/true); if (!Result) return false; @@ -3541,20 +3541,21 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // to just look at OpNo + the offset to the index reg. We actually need to // scan the instruction to find the index reg and see if its the correct reg // class. - for (MIOperands MO(Result); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || MO->getReg() != AM.IndexReg) + unsigned OperandNo = 0; + for (MachineInstr::mop_iterator I = Result->operands_begin(), + E = Result->operands_end(); I != E; ++I, ++OperandNo) { + MachineOperand &MO = *I; + if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg) continue; // Found the index reg, now try to rewrite it. 
- unsigned OpNo = MO.getOperandNo(); unsigned IndexReg = constrainOperandRegClass(Result->getDesc(), - MO->getReg(), OpNo); - if (IndexReg == MO->getReg()) + MO.getReg(), OperandNo); + if (IndexReg == MO.getReg()) continue; - MO->setReg(IndexReg); + MO.setReg(IndexReg); } Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); - FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3ba811574489..e3ec288a683e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -67,12 +67,6 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); -static cl::opt<int> ReciprocalEstimateRefinementSteps( - "x86-recip-refinement-steps", cl::init(1), - cl::desc("Specify the number of Newton-Raphson iterations applied to the " - "result of the hardware reciprocal estimate instruction."), - cl::NotHidden); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -842,13 +836,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - // Only provide customized ctpop vector bit twiddling for vector types we - // know to perform better than using the popcnt instructions on each vector - // element. If popcnt isn't supported, always provide the custom version. 
- if (!Subtarget->hasPOPCNT()) { - setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); - setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); - } + setOperationAction(ISD::CTPOP, MVT::v16i8, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { @@ -1113,6 +1104,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v32i8, Custom); + setOperationAction(ISD::CTPOP, MVT::v16i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); @@ -1147,16 +1143,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); - // Only provide customized ctpop vector bit twiddling for vector types we - // know to perform better than using the popcnt instructions on each - // vector element. If popcnt isn't supported, always provide the custom - // version. 
- if (!Subtarget->hasPOPCNT()) - setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - - // Custom CTPOP always performs better on natively supported v8i32 - setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); - // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); @@ -1273,7 +1259,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); - + setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); setOperationAction(ISD::XOR, MVT::i1, Legal); @@ -1842,7 +1828,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, Subtarget->isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. - return MCSymbolRefExpr::Create(MBB->getSymbol(), + return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, Ctx); } @@ -1866,7 +1852,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. 
- return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); + return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } std::pair<const TargetRegisterClass *, uint8_t> @@ -1981,7 +1967,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); } else if (VA.getLocInfo() == CCValAssign::BCvt) - ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); + ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."); @@ -2018,13 +2004,13 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Subtarget->is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { - ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy); + ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) - ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); + ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); } } } @@ -2451,7 +2437,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) - ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. @@ -2780,6 +2766,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (MF.getTarget().Options.DisableTailCalls) isTailCall = false; + if (Subtarget->isPICStyleGOT() && + !MF.getTarget().Options.GuaranteedTailCallOpt) { + // If we are using a GOT, disable tail calls to external symbols with + // default visibility. 
Tail calling such a symbol requires using a GOT + // relocation, which forces early binding of the symbol. This breaks code + // that require lazy function symbol resolution. Using musttail or + // GuaranteedTailCallOpt will override this. + GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + if (!G || (!G->getGlobal()->hasLocalLinkage() && + G->getGlobal()->hasDefaultVisibility())) + isTailCall = false; + } + bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall(); if (IsMustTail) { // Force this to be a tail call. The verifier rules are enough to ensure @@ -2898,14 +2897,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. - Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); + Arg = DAG.getBitcast(MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg); + Arg = DAG.getBitcast(RegVT, Arg); break; case CCValAssign::Indirect: { // Store the argument. @@ -2964,8 +2963,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Note: The actual moving to ECX is done further down. 
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (G && !G->getGlobal()->hasHiddenVisibility() && - !G->getGlobal()->hasProtectedVisibility()) + if (G && !G->getGlobal()->hasLocalLinkage() && + G->getGlobal()->hasDefaultVisibility()) Callee = LowerGlobalAddress(Callee, DAG); else if (isa<ExternalSymbolSDNode>(Callee)) Callee = LowerExternalSymbol(Callee, DAG); @@ -4073,7 +4072,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, } else llvm_unreachable("Unexpected vector type"); - return DAG.getNode(ISD::BITCAST, dl, VT, Vec); + return DAG.getBitcast(VT, Vec); } static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, @@ -4200,9 +4199,9 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); - Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256); + Vec256 = DAG.getBitcast(CastVT, Vec256); Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); - return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256); + return DAG.getBitcast(ResultVT, Vec256); } return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128); @@ -4255,7 +4254,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, } else llvm_unreachable("Unexpected vector type"); - return DAG.getNode(ISD::BITCAST, dl, VT, Vec); + return DAG.getBitcast(VT, Vec); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd @@ -4611,7 +4610,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } } - return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V); + return DAG.getBitcast(MVT::v16i8, V); } /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
@@ -4749,7 +4748,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, DAG.getIntPtrConstant(InsertPSMask, DL)); - return DAG.getNode(ISD::BITCAST, DL, VT, Result); + return DAG.getBitcast(VT, Result); } /// Return a vector logical shift node. @@ -4759,12 +4758,11 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, assert(VT.is128BitVector() && "Unknown type for VShift"); MVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; - SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); + SrcOp = DAG.getBitcast(ShVT, SrcOp); MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType()); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); + return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } static SDValue @@ -4949,7 +4947,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, SDValue(ResNode.getNode(), 1)); } - return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); + return DAG.getBitcast(VT, ResNode); } return SDValue(); } @@ -5261,8 +5259,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { SDValue Imm = ConvertI1VectorToInterger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, dl, VT, Imm); - SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm); + return DAG.getBitcast(VT, Imm); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } @@ -5277,7 +5275,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDValue In = Op.getOperand(idx); if (In.getOpcode() == 
ISD::UNDEF) continue; - if (!isa<ConstantSDNode>(In)) + if (!isa<ConstantSDNode>(In)) NonConstIdx.push_back(idx); else { Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx; @@ -5304,12 +5302,12 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); - else + else Imm = DAG.getUNDEF(VT); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm); + DstVec = DAG.getBitcast(VT, Imm); else { - SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } @@ -5818,9 +5816,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // convert it to a vector with movd (S2V+shuffle to zero extend). Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); - return DAG.getNode( - ISD::BITCAST, dl, VT, - getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG)); + return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef( + Item, Idx * 2, true, Subtarget, DAG)); } } @@ -5866,7 +5863,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } - return DAG.getNode(ISD::BITCAST, dl, VT, Item); + return DAG.getBitcast(VT, Item); } } @@ -6257,6 +6254,42 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, return true; } +/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane. +/// +/// This checks a shuffle mask to see if it is performing the same +/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies +/// that it is also not lane-crossing. 
It may however involve a blend from the +/// same lane of a second vector. +/// +/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is +/// non-trivial to compute in the face of undef lanes. The representation is +/// *not* suitable for use with existing 256-bit shuffles as it will contain +/// entries from both V1 and V2 inputs to the wider mask. +static bool +is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, + SmallVectorImpl<int> &RepeatedMask) { + int LaneSize = 256 / VT.getScalarSizeInBits(); + RepeatedMask.resize(LaneSize, -1); + int Size = Mask.size(); + for (int i = 0; i < Size; ++i) { + if (Mask[i] < 0) + continue; + if ((Mask[i] % Size) / LaneSize != i / LaneSize) + // This entry crosses lanes, so there is no way to model this shuffle. + return false; + + // Ok, handle the in-lane shuffles by detecting if and when they repeat. + if (RepeatedMask[i % LaneSize] == -1) + // This is the first non-undef entry in this slot of a 256-bit lane. + RepeatedMask[i % LaneSize] = + Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size; + else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i]) + // Found a mismatch with the repeated mask. + return false; + } + return true; +} + /// \brief Checks whether a shuffle mask is equivalent to an explicit list of /// arguments. /// @@ -6316,6 +6349,22 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } +/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask. +/// +/// This helper function produces an 8-bit shuffle immediate corresponding to +/// the ubiquitous shuffle encoding scheme used in x86 instructions for +/// shuffling 8 lanes. 
+static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, + SelectionDAG &DAG) { + assert(Mask.size() <= 8 && + "Up to 8 elts may be in Imm8 1-bit lane shuffle mask"); + unsigned Imm = 0; + for (unsigned i = 0; i < Mask.size(); ++i) + if (Mask[i] >= 0) + Imm |= (Mask[i] % 2) << i; + return DAG.getConstant(Imm, DL, MVT::i8); +} + /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -6341,10 +6390,9 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); // We have to cast V2 around. MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); - V2 = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::ANDNP, DL, MaskVT, - DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask), - DAG.getNode(ISD::BITCAST, DL, MaskVT, V2))); + V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT, + DAG.getBitcast(MaskVT, V1Mask), + DAG.getBitcast(MaskVT, V2))); return DAG.getNode(ISD::OR, DL, VT, V1, V2); } @@ -6395,11 +6443,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, BlendMask |= 1u << (i * Scale + j); MVT BlendVT = VT.getSizeInBits() > 128 ? 
MVT::v8i32 : MVT::v4i32; - V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8))); + V1 = DAG.getBitcast(BlendVT, V1); + V2 = DAG.getBitcast(BlendVT, V2); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, + DAG.getConstant(BlendMask, DL, MVT::i8))); } // FALLTHROUGH case MVT::v8i16: { @@ -6412,11 +6460,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, for (int j = 0; j < Scale; ++j) BlendMask |= 1u << (i * Scale + j); - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8))); + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = DAG.getBitcast(MVT::v8i16, V2); + return DAG.getBitcast(VT, + DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, + DAG.getConstant(BlendMask, DL, MVT::i8))); } case MVT::v16i16: { @@ -6465,13 +6513,12 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); - V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(ISD::VSELECT, DL, BlendVT, - DAG.getNode(ISD::BUILD_VECTOR, DL, BlendVT, VSELECTMask), - V1, V2)); + V1 = DAG.getBitcast(BlendVT, V1); + V2 = DAG.getBitcast(BlendVT, V2); + return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, + DAG.getNode(ISD::BUILD_VECTOR, DL, + BlendVT, VSELECTMask), + V1, V2)); } default: @@ -6652,13 +6699,12 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, if (Subtarget->hasSSSE3()) { // Cast the inputs to i8 vector of correct length to match PALIGNR. 
MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes); - Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo); - Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi); + Lo = DAG.getBitcast(AlignVT, Lo); + Hi = DAG.getBitcast(AlignVT, Hi); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, - DAG.getConstant(Rotation * Scale, DL, - MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, + DAG.getConstant(Rotation * Scale, DL, MVT::i8))); } assert(VT.getSizeInBits() == 128 && @@ -6671,15 +6717,15 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, int HiByteShift = Rotation * Scale; // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ. - Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo); - Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi); + Lo = DAG.getBitcast(MVT::v2i64, Lo); + Hi = DAG.getBitcast(MVT::v2i64, Hi); SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo, DAG.getConstant(LoByteShift, DL, MVT::i8)); SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi, DAG.getConstant(HiByteShift, DL, MVT::i8)); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); + return DAG.getBitcast(VT, + DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } /// \brief Compute whether each element of a shuffle is zeroable. 
@@ -6740,8 +6786,8 @@ static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, IntEltVT); if (EltVT.isFloatingPoint()) { - Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero); - AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes); + Zero = DAG.getBitcast(EltVT, Zero); + AllOnes = DAG.getBitcast(EltVT, AllOnes); } SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero); SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); @@ -6833,11 +6879,11 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale); assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && "Illegal integer vector type"); - V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V); + V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, DL, MVT::i8)); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); }; // SSE/AVX supports logical shifts up to 64-bit integers - so we can just @@ -6878,31 +6924,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( if (Subtarget->hasSSE41()) { MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV)); + return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV)); } // For any extends we can cheat for larger element sizes and use shuffle // instructions that can fold with a load and/or copy. 
if (AnyExt && EltBits == 32) { int PSHUFDMask[4] = {0, -1, 1, -1}; - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, + DAG.getBitcast(MVT::v4i32, InputV), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } if (AnyExt && EltBits == 16 && Scale > 2) { int PSHUFDMask[4] = {0, -1, 0, -1}; InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), + DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); int PSHUFHWMask[4] = {1, -1, -1, -1}; - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV), - getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, + DAG.getBitcast(MVT::v8i16, InputV), + getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); } // If this would require more than 2 unpack instructions to expand, use @@ -6914,11 +6957,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( for (int i = 0; i < 16; ++i) PSHUFBMask[i] = DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8); - InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, - DAG.getNode(ISD::BUILD_VECTOR, DL, - MVT::v16i8, PSHUFBMask))); + InputV = DAG.getBitcast(MVT::v16i8, InputV); + return DAG.getBitcast(VT, + DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, + DAG.getNode(ISD::BUILD_VECTOR, DL, + MVT::v16i8, PSHUFBMask))); } // Otherwise emit a sequence of unpacks. 
@@ -6926,13 +6969,13 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); - InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV); + InputV = DAG.getBitcast(InputVT, InputV); InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; } while (Scale > 1); - return DAG.getNode(ISD::BITCAST, DL, VT, InputV); + return DAG.getBitcast(VT, InputV); } /// \brief Try to lower a vector shuffle as a zero extension on any microarch. @@ -7030,9 +7073,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( }; if (SDValue V = CanZExtLowHalf()) { - V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V); + V = DAG.getBitcast(MVT::v2i64, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } // No viable ext lowering found. @@ -7106,7 +7149,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( if (SDValue V2S = getScalarValueForVectorElement( V2, Mask[V2Index] - Mask.size(), DAG)) { // We need to zext the scalar if it is smaller than an i32. - V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S); + V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { // Using zext to expand a narrow element won't work for non-zero // insertions. 
@@ -7155,7 +7198,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2); if (ExtVT != VT) - V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2); + V2 = DAG.getBitcast(VT, V2); if (V2Index != 0) { // If we have 4 or fewer lanes we can cheaply shuffle the element into @@ -7167,13 +7210,13 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2Shuffle[V2Index] = 0; V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2); + V2 = DAG.getBitcast(MVT::v2i64, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, DAG.getConstant( V2Index * EltVT.getSizeInBits()/8, DL, DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64))); - V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2); + V2 = DAG.getBitcast(VT, V2); } } return V2; @@ -7396,13 +7439,13 @@ static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1, V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); // Cast the inputs to the type we will use to unpack them. - V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2); + V1 = DAG.getBitcast(UnpackVT, V1); + V2 = DAG.getBitcast(UnpackVT, V2); // Unpack the inputs and cast the result back to the desired type. - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, - DL, UnpackVT, V1, V2)); + return DAG.getBitcast( + VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, + UnpackVT, V1, V2)); }; // We try each unpack from the largest to the smallest to try and find one @@ -7558,12 +7601,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. // We have to map the mask as it is actually a v4i32 shuffle instruction. 
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1); + V1 = DAG.getBitcast(MVT::v4i32, V1); int WidenedMask[4] = { std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1, std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1}; - return DAG.getNode( - ISD::BITCAST, DL, MVT::v2i64, + return DAG.getBitcast( + MVT::v2i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); } @@ -7584,12 +7627,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, }; if (SDValue V1Pack = GetPackNode(V1)) if (SDValue V2Pack = GetPackNode(V2)) - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, - Mask[0] == 0 ? V1Pack.getOperand(0) - : V1Pack.getOperand(1), - Mask[1] == 2 ? V2Pack.getOperand(0) - : V2Pack.getOperand(1))); + return DAG.getBitcast(MVT::v2i64, + DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, + Mask[0] == 0 ? V1Pack.getOperand(0) + : V1Pack.getOperand(1), + Mask[1] == 2 ? V2Pack.getOperand(0) + : V2Pack.getOperand(1))); // Try to use shift instructions. if (SDValue Shift = @@ -7639,10 +7682,10 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // incur 2 cycles of stall for integer vectors on Nehalem and older chips. // However, all the alternatives are still more cycles and newer chips don't // have this problem. It would be really nice if x86 had better shuffles here. - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); + V1 = DAG.getBitcast(MVT::v2f64, V1); + V2 = DAG.getBitcast(MVT::v2f64, V2); + return DAG.getBitcast(MVT::v2i64, + DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); } /// \brief Test whether this can be lowered with a single SHUFPS instruction. 
@@ -7941,11 +7984,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // up the inputs, bypassing domain shift penalties that we would encur if we // directly used PSHUFD on Nehalem and older. For newer chips, this isn't // relevant. - return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, - DAG.getVectorShuffle( - MVT::v4f32, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1), - DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask)); + return DAG.getBitcast( + MVT::v4i32, + DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1), + DAG.getBitcast(MVT::v4f32, V2), Mask)); } /// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 @@ -8123,11 +8165,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( int PSHUFDMask[] = {0, 1, 2, 3}; PSHUFDMask[ADWord] = BDWord; PSHUFDMask[BDWord] = ADWord; - V = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, - DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, - DAG))); + V = DAG.getBitcast( + VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // Adjust the mask to match the new locations of A and B. for (int &M : Mask) @@ -8368,11 +8409,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFDMask)) - V = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, - DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, - DAG))); + V = DAG.getBitcast( + VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // At this point, each half should contain all its inputs, and we can then // just shuffle them into their final position. 
@@ -8433,11 +8473,11 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, if (V1InUse) V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V1), + DAG.getBitcast(MVT::v16i8, V1), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask)); if (V2InUse) V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V2), + DAG.getBitcast(MVT::v16i8, V2), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask)); // If we need shuffled inputs from both, blend the two. @@ -8448,7 +8488,7 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, V = V1InUse ? V1 : V2; // Cast the result back to the correct type. - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } /// \brief Generic lowering of 8-lane i16 shuffles. @@ -8749,10 +8789,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Update the lane map based on the mapping we ended up with. LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; } - V1 = DAG.getNode( - ISD::BITCAST, DL, MVT::v16i8, - DAG.getVectorShuffle(MVT::v8i16, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + V1 = DAG.getBitcast( + MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. 
@@ -8770,10 +8809,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, assert(PostDupI16Shuffle[i / 2] == MappedMask && "Conflicting entrties in the original shuffle!"); } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v16i8, - DAG.getVectorShuffle(MVT::v8i16, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + return DAG.getBitcast( + MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); }; if (SDValue V = tryToWidenViaDuplication()) @@ -8866,19 +8904,18 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; - SDValue ByteClearMask = - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, - DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); + SDValue ByteClearMask = DAG.getBitcast( + MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); // Now pack things back together. - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1); - V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2); + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2); SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2); for (int i = 1; i < NumEvenDrops; ++i) { - Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result); + Result = DAG.getBitcast(MVT::v8i16, Result); Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result); } @@ -8912,7 +8949,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask), [](int M) { return M >= 0 && M % 2 == 1; })) { // Use a mask to drop the high bytes. 
- VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + VLoHalf = DAG.getBitcast(MVT::v8i16, V); VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, DAG.getConstant(0x00FF, DL, MVT::v8i16)); @@ -8929,10 +8966,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } else { // Otherwise just unpack the low half of V into VLoHalf and the high half into // VHiHalf so that we can blend them as i16s. - VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); - VHiHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + VLoHalf = DAG.getBitcast( + MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + VHiHalf = DAG.getBitcast( + MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); @@ -9073,8 +9110,8 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps); HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps); } - return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV), - DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV)); + return std::make_pair(DAG.getBitcast(SplitVT, LoV), + DAG.getBitcast(SplitVT, HiV)); }; SDValue LoV1, HiV1, LoV2, HiV2; @@ -9407,12 +9444,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( LaneMask[2 * i + 1] = 2*Lanes[i] + 1; } - V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2); + V1 = DAG.getBitcast(LaneVT, V1); + V2 = DAG.getBitcast(LaneVT, V2); SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask); // Cast it back to the type we actually want. - LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle); + LaneShuffle = DAG.getBitcast(VT, LaneShuffle); // Now do a simple shuffle that isn't lane crossing. 
SmallVector<int, 8> NewMask; @@ -9441,6 +9478,37 @@ static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { return true; } +static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { + + // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. + // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. + assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD"); + int NumElts = VT.getVectorNumElements(); + bool ShufpdMask = true; + bool CommutableMask = true; + unsigned Immediate = 0; + for (int i = 0; i < NumElts; ++i) { + if (Mask[i] < 0) + continue; + int Val = (i & 6) + NumElts * (i & 1); + int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1); + if (Mask[i] < Val || Mask[i] > Val + 1) + ShufpdMask = false; + if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1) + CommutableMask = false; + Immediate |= (Mask[i] % 2) << i; + } + if (ShufpdMask) + return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, + DAG.getConstant(Immediate, DL, MVT::i8)); + if (CommutableMask) + return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, + DAG.getConstant(Immediate, DL, MVT::i8)); + return SDValue(); +} + /// \brief Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 @@ -9505,24 +9573,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Check if the blend happens to exactly fit that of SHUFPD. 
- if ((Mask[0] == -1 || Mask[0] < 2) && - (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) && - (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) && - (Mask[3] == -1 || Mask[3] >= 6)) { - unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) | - ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3); - return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); - } - if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) && - (Mask[1] == -1 || Mask[1] < 2) && - (Mask[2] == -1 || Mask[2] >= 6) && - (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) { - unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) | - ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3); - return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); - } + if (SDValue Op = + lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) + return Op; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, @@ -9584,10 +9637,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, PSHUFDMask[2 * i] = 2 * RepeatedMask[i]; PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1; } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v4i64, + return DAG.getBitcast( + MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1), + DAG.getBitcast(MVT::v8i32, V1), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } } @@ -9700,11 +9753,11 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)); if (Subtarget->hasAVX2()) - return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, - DAG.getNode(ISD::BITCAST, DL, MVT::v8f32, - DAG.getNode(ISD::BUILD_VECTOR, DL, + return DAG.getNode( + X86ISD::VPERMV, DL, MVT::v8f32, + DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)), - 
V1); + V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, @@ -9894,12 +9947,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8); PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8); } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v16i16, - DAG.getNode( - X86ISD::PSHUFB, DL, MVT::v32i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1), - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask))); + return DAG.getBitcast(MVT::v16i16, + DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, + DAG.getBitcast(MVT::v32i8, V1), + DAG.getNode(ISD::BUILD_VECTOR, DL, + MVT::v32i8, PSHUFBMask))); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -10039,10 +10091,9 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), VT.getVectorNumElements()); - V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); + V1 = DAG.getBitcast(FpVT, V1); + V2 = DAG.getBitcast(FpVT, V2); + return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); } switch (VT.SimpleTy) { @@ -10064,64 +10115,60 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. 
-static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - ArrayRef<int> Mask = SVOp->getMask(); - assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - - // X86 has dedicated unpack instructions that can handle specific blend - // operations: UNPCKH and UNPCKL. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2); +static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG); + assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN"); + // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right) + int AlignVal = -1; + for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) + continue; + if (Mask[i] < i) + return SDValue(); + if (AlignVal == -1) + AlignVal = Mask[i] - i; + else if (Mask[i] - i != AlignVal) + return SDValue(); + } + // Vector source operands should be swapped + return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1, + DAG.getConstant(AlignVal, DL, MVT::i8)); } -/// \brief Handle lowering of 16-lane 32-bit floating point shuffles. 
-static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - ArrayRef<int> Mask = SVOp->getMask(); - assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); +static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. - 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); + assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV"); - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); + MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); + + SmallVector<SDValue, 32> VPermMask; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) : + DAG.getConstant(Mask[i], DL,MaskEltVT)); + SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT, + VPermMask); + if (isSingleInputShuffleMask(Mask)) + return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + + return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2); } -/// \brief Handle lowering of 8-lane 64-bit integer shuffles. 
-static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + +/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. +static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); + MVT VT = Op.getSimpleValueType(); + assert((V1.getSimpleValueType() == MVT::v8f64 || + V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); + assert((V2.getSimpleValueType() == MVT::v8f64 || + V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); @@ -10129,21 +10176,40 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - // FIXME: Implement direct support for this type! 
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); + if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) + return Op; + + if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG)) + return Op; + + // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7 + if (isSingleInputShuffleMask(Mask)) { + if (!is128BitLaneCrossingShuffleMask(VT, Mask)) + return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1, + get1bitLaneShuffleImm8ForMask(Mask, DL, DAG)); + + SmallVector<int, 4> RepeatedMask; + if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) + return DAG.getNode(X86ISD::VPERMI, DL, VT, V1, + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); + } + return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. -static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); + assert((V1.getSimpleValueType() == MVT::v16i32 || + V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); + assert((V2.getSimpleValueType() == MVT::v16i32 || + V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -10154,16 +10220,39 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, 0, 16, 1, 17, 4, 20, 5, 21, // Second 128-bit lane. 
8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {// First 128-bit lane. 2, 18, 3, 19, 6, 22, 7, 23, // Second 128-bit lane. 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2); + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); + if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, + 12, 12, 14, 14})) + return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1); + if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, + 13, 13, 15, 15})) + return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1); + + SmallVector<int, 4> RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) { + if (isSingleInputShuffleMask(Mask)) { + unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI; + return DAG.getNode(Opc, DL, VT, V1, + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); + } + + for (int i = 0; i < 4; ++i) + if (RepeatedMask[i] >= 16) + RepeatedMask[i] -= 12; + return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG); + } + + if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) + return Op; + + return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. @@ -10223,13 +10312,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // the requisite ISA extensions for that element type are available. 
switch (VT.SimpleTy) { case MVT::v8f64: - return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG); - case MVT::v16f32: - return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i64: - return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v16f32: case MVT::v16i32: - return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); @@ -10311,10 +10398,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // Make sure that the new vector type is legal. For example, v2f64 isn't // legal on SSE1. if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { - V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); - V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); + V1 = DAG.getBitcast(NewVT, V1); + V2 = DAG.getBitcast(NewVT, V2); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); } } @@ -10509,12 +10596,11 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. 
if (Idx == 0) - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, - Op.getOperand(0)), - Op.getOperand(1))); + return DAG.getNode( + ISD::TRUNCATE, dl, MVT::i16, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), + Op.getOperand(1))); SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, @@ -10538,10 +10624,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, - Op.getOperand(0)), - Op.getOperand(1)); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); + DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), + Op.getOperand(1)); + return DAG.getBitcast(MVT::f32, Extract); } if (VT == MVT::i32 || VT == MVT::i64) { @@ -10655,8 +10740,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, Vec), + DAG.getBitcast(MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. MVT EltVT = MVT::i32; @@ -10877,8 +10961,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); assert(OpVT.is128BitVector() && "Expected an SSE type!"); - return DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); + return DAG.getBitcast( + OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } // Lower a node with an EXTRACT_SUBVECTOR opcode. 
This may result in @@ -11670,14 +11754,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(), false, false, false, 16); - SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1), - CLod0); + SDValue Unpck1 = + getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(), false, false, false, 16); - SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1); + SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; @@ -11685,12 +11768,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'. Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { - SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub); + SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub); SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, S2F, 0x4E, DAG); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle), - Sub); + DAG.getBitcast(MVT::v2f64, Shuffle), Sub); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, @@ -11713,20 +11795,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), + DAG.getBitcast(MVT::v2f64, Load), DAG.getIntPtrConstant(0, dl)); // Or the load with the bias. 
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - MVT::v2f64, Load)), - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - MVT::v2f64, Bias))); - Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), - DAG.getIntPtrConstant(0, dl)); + SDValue Or = DAG.getNode( + ISD::OR, dl, MVT::v2i64, + DAG.getBitcast(MVT::v2i64, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), + DAG.getBitcast(MVT::v2i64, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); + Or = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); @@ -11805,19 +11886,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, if (Subtarget.hasSSE41()) { EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); - SDValue VecCstLowBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow); - SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V); + SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); + SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. 
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); - SDValue VecCstHighBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh); - SDValue VecShiftBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift); + SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); + SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, @@ -11843,11 +11921,11 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, makeArrayRef(&CstFAddArray[0], NumElts)); // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); - SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High); + SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; - SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low); + SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } @@ -12103,8 +12181,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + OpLo = DAG.getBitcast(HVT, OpLo); + OpHi = DAG.getBitcast(HVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } @@ -12189,14 +12267,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI()) return Op; // legal, will go to VPMOVB2M, VPMOVW2M - if ((InVT.is256BitVector() || InVT.is128BitVector()) + if 
((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() && Subtarget->hasVLX()) return Op; // legal, will go to VPMOVB2M, VPMOVW2M if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI()) return Op; // legal, will go to VPMOVD2M, VPMOVQ2M - if ((InVT.is256BitVector() || InVT.is128BitVector()) + if ((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() && Subtarget->hasVLX()) return Op; // legal, will go to VPMOVB2M, VPMOVQ2M @@ -12224,7 +12302,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; - In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In); + In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, @@ -12235,8 +12313,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(2, DL)); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + OpLo = DAG.getBitcast(MVT::v4i32, OpLo); + OpHi = DAG.getBitcast(MVT::v4i32, OpHi); static const int ShufMask[] = {0, 2, 4, 6}; return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); } @@ -12244,7 +12322,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { // On AVX2, v8i32 -> v8i16 becomed PSHUFB. 
if (Subtarget->hasInt256()) { - In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In); + In = DAG.getBitcast(MVT::v32i8, In); SmallVector<SDValue,32> pshufbMask; for (unsigned i = 0; i < 2; ++i) { @@ -12261,14 +12339,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask); In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); - In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In); + In = DAG.getBitcast(MVT::v4i64, In); static const int ShufMask[] = {0, 2, -1, -1}; In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), &ShufMask[0]); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::BITCAST, DL, VT, In); + return DAG.getBitcast(VT, In); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, @@ -12277,8 +12355,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, DAG.getIntPtrConstant(4, DL)); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi); + OpLo = DAG.getBitcast(MVT::v16i8, OpLo); + OpHi = DAG.getBitcast(MVT::v16i8, OpHi); // The PSHUFB mask: static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, @@ -12288,13 +12366,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + OpLo = DAG.getBitcast(MVT::v4i32, OpLo); + OpHi = DAG.getBitcast(MVT::v4i32, OpHi); // The MOVLHPS Mask: static const int ShufMask2[] = {0, 1, 4, 5}; SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); - return 
DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res); + return DAG.getBitcast(MVT::v8i16, res); } // Handle truncation of V256 to V128 using shuffles. @@ -12310,8 +12388,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) MaskVec[i] = i * 2; - SDValue V = DAG.getVectorShuffle(NVT, DL, - DAG.getNode(ISD::BITCAST, DL, NVT, In), + SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In), DAG.getUNDEF(NVT), &MaskVec[0]); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, DAG.getIntPtrConstant(0, DL)); @@ -12420,13 +12497,12 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { // For a vector, cast operands to a vector type, perform the logic op, // and cast the result back to the original value type. MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); - SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask); - SDValue Operand = IsFNABS ? - DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) : - DAG.getNode(ISD::BITCAST, dl, VecVT, Op0); + SDValue MaskCasted = DAG.getBitcast(VecVT, Mask); + SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0)) + : DAG.getBitcast(VecVT, Op0); unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR; - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted)); + return DAG.getBitcast(VT, + DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted)); } // If not vector, then scalar. @@ -12591,7 +12667,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) - VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]); + VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]); // If more than one full vectors are evaluated, OR them first before PTEST. 
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) { @@ -12925,29 +13001,31 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { - // FIXME: We should use instruction latency models to calculate the cost of - // each potential sequence, but this is very hard to do reliably because - // at least Intel's Core* chips have variable timing based on the number of - // significant digits in the divisor and/or sqrt operand. - if (!Subtarget->useSqrtEst()) - return SDValue(); - EVT VT = Op.getValueType(); + const char *RecipOp; - // SSE1 has rsqrtss and rsqrtps. + // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision // rsqrt estimate with refinement on x86 prior to FMA requires at least 16 // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || - (Subtarget->hasAVX() && VT == MVT::v8f32)) { - RefinementSteps = 1; - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); - } - return SDValue(); + if (VT == MVT::f32 && Subtarget->hasSSE1()) + RecipOp = "sqrtf"; + else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || + (VT == MVT::v8f32 && Subtarget->hasAVX())) + RecipOp = "vec-sqrtf"; + else + return SDValue(); + + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); + UseOneConstNR = false; + return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); } /// The minimum architected relative accuracy is 2^-12. 
We need one @@ -12955,15 +13033,9 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, SDValue X86TargetLowering::getRecipEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { - // FIXME: We should use instruction latency models to calculate the cost of - // each potential sequence, but this is very hard to do reliably because - // at least Intel's Core* chips have variable timing based on the number of - // significant digits in the divisor. - if (!Subtarget->useReciprocalEst()) - return SDValue(); - EVT VT = Op.getValueType(); - + const char *RecipOp; + // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision @@ -12971,12 +13043,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. 
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || - (Subtarget->hasAVX() && VT == MVT::v8f32)) { - RefinementSteps = ReciprocalEstimateRefinementSteps; - return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); - } - return SDValue(); + if (VT == MVT::f32 && Subtarget->hasSSE1()) + RecipOp = "divf"; + else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || + (VT == MVT::v8f32 && Subtarget->hasAVX())) + RecipOp = "vec-divf"; + else + return SDValue(); + + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); + return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); } /// If we have at least two divisions that use the same divisor, convert to @@ -13407,8 +13487,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, assert(Subtarget->hasSSE2() && "Don't know how to lower!"); // First cast everything to the right type. - Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); - Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. The lower @@ -13442,7 +13522,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { @@ -13451,8 +13531,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type. 
- Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); - Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Do the compare. SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); @@ -13465,7 +13545,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } } @@ -13662,7 +13742,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; - VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp); + VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); @@ -13687,12 +13767,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, Op1Scalar.getValueType(), Cond, Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, VT, newSelect); - SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect); + return DAG.getBitcast(VT, newSelect); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, DAG.getIntPtrConstant(0, DL)); } @@ -13975,7 +14055,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); - Curr = 
DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr); + Curr = DAG.getBitcast(CurrVT, Curr); } SDValue SignExt = Curr; @@ -13993,7 +14073,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(31, dl, MVT::i8)); SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5}); - return DAG.getNode(ISD::BITCAST, dl, VT, Ext); + return DAG.getBitcast(VT, Ext); } return SDValue(); @@ -14202,7 +14282,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. - SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res); + SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { @@ -14227,7 +14307,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SDValue Shuff = DAG.getVectorShuffle( WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); - Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + Shuff = DAG.getBitcast(RegVT, Shuff); // Build the arithmetic shift. unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - @@ -14249,7 +14329,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); // Bitcast to the requested type. 
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + Shuff = DAG.getBitcast(RegVT, Shuff); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } @@ -14933,7 +15013,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, MVT EltVT = VT.getVectorElementType(); EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); - ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); + ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } @@ -14959,8 +15039,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); switch (Op.getOpcode()) { default: break; @@ -15017,12 +15097,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Op.getOperand(2), Op.getOperand(3)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); - SDValue Src0 = Op.getOperand(2); + SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - SDValue RoundingMode = Op.getOperand(4); + SDValue RoundingMode; + if (Op.getNumOperands() == 4) + RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + else + RoundingMode = Op.getOperand(4); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), Src, RoundingMode), + Mask, PassThru, Subtarget, DAG); + } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), - Mask, Src0, Subtarget, DAG); + Mask, PassThru, Subtarget, 
DAG); + } + case INTR_TYPE_1OP_MASK: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), + Mask, Passthru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); @@ -15069,6 +15168,30 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1,Src2), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_3OP_MASK: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(6); + unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src1, Src2, Src3, Rnd), + Mask, PassThru, Subtarget, DAG); + } + } + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -15140,7 +15263,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, DAG.getIntPtrConstant(0, dl)); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + return DAG.getBitcast(Op.getValueType(), Res); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; @@ -15176,7 +15299,7 @@ static SDValue 
LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, @@ -15191,7 +15314,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); @@ -15211,16 +15334,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(1)); - case Intrinsic::x86_avx512_mask_valign_q_512: - case Intrinsic::x86_avx512_mask_valign_d_512: - // Vector source operands are swapped. - return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl, - Op.getValueType(), Op.getOperand(2), - Op.getOperand(1), - Op.getOperand(3)), - Op.getOperand(5), Op.getOperand(4), - Subtarget, DAG); - // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -15289,8 +15402,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case Intrinsic::x86_avx512_kortestz_w: case Intrinsic::x86_avx512_kortestc_w: { unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? 
X86::COND_E: X86::COND_B; - SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); - SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); + SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2)); SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test); @@ -15378,7 +15491,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // Compute the symbol for the LSDA. We know it'll get emitted later. MachineFunction &MF = DAG.getMachineFunction(); SDValue Op1 = Op.getOperand(1); - Op1->dump(); auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( GlobalValue::getRealLinkageName(Fn->getName())); @@ -15409,7 +15521,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -15437,7 +15549,7 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); @@ -15460,7 +15572,7 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = 
DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); //SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); @@ -15693,23 +15805,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); + EVT VT = DataToCompress.getValueType(); if (isAllOnes(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + VT.getScalarSizeInBits()/8); - EVT VT = DataToCompress.getValueType(); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, DAG.getUNDEF(VT)); return DAG.getStore(Chain, dl, Compressed, Addr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + VT.getScalarSizeInBits()/8); } case EXPAND_FROM_MEM: { SDLoc dl(Op); @@ -15721,17 +15835,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, if (isAllOnes(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, - false, 0); + false, VT.getScalarSizeInBits()/8); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - 
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), - false, false, false, 0); + false, false, false, + VT.getScalarSizeInBits()/8); SDValue Results[] = { DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru), @@ -16274,8 +16389,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, -1, 4, -1, 5, -1, 6, -1, 7}; ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo); - BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo); + ALo = DAG.getBitcast(ExVT, ALo); + BLo = DAG.getBitcast(ExVT, BLo); ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT)); BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT)); } @@ -16294,8 +16409,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, -1, 12, -1, 13, -1, 14, -1, 15}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi); - BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi); + AHi = DAG.getBitcast(ExVT, AHi); + BHi = DAG.getBitcast(ExVT, BHi); AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT)); BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT)); } @@ -16323,8 +16438,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Now multiply odd parts. SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds); - Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens); - Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds); + Evens = DAG.getBitcast(VT, Evens); + Odds = DAG.getBitcast(VT, Odds); // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. 
@@ -16352,10 +16467,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Bit cast to 32-bit vectors for MULUDQ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32; - A = DAG.getNode(ISD::BITCAST, dl, MulVT, A); - B = DAG.getNode(ISD::BITCAST, dl, MulVT, B); - Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi); - Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi); + A = DAG.getBitcast(MulVT, A); + B = DAG.getBitcast(MulVT, B); + Ahi = DAG.getBitcast(MulVT, Ahi); + Bhi = DAG.getBitcast(MulVT, Bhi); SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); @@ -16417,7 +16532,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); - return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first); + return DAG.getBitcast(VT, CallInfo.first); } static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, @@ -16455,12 +16570,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h> // => <2 x i64> <ae|cg> - SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); + SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef> // => <2 x i64> <bf|dh> - SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); + SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); // Shuffle it back into the right order. 
SDValue Highs, Lows; @@ -16499,16 +16612,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // Return true if the requred (according to Opcode) shift-imm form is natively // supported by the Subtarget -static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (VT.getScalarSizeInBits() < 16) return false; - + if (VT.is512BitVector() && (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI())) return true; - bool LShift = VT.is128BitVector() || + bool LShift = VT.is128BitVector() || (VT.is256BitVector() && Subtarget->hasInt256()); bool AShift = LShift && (Subtarget->hasVLX() || @@ -16518,15 +16631,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, // The shift amount is a variable, but it is the same for all vector lanes. // These instrcutions are defined together with shift-immediate. -static -bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, +static +bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } // Return true if the requred (according to Opcode) variable-shift form is // natively supported by the Subtarget -static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16) @@ -16574,7 +16687,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); - SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); + SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. 
SmallVector<SDValue, 32> V( NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8)); @@ -16585,7 +16698,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Make a large shift. SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R, ShiftAmt, DAG); - SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL); + SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. SmallVector<SDValue, 32> V( NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8)); @@ -16801,7 +16914,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, dl, VT)); - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); + Op = DAG.getBitcast(MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } @@ -16871,11 +16984,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); if (TargetOpcode == X86ISD::MOVSD) CastVT = MVT::v2i64; - SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1); - SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2); + SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1); + SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2); SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2, BitCast1, DAG); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } } @@ -16931,10 +17044,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z); SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R); SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R); - ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo); - AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi); - RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo); - RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi); + ALo = 
DAG.getBitcast(ExtVT, ALo); + AHi = DAG.getBitcast(ExtVT, AHi); + RLo = DAG.getBitcast(ExtVT, RLo); + RHi = DAG.getBitcast(ExtVT, RHi); SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi); Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT)); @@ -17293,7 +17406,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); - SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV); + SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, DAG.getIntPtrConstant(0, dl)); } @@ -17315,141 +17428,241 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } -static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDNode *Node = Op.getNode(); - SDLoc dl(Node); +/// Compute the horizontal sum of bytes in V for the elements of VT. +/// +/// Requires V to be a byte vector and VT to be an integer vector type with +/// wider elements than V's type. The width of the elements of VT determines +/// how many bytes of V are summed horizontally to produce each element of the +/// result. 
+static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(V); + MVT ByteVecVT = V.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + int NumElts = VT.getVectorNumElements(); + assert(ByteVecVT.getVectorElementType() == MVT::i8 && + "Expected value to have byte element type."); + assert(EltVT != MVT::i8 && + "Horizontal byte sum only makes sense for wider elements!"); + unsigned VecSize = VT.getSizeInBits(); + assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!"); + + // PSADBW instruction horizontally add all bytes and leave the result in i64 + // chunks, thus directly computes the pop count for v2i64 and v4i64. + if (EltVT == MVT::i64) { + SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros); + return DAG.getBitcast(VT, V); + } + + if (EltVT == MVT::i32) { + // We unpack the low half and high half into i32s interleaved with zeros so + // that we can use PSADBW to horizontally sum them. The most useful part of + // this is that it lines up the results of two PSADBW instructions to be + // two v2i64 vectors which concatenated are the 4 population counts. We can + // then use PACKUSWB to shrink and concatenate them into a v4i32 again. + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL); + SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros); + SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros); + + // Do the horizontal sums into two v2i64s. + Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + DAG.getBitcast(ByteVecVT, Low), Zeros); + High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + DAG.getBitcast(ByteVecVT, High), Zeros); + + // Merge them together. 
+ MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16); + V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT, + DAG.getBitcast(ShortVecVT, Low), + DAG.getBitcast(ShortVecVT, High)); + + return DAG.getBitcast(VT, V); + } + + // The only element type left is i16. + assert(EltVT == MVT::i16 && "Unknown how to handle type"); + + // To obtain pop count for each i16 element starting from the pop count for + // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s + // right by 8. It is important to shift as i16s as i8 vector shift isn't + // directly supported. + SmallVector<SDValue, 16> Shifters(NumElts, DAG.getConstant(8, DL, EltVT)); + SDValue Shifter = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), Shifter); + V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl), + DAG.getBitcast(ByteVecVT, V)); + return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), Shifter); +} + +static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + unsigned VecSize = VT.getSizeInBits(); - Op = Op.getOperand(0); - EVT VT = Op.getValueType(); - assert((VT.is128BitVector() || VT.is256BitVector()) && - "CTPOP lowering only implemented for 128/256-bit wide vector types"); + // Implement a lookup table in register by using an algorithm based on: + // http://wm.ite.pl/articles/sse-popcount.html + // + // The general idea is that every lower byte nibble in the input vector is an + // index into a in-register pre-computed pop count table. We then split up the + // input vector in two new ones: (1) a vector with only the shifted-right + // higher nibbles for each byte and (2) a vector with the lower nibbles (and + // masked out higher ones) for each byte. PSHUB is used separately with both + // to index the in-register table. 
Next, both are added and the result is a + // i8 vector where each element contains the pop count for input byte. + // + // To obtain the pop count for elements != i8, we follow up with the same + // approach and use additional tricks as described below. + // + const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2, + /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3, + /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3, + /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4}; + + int NumByteElts = VecSize / 8; + MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts); + SDValue In = DAG.getBitcast(ByteVecVT, Op); + SmallVector<SDValue, 16> LUTVec; + for (int i = 0; i < NumByteElts; ++i) + LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); + SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec); + SmallVector<SDValue, 16> Mask0F(NumByteElts, + DAG.getConstant(0x0F, DL, MVT::i8)); + SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Mask0F); + + // High nibbles + SmallVector<SDValue, 16> Four(NumByteElts, DAG.getConstant(4, DL, MVT::i8)); + SDValue FourV = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Four); + SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV); + + // Low nibbles + SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F); + + // The input vector is used as the shuffle mask that index elements into the + // LUT. After counting low and high nibbles, add the vector to obtain the + // final pop count per i8 element. 
+ SDValue HighPopCnt = + DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles); + SDValue LowPopCnt = + DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles); + SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt); - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - unsigned Len = EltVT.getSizeInBits(); + if (EltVT == MVT::i8) + return PopCnt; + + return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG); +} + +static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + assert(VT.is128BitVector() && + "Only 128-bit vector bitmath lowering supported."); + + int VecSize = VT.getSizeInBits(); + MVT EltVT = VT.getVectorElementType(); + int Len = EltVT.getSizeInBits(); // This is the vectorized version of the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel // with a minor tweak to use a series of adds + shifts instead of vector - // multiplications. Implemented for the v2i64, v4i64, v4i32, v8i32 types: - // - // v2i64, v4i64, v4i32 => Only profitable w/ popcnt disabled - // v8i32 => Always profitable - // - // FIXME: There a couple of possible improvements: - // - // 1) Support for i8 and i16 vectors (needs measurements if popcnt enabled). - // 2) Use strategies from http://wm.ite.pl/articles/sse-popcount.html - // - assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 && - "CTPOP not implemented for this vector element type."); + // multiplications. Implemented for all integer vector types. We only use + // this when we don't have SSSE3 which allows a LUT-based lowering that is + // much faster, even faster than using native popcnt instructions. 
+ + auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) { + MVT VT = V.getSimpleValueType(); + SmallVector<SDValue, 32> Shifters( + VT.getVectorNumElements(), + DAG.getConstant(Shifter, DL, VT.getVectorElementType())); + return DAG.getNode(OpCode, DL, VT, V, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters)); + }; + auto GetMask = [&](SDValue V, APInt Mask) { + MVT VT = V.getSimpleValueType(); + SmallVector<SDValue, 32> Masks( + VT.getVectorNumElements(), + DAG.getConstant(Mask, DL, VT.getVectorElementType())); + return DAG.getNode(ISD::AND, DL, VT, V, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks)); + }; - // X86 canonicalize ANDs to vXi64, generate the appropriate bitcasts to avoid - // extra legalization. - bool NeedsBitcast = EltVT == MVT::i32; - MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64; + // We don't want to incur the implicit masks required to SRL vNi8 vectors on + // x86, so set the SRL type to have elements at least i16 wide. This is + // correct because all of our SRLs are followed immediately by a mask anyways + // that handles any bits that sneak into the high bits of the byte elements. + MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16); - SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, - EltVT); - SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, - EltVT); - SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, - EltVT); + SDValue V = Op; // v = v - ((v >> 1) & 0x55555555...) 
- SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, dl, EltVT)); - SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones); - SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV); - if (NeedsBitcast) - Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl); - - SmallVector<SDValue, 8> Mask55(NumElts, Cst55); - SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55); - if (NeedsBitcast) - M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55); - - SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And); + SDValue Srl = + DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1)); + SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55))); + V = DAG.getNode(ISD::SUB, DL, VT, V, And); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) - SmallVector<SDValue, 8> Mask33(NumElts, Cst33); - SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33); - SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, dl, EltVT)); - SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos); + SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33))); + Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2)); + SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33))); + V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS); - Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV); - if (NeedsBitcast) { - Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl); - M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33); - Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub); - } + // v = (v + (v >> 4)) & 0x0F0F0F0F... 
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl); + V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F))); - SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33); - SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33); - if (VT != AndRHS.getValueType()) { - AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS); - AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS); - } - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS); + // At this point, V contains the byte-wise population count, and we are + // merely doing a horizontal sum if necessary to get the wider element + // counts. + if (EltVT == MVT::i8) + return V; - // v = (v + (v >> 4)) & 0x0F0F0F0F... - SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, dl, EltVT)); - SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours); - Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV); - Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); - - SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F); - SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F); - if (NeedsBitcast) { - Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add); - M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F); - } - And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - - // The algorithm mentioned above uses: - // v = (v * 0x01010101...) >> (Len - 8) - // - // Change it to use vector adds + vector shifts which yield faster results on - // Haswell than using vector integer multiplication. 
- // - // For i32 elements: - // v = v + (v >> 8) - // v = v + (v >> 16) - // - // For i64 elements: - // v = v + (v >> 8) - // v = v + (v >> 16) - // v = v + (v >> 32) - // - Add = And; - SmallVector<SDValue, 8> Csts; - for (unsigned i = 8; i <= Len/2; i *= 2) { - Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT)); - SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts); - Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV); - Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); - Csts.clear(); + return LowerHorizontalByteSum( + DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget, + DAG); +} + +static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + // FIXME: Need to add AVX-512 support here! + assert((VT.is256BitVector() || VT.is128BitVector()) && + "Unknown CTPOP type to handle"); + SDLoc DL(Op.getNode()); + SDValue Op0 = Op.getOperand(0); + + if (!Subtarget->hasSSSE3()) { + // We can't use the fast LUT approach, so fall back on vectorized bitmath. + assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); + return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG); } - // The result is on the least significant 6-bits on i32 and 7-bits on i64. - SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl, - EltVT); - SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F); - SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV); - if (NeedsBitcast) { - Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add); - M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F); + if (VT.is256BitVector() && !Subtarget->hasInt256()) { + unsigned NumElems = VT.getVectorNumElements(); + + // Extract each 128-bit vector, compute pop count and concat the result. 
+ SDValue LHS = Extract128BitVector(Op0, 0, DAG, DL); + SDValue RHS = Extract128BitVector(Op0, NumElems/2, DAG, DL); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG), + LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG)); } - And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - return And; + return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); +} + +static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Op.getValueType().isVector() && + "We only do custom lowering for vector population count."); + return LowerVectorCTPOP(Op, Subtarget, DAG); } static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { @@ -17840,8 +18053,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64); SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias)); - Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or); + DAG.getBitcast(MVT::v2i64, VBias)); + Or = DAG.getBitcast(MVT::v2f64, Or); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); return; @@ -17964,7 +18177,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0)); - SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded); + SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded); if (ExperimentalVectorWideningLegalization) { // If we are legalizing vectors by widening, we already have the desired @@ -17994,7 +18207,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FANDN: return "X86ISD::FANDN"; case 
X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; - case X86ISD::FSRL: return "X86ISD::FSRL"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; @@ -18121,6 +18333,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; case X86ISD::SHUFP: return "X86ISD::SHUFP"; + case X86ISD::SHUF128: return "X86ISD::SHUF128"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; @@ -18143,8 +18356,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; + case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; + case X86ISD::VRANGE: return "X86ISD::VRANGE"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; + case X86ISD::PSADBW: return "X86ISD::PSADBW"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -18184,6 +18400,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; + case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; } @@ -18193,7 +18411,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { // isLegalAddressingMode - Return true if the addressing mode 
represented // by AM is legal for this target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); Reloc::Model R = getTargetMachine().getRelocationModel(); @@ -20028,7 +20247,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDValue(ResNode.getNode(), 1)); } - return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); + return DAG.getBitcast(VT, ResNode); } } @@ -20087,7 +20306,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // Just remove no-op shuffle masks. if (Mask.size() == 1) { - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), /*AddTo*/ true); return true; } @@ -20123,14 +20342,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, } if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); if (Shuffle == X86ISD::MOVDDUP) Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); else Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20141,11 +20360,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! 
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20155,11 +20374,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20189,11 +20408,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, default: llvm_unreachable("Impossible mask size!"); }; - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20222,14 +20441,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input); + Op = DAG.getBitcast(ByteVT, Input); DCI.AddToWorklist(Op.getNode()); SDValue PSHUFBMaskOp = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask); DCI.AddToWorklist(PSHUFBMaskOp.getNode()); Op = 
DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20401,7 +20620,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { #ifndef NDEBUG for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) for (int j = 0; j < LaneElts; ++j) - assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts && + assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && "Mask doesn't repeat in high 128-bit lanes!"); #endif Mask.resize(LaneElts); @@ -20532,7 +20751,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, SDValue W = Chain.pop_back_val(); if (V.getValueType() != W.getOperand(0).getValueType()) - V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V); + V = DAG.getBitcast(W.getOperand(0).getValueType(), V); switch (W.getOpcode()) { default: @@ -20551,7 +20770,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, } } if (V.getValueType() != N.getValueType()) - V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V); + V = DAG.getBitcast(N.getValueType(), V); // Return the new chain to replace N. return V; @@ -20668,12 +20887,12 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); - V = DAG.getNode(ISD::BITCAST, DL, DVT, V); + V = DAG.getBitcast(DVT, V); DCI.AddToWorklist(V.getNode()); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); DCI.AddToWorklist(V.getNode()); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } // Look for shuffle patterns which can be implemented as a single unpack. 
@@ -20704,7 +20923,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { // We can replace all three shuffles with an unpack. - V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0)); + V = DAG.getBitcast(VT, D.getOperand(0)); DCI.AddToWorklist(V.getNode()); return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL : X86ISD::UNPCKH, @@ -20848,8 +21067,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, CanFold = SVOp->getMaskElt(i) < 0; if (CanFold) { - SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0)); - SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1)); + SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); + SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]); } @@ -20981,7 +21200,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, Shuffle = DAG.getVectorShuffle(CurrentVT, dl, InVec.getOperand(0), Shuffle, &ShuffleMask[0]); - Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle); + Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); } @@ -21101,7 +21320,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue Vals[4]; if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { - SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector); + SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(0, dl, VecIdxTy)); @@ -21717,13 +21936,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (TValIsAllOnes && 
FValIsAllZeros) Ret = Cond; else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, - DAG.getNode(ISD::BITCAST, DL, CondVT, RHS)); + Ret = + DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); else if (FValIsAllZeros) Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, - DAG.getNode(ISD::BITCAST, DL, CondVT, LHS)); + DAG.getBitcast(CondVT, LHS)); - return DAG.getNode(ISD::BITCAST, DL, VT, Ret); + return DAG.getBitcast(VT, Ret); } } @@ -22554,15 +22773,13 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // and work with those going forward. SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, OnesOrZeroesF); - SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, - Vector64); + SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); IntVT = MVT::i32; } - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, - OnesOrZeroesF); + SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, @@ -22775,7 +22992,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL, Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask); - return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle); + return DAG.getBitcast(N0.getValueType(), NewShuffle); } static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, @@ -22916,7 +23133,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); - return DAG.getNode(ISD::BITCAST, DL, VT, Mask); + return DAG.getBitcast(VT, Mask); } // PBLENDVB 
only available on SSE 4.1 if (!Subtarget->hasSSE41()) @@ -22924,11 +23141,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; - X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X); - Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y); - Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask); + X = DAG.getBitcast(BlendVT, X); + Y = DAG.getBitcast(BlendVT, Y); + Mask = DAG.getBitcast(BlendVT, Mask); Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); - return DAG.getNode(ISD::BITCAST, DL, VT, Mask); + return DAG.getBitcast(VT, Mask); } } @@ -23129,7 +23346,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert Src0 value - SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0()); + SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0()); if (Mld->getSrc0().getOpcode() != ISD::UNDEF) { SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) @@ -23146,7 +23363,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Mask = Mld->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type - NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + NewMask = DAG.getBitcast(WideVecVT, Mask); SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23214,7 +23431,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue()); + SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23231,7 +23448,7 @@ static SDValue PerformMSTORECombine(SDNode *N, 
SelectionDAG &DAG, SDValue Mask = Mst->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type - NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) @@ -23323,7 +23540,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue()); + SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23354,7 +23571,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); + SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl, TLI.getPointerTy()); @@ -23495,7 +23712,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue ExtOp0 = OldExtract.getOperand(0); unsigned VecSize = ExtOp0.getValueSizeInBits(); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); - SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0); + SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, BitCast, OldExtract.getOperand(1)); return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), @@ -24239,10 +24456,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, // DAG. 
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. - SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst); + SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, N->getOperand(0)->getOperand(0), MaskConst); - SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd); + SDValue Res = DAG.getBitcast(VT, NewAnd); return Res; } @@ -24442,8 +24659,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, // In this case, the inner vzext is completely dead because we're going to // only look at bits inside of the low element. Just do the outer vzext on // a bitcast of the input to the inner. - return DAG.getNode(X86ISD::VZEXT, DL, VT, - DAG.getNode(ISD::BITCAST, DL, OpVT, V)); + return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V)); } // Check if we can bypass extracting and re-inserting an element of an input @@ -24465,7 +24681,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV, DAG.getIntPtrConstant(0, DL)); } - Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV); + Op = DAG.getBitcast(OpVT, OrigV); return DAG.getNode(X86ISD::VZEXT, DL, VT, Op); } } @@ -25301,6 +25517,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.first = DestReg; Res.second = &X86::GR64RegClass; } + } else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an error; + Res.first = 0; + Res.second = nullptr; } } else if (Res.second == &X86::FR32RegClass || Res.second == &X86::FR64RegClass || @@ -25326,13 +25546,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::VR256RegClass; else if (X86::VR512RegClass.hasType(VT)) Res.second = &X86::VR512RegClass; + else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an 
error; + Res.first = 0; + Res.second = nullptr; + } + } else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an error; + Res.first = 0; + Res.second = nullptr; } return Res; } int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), // will take 2 allocations in the out of order engine instead of 1 @@ -25351,7 +25581,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. - if (isLegalAddressingMode(AM, Ty)) + if (isLegalAddressingMode(AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index b589ca42e56c..b5d062f72b24 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -56,10 +56,6 @@ namespace llvm { /// corresponds to X86::ANDNPS or X86::ANDNPD. FANDN, - /// Bitwise logical right shift of floating point values. This - /// corresponds to X86::PSRLDQ. - FSRL, - /// These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: @@ -184,6 +180,9 @@ namespace llvm { /// Shuffle 16 8-bit values within a vector. PSHUFB, + /// Compute Sum of Absolute Differences. + PSADBW, + /// Bitwise Logical AND NOT of Packed FP values. ANDNP, @@ -200,6 +199,7 @@ namespace llvm { /// Combined add and sub on an FP vector. ADDSUB, + // FP vector ops with rounding mode. FADD_RND, FSUB_RND, @@ -207,7 +207,11 @@ namespace llvm { FDIV_RND, FMAX_RND, FMIN_RND, - + FSQRT_RND, + + // FP vector get exponent + FGETEXP_RND, + // Integer add/sub with unsigned saturation. 
ADDUS, SUBUS, @@ -355,6 +359,8 @@ namespace llvm { PSHUFHW, PSHUFLW, SHUFP, + //Shuffle Packed Values at 128-bit granularity + SHUF128, MOVDDUP, MOVSHDUP, MOVSLDUP, @@ -374,6 +380,10 @@ namespace llvm { VPERMIV3, VPERMI, VPERM2X128, + //Fix Up Special Packed Float32/64 values + VFIXUPIMM, + //Range Restriction Calculation For Packed Pairs of Float32/64 values + VRANGE, // Broadcast scalar to vector VBROADCAST, // Broadcast subvector to vector @@ -729,7 +739,8 @@ namespace llvm { /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -748,7 +759,8 @@ namespace llvm { /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. 
- int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; + int getScalingFactorCost(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9d11d3c7050f..c1d0aef07118 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1047,12 +1047,6 @@ multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _, EVEX_4V; } } - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, EVEX_V512; defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, @@ -1063,37 +1057,6 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; -// -- VPERM - register form -- -multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V; - - def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : 
avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, @@ -3401,32 +3364,6 @@ defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, VR512, loadv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// -// AVX-512 - PSHUFD -// - -multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT> { - def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>, - EVEX; - def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst), - (ins x86memop:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (OpNode (mem_frag addr:$src1), - (i8 imm:$src2))))]>, EVEX; -} - -defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32, - i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - -//===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3729,14 +3666,14 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>; let mayLoad = 1 in defm mi : 
AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rm>; } multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, @@ -3746,7 +3683,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B; + SSE_INTSHIFT_ITINS_P.rm>, EVEX_B; } multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3836,16 +3773,16 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, - avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>; + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, - avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>; + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, - avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>; + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; defm 
VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; @@ -3865,7 +3802,8 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), + (_.VT (OpNode _.RC:$src1, + (_.VT (bitconvert (_.LdFrag addr:$src2))))), SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } @@ -3927,6 +3865,65 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; +} + +multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, 
"vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + +//===----------------------------------------------------------------------===// +// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW +//===----------------------------------------------------------------------===// + +defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", + X86PShufd, avx512vl_i32_info>, + EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; +defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", + X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; +defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", + X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// @@ -4869,11 +4866,6 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src), OpcodeStr, - "{sae}, $src", "$src, {sae}", - (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; - defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode 
(_.FloatVT @@ -4881,24 +4873,58 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.MemOp:$src), OpcodeStr, "$src", "$src", + (ins _.MemOp:$src), OpcodeStr, + "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))), (i32 FROUND_CURRENT))>, EVEX_B; } +multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src), OpcodeStr, + "{sae}, $src", "$src, {sae}", + (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; +} multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. 
+ let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", + (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, @@ -5007,20 +5033,22 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd, + v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd, + v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, 
CD8VF>; +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -5583,30 +5611,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (loadv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; -multiclass avx512_valign<X86VectorVTInfo _> { - defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), - "valign"##_.Suffix, - "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3)))>, - AVX512AIi8Base, EVEX_4V; - - // Also match valign of packed floats. - def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), - (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>; - - let mayLoad = 1 in - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), - !strconcat("valign"##_.Suffix, - "\t{$src3, $src2, $src1, $dst|" - "$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - // Helper fragments to match sext vXi1 to vXiY. 
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; @@ -5949,7 +5953,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, (_.LdFrag addr:$src))), _.RC:$src0)))]>, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; - + let mayLoad = 1 in def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src), @@ -5958,7 +5962,6 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, (_.VT (bitconvert (_.LdFrag addr:$src))), _.ImmAllZerosV)))]>, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; - } multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, @@ -5979,3 +5982,212 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, EVEX; defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT 
(X86VBroadcast(_.ScalarLdFrag addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + (i8 imm:$src3))>; + defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + (i8 imm:$src3))>, EVEX_B; + } +} + +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_scalar,imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + + defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag 
addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + + let isAsmParserOnly = 1 in { + defm rmi_alt :AVX512_maskable_in_asm<opc, MRMSrcMem, _, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + []>; + } + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3,{sae}, $src2, $src1", + "$src1, $src2,{sae}, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_NO_EXC))>, EVEX_B; +} +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>; +} + +multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, + AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>, + avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>, + EVEX_V512; + + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>, + EVEX_V128; + defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>, + EVEX_V256; + } +} + +multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, + bits<8> opc, SDNode OpNode>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128; + defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; + } +} + +multiclass 
avx512_common_fp_sae_scalar_imm<string OpcodeStr, + X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>, + avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>; + } +} + +defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", + avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", + avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + + +multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _, + bits<8> opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; + } +} + +defm VSHUFF32X4 : 
avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I, + AVX512VLVectorVTInfo VTInfo_FP>{ + defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>, + AVX512AIi8Base, EVEX_4V; + let isCodeGenOnly = 1 in { + defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>, + AVX512AIi8Base, EVEX_4V; + } +} + +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 331faf2fd0b4..e2fa295c0230 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -764,6 +764,14 @@ class AVX512BIi8Base : PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } +class AVX512XSIi8Base : XS { + Domain ExeDomain = SSEPackedInt; + ImmType ImmT = Imm8; +} +class AVX512XDIi8Base : XD { + Domain ExeDomain = SSEPackedInt; + ImmType ImmT = Imm8; +} class AVX512PSIi8Base : PS { Domain ExeDomain = SSEPackedSingle; ImmType ImmT = Imm8; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 79d213c6e1a3..dfe58ef8067b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -35,8 +35,6 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; // SSE specific DAG 
Nodes. //===----------------------------------------------------------------------===// -def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, - SDTCisFP<0>, SDTCisInt<2> ]>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; @@ -65,7 +63,6 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; @@ -78,6 +75,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; +def X86psadbw : SDNode<"X86ISD::PSADBW", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -219,6 +219,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>]>; def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -229,6 +231,9 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; +def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc. 
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>; + def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, @@ -247,7 +252,8 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>; def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; @@ -279,6 +285,9 @@ def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; +def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; +def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; + def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSubVecOfVec<1, 0>]>, []>; @@ -298,6 +307,8 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; +def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 43decf7cdda9..6b7a9299dcfb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -433,6 +433,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) } static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { + { X86::BSF16rr, X86::BSF16rm, 0 }, + { X86::BSF32rr, X86::BSF32rm, 0 }, + 
{ X86::BSF64rr, X86::BSF64rm, 0 }, + { X86::BSR16rr, X86::BSR16rm, 0 }, + { X86::BSR32rr, X86::BSR32rm, 0 }, + { X86::BSR64rr, X86::BSR64rm, 0 }, { X86::CMP16rr, X86::CMP16rm, 0 }, { X86::CMP32rr, X86::CMP32rm, 0 }, { X86::CMP64rr, X86::CMP64rm, 0 }, @@ -1690,8 +1696,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, - { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, - { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, + { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 }, + { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 }, { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, @@ -4697,8 +4703,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } +static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) { + unsigned NumAddrOps = MOs.size(); + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + if (NumAddrOps < 4) // FrameIndex only + addOffset(MIB, 0); +} + static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. @@ -4706,11 +4721,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + addOperands(MIB, MOs); // Loop over the rest of the ri operands, converting them over. 
unsigned NumOps = MI->getDesc().getNumOperands()-2; @@ -4722,11 +4733,16 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); MIB.addOperand(MO); } + + MachineBasicBlock *MBB = InsertPt->getParent(); + MBB->insert(InsertPt, NewMI); + return MIB; } static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII) { // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), @@ -4737,38 +4753,32 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { assert(MO.isReg() && "Expected to fold into reg operand!"); - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + addOperands(MIB, MOs); } else { MIB.addOperand(MO); } } + + MachineBasicBlock *MBB = InsertPt->getParent(); + MBB->insert(InsertPt, NewMI); + return MIB; } static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI) { - MachineFunction &MF = *MI->getParent()->getParent(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); - - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI->getDebugLoc(), TII.get(Opcode)); + addOperands(MIB, MOs); return MIB.addImm(0); } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - unsigned OpNum, - ArrayRef<MachineOperand> MOs, - unsigned Size, unsigned Align, - bool AllowCommute) const 
{ +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, unsigned OpNum, + ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, + unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr; bool isCallRegIndirect = Subtarget.callRegIndirect(); @@ -4802,7 +4812,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isTwoAddrFold = true; } else if (OpNum == 0) { if (MI->getOpcode() == X86::MOV32r0) { - NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI); if (NewMI) return NewMI; } @@ -4847,9 +4857,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } if (isTwoAddrFold) - NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); + NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this); else - NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this); + NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this); if (NarrowToMOV32rm) { // If this is the special case where we use a MOV32rm to load a 32-bit @@ -4901,8 +4911,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Attempt to fold with the commuted version of the instruction. unsigned CommuteOp = (CommuteOpIdx1 == OriginalOpIdx ? 
CommuteOpIdx2 : CommuteOpIdx1); - NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align, - /*AllowCommute=*/false); + NewMI = + foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align, + /*AllowCommute=*/false); if (NewMI) return NewMI; @@ -5131,10 +5142,9 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, MI->addRegisterKilled(Reg, TRI, true); } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // Check switch flag if (NoFusing) return nullptr; @@ -5173,8 +5183,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return nullptr; return foldMemoryOperandImpl(MF, MI, Ops[0], - MachineOperand::CreateFI(FrameIndex), Size, - Alignment, /*AllowCommute=*/true); + MachineOperand::CreateFI(FrameIndex), InsertPt, + Size, Alignment, /*AllowCommute=*/true); } static bool isPartialRegisterLoad(const MachineInstr &LoadMI, @@ -5196,17 +5206,16 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI, return false; } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { // If loading from a FrameIndex, fold directly from the FrameIndex. 
unsigned NumOps = LoadMI->getDesc().getNumOperands(); int FrameIndex; if (isLoadFromStackSlot(LoadMI, FrameIndex)) { if (isPartialRegisterLoad(*LoadMI, MF)) return nullptr; - return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); + return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex); } // Check switch flag @@ -5326,7 +5335,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, break; } } - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, /*Size=*/0, Alignment, /*AllowCommute=*/true); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 0dd8101bbe5b..ac1b2d4fedc6 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -307,6 +307,7 @@ public: /// references has been changed. MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; /// foldMemoryOperand - Same as the previous version except it allows folding @@ -314,6 +315,7 @@ public: /// stack slot. 
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; /// canFoldMemoryOperand - Returns true if the specified load / store is @@ -407,6 +409,7 @@ public: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned OpNum, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, unsigned Size, unsigned Alignment, bool AllowCommute) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 70c2027520f9..e936b4bc466e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -788,6 +788,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; +def HasMPX : Predicate<"Subtarget->hasMPX()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; @@ -2456,6 +2457,9 @@ include "X86InstrAVX512.td" include "X86InstrMMX.td" include "X86Instr3DNow.td" +// MPX instructions +include "X86InstrMPX.td" + include "X86InstrVMX.td" include "X86InstrSVM.td" diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td new file mode 100644 index 000000000000..cf5e2e38fe58 --- /dev/null +++ b/lib/Target/X86/X86InstrMPX.td @@ -0,0 +1,70 @@ +//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 MPX instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> { + def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src), + OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + Requires<[HasMPX, In64BitMode]>; +} + +defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; + +multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> { + def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, In64BitMode]>; + def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, In64BitMode]>; +} +defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS; +defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD; +defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; + +def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX]>; +def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, Not64BitMode]>; +def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins 
i128mem:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, In64BitMode]>; + +def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX]>; +def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, Not64BitMode]>; +def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, In64BitMode]>; + +def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), + "bndstx \t{$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMPX]>; +def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + "bndldx \t{$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMPX]>;
\ No newline at end of file diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d3b401e8cfcb..8294e38e9957 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3560,7 +3560,7 @@ multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix, let Predicates = [HasAVX] in { def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; - + def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))), (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; } @@ -4053,6 +4053,20 @@ defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, int_x86_avx2_psad_bw, SSE_PMADD, 1>; +let Predicates = [HasAVX2] in + def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1), + (v32i8 VR256:$src2))), + (VPSADBWYrr VR256:$src2, VR256:$src1)>; + +let Predicates = [HasAVX] in + def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (VPSADBWrr VR128:$src2, VR128:$src1)>; + +def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (PSADBWrr VR128:$src2, VR128:$src1)>; + let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, @@ -4207,16 +4221,6 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { } } // Constraints = "$src1 = $dst" -let Predicates = [HasAVX] in { - def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; -} - -let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// diff --git 
a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 4af514a83ca5..0268066c2ba1 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,8 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, + INTR_TYPE_3OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND }; @@ -339,9 +340,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, @@ -366,6 +367,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), 
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, @@ -559,6 +572,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB, @@ -583,6 +604,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), + X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), + X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp 
index 556b518936f3..ff1436af4ece 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -128,6 +128,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { const DataLayout *DL = TM.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); + MCSymbol *Sym = nullptr; SmallString<128> Name; StringRef Suffix; @@ -160,12 +161,14 @@ GetSymbolFromOperand(const MachineOperand &MO) const { else getMang()->getNameWithPrefix(Name, MO.getSymbolName()); } else if (MO.isMBB()) { - Name += MO.getMBB()->getSymbol()->getName(); + assert(Suffix.empty()); + Sym = MO.getMBB()->getSymbol(); } unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); + if (!Sym) + Sym = Ctx.getOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); @@ -240,10 +243,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), + Expr = MCBinaryExpr::createSub(Expr, + MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); break; @@ -264,10 +267,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); // Subtract the pic base. 
- Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), + Expr = MCBinaryExpr::createSub(Expr, + MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI()) { assert(MAI.doesSetDirectiveSuppressesReloc()); @@ -277,17 +280,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // section so we are restricting it to jumptable references. MCSymbol *Label = Ctx.createTempSymbol(); AsmPrinter.OutStreamer->EmitAssignment(Label, Expr); - Expr = MCSymbolRefExpr::Create(Label, Ctx); + Expr = MCSymbolRefExpr::create(Label, Ctx); } break; } if (!Expr) - Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); + Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -710,7 +713,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, } MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); - const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); + const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context); MCInst LEA; if (is64Bits) { @@ -749,7 +752,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name); const MCSymbolRefExpr *tlsRef = - MCSymbolRefExpr::Create(tlsGetAddr, + MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context); @@ -1071,7 +1074,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. 
EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) - .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); // Emit the label. OutStreamer->EmitLabel(PICBase); @@ -1100,12 +1103,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Now that we have emitted the label, lower the complex operand expression. MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); const MCExpr *PICBase = - MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); - DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); + DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext), DotExpr, OutContext); EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1f361631a0b7..e9b6bfc3273c 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -175,12 +175,12 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; case 2: // Available for tailcall (not callee-saved GPRs). - if (IsWin64) + const Function *F = MF.getFunction(); + if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; - const Function *F = MF.getFunction(); bool hasHipeCC = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); if (hasHipeCC) return &X86::GR32RegClass; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 2e735fa3c026..cdb151c26a05 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -302,6 +302,11 @@ def CR15 : X86Reg<"cr15", 15>; def EIZ : X86Reg<"eiz", 4>; def RIZ : X86Reg<"riz", 4>; +// Bound registers, used in MPX instructions +def BND0 : X86Reg<"bnd0", 0>; +def BND1 : X86Reg<"bnd1", 1>; +def BND2 : X86Reg<"bnd2", 2>; +def BND3 : X86Reg<"bnd3", 3>; //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the @@ -484,3 +489,6 @@ def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;} def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;} def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} + +// Bound registers +def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
\ No newline at end of file diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 1cdab14e034e..74af29f4566c 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -259,6 +259,7 @@ void X86Subtarget::initializeEnvironment() { HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; IsUAMemFast = false; @@ -273,8 +274,6 @@ void X86Subtarget::initializeEnvironment() { LEAUsesAG = false; SlowLEA = false; SlowIncDec = false; - UseSqrtEst = false; - UseReciprocalEst = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 455dd7744d73..a476f7aba932 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -190,16 +190,6 @@ protected: /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; - /// Use the RSQRT* instructions to optimize square root calculations. - /// For this to be profitable, the cost of FSQRT and FDIV must be - /// substantially higher than normal FP ops like FADD and FMUL. - bool UseSqrtEst; - - /// Use the RCP* instructions to optimize FP division calculations. - /// For this to be profitable, the cost of FDIV must be - /// substantially higher than normal FP ops like FADD and FMUL. - bool UseReciprocalEst; - /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -218,6 +208,9 @@ protected: /// Processor has AVX-512 Vector Length eXtenstions bool HasVLX; + /// Processot supports MPX - Memory Protection Extensions + bool HasMPX; + /// Use software floating point for code generation. 
bool UseSoftFloat; @@ -377,14 +370,13 @@ public: bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slowIncDec() const { return SlowIncDec; } - bool useSqrtEst() const { return UseSqrtEst; } - bool useReciprocalEst() const { return UseReciprocalEst; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } + bool hasMPX() const { return HasMPX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 3e5f1d82202f..646cff7c5bdb 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -105,6 +105,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, if (Subtarget.isTargetWin64()) this->Options.TrapUnreachable = true; + // TODO: By default, all reciprocal estimate operations are off because + // that matches the behavior before TargetRecip was added (except for btver2 + // which used subtarget features to enable this type of codegen). + // We should change this to match GCC behavior where everything but + // scalar division estimates are turned on by default with -ffast-math. + this->Options.Reciprocals.setDefaults("all", false, 1); + initAsmInfo(); } @@ -221,9 +228,9 @@ bool X86PassConfig::addILPOpts() { } bool X86PassConfig::addPreISel() { - // Only add this pass for 32-bit x86. + // Only add this pass for 32-bit x86 Windows. 
Triple TT(TM->getTargetTriple()); - if (TT.getArch() == Triple::x86) + if (TT.isOSWindows() && TT.getArch() == Triple::x86) addPass(createX86WinEHStatePass()); return true; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 6bf45c37e38b..f9f62904b64b 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -32,9 +32,9 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) { const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::create(4, getContext()); + return MCBinaryExpr::createAdd(Res, Four, getContext()); } return TargetLoweringObjectFileMachO::getTTypeGlobalReference( @@ -55,14 +55,14 @@ const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( // foo@GOTPCREL+4+<offset>. 
unsigned FinalOff = Offset+MV.getConstant()+4; const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); - const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext()); - return MCBinaryExpr::CreateAdd(Res, Off, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Off = MCConstantExpr::create(FinalOff, getContext()); + return MCBinaryExpr::createAdd(Res, Off, getContext()); } const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol( const MCSymbol *Sym) const { - return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); + return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); } void @@ -116,7 +116,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( if (GOLHS->isThreadLocal()) return nullptr; - return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang), + return MCSymbolRefExpr::create(TM.getSymbol(GOLHS, Mang), MCSymbolRefExpr::VK_COFF_IMGREL32, getContext()); } diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 4efaada40926..ce69ea721993 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Dominators.h" @@ -59,30 +60,49 @@ public: private: void emitExceptionRegistrationRecord(Function *F); - void linkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode, - Value *Handler); - void unlinkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode); + void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler); + void unlinkExceptionRegistration(IRBuilder<> &Builder); + void addCXXStateStores(Function &F, MachineModuleInfo &MMI); + void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo 
&FuncInfo, + Function &F, int BaseState); + void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State); Value *emitEHLSDA(IRBuilder<> &Builder, Function *F); Function *generateLSDAInEAXThunk(Function *ParentFunc); + int escapeRegNode(Function &F); + // Module-level type getters. - Type *getEHRegistrationType(); - Type *getSEH3RegistrationType(); - Type *getSEH4RegistrationType(); - Type *getCXXEH3RegistrationType(); + Type *getEHLinkRegistrationType(); + Type *getSEHRegistrationType(); + Type *getCXXEHRegistrationType(); // Per-module data. Module *TheModule = nullptr; - StructType *EHRegistrationTy = nullptr; - StructType *CXXEH3RegistrationTy = nullptr; - StructType *SEH3RegistrationTy = nullptr; - StructType *SEH4RegistrationTy = nullptr; + StructType *EHLinkRegistrationTy = nullptr; + StructType *CXXEHRegistrationTy = nullptr; + StructType *SEHRegistrationTy = nullptr; + Function *FrameRecover = nullptr; + Function *FrameAddress = nullptr; + Function *FrameEscape = nullptr; // Per-function state EHPersonality Personality = EHPersonality::Unknown; Function *PersonalityFn = nullptr; + + /// The stack allocation containing all EH data, including the link in the + /// fs:00 chain and the current state. + AllocaInst *RegNode = nullptr; + + /// Struct type of RegNode. Used for GEPing. + Type *RegNodeTy = nullptr; + + /// The index of the state field of RegNode. + int StateFieldIndex = ~0U; + + /// The linked list node subobject inside of RegNode. 
+ Value *Link = nullptr; }; } @@ -92,16 +112,21 @@ char WinEHStatePass::ID = 0; bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; + FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape); + FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover); + FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress); return false; } bool WinEHStatePass::doFinalization(Module &M) { assert(TheModule == &M); TheModule = nullptr; - EHRegistrationTy = nullptr; - CXXEH3RegistrationTy = nullptr; - SEH3RegistrationTy = nullptr; - SEH4RegistrationTy = nullptr; + EHLinkRegistrationTy = nullptr; + CXXEHRegistrationTy = nullptr; + SEHRegistrationTy = nullptr; + FrameEscape = nullptr; + FrameRecover = nullptr; + FrameAddress = nullptr; return false; } @@ -136,8 +161,19 @@ bool WinEHStatePass::runOnFunction(Function &F) { if (!isMSVCEHPersonality(Personality)) return false; + // Disable frame pointer elimination in this function. + // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we + // use an arbitrary register? + F.addFnAttr("no-frame-pointer-elim", "true"); + emitExceptionRegistrationRecord(&F); - // FIXME: State insertion. + + auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMIPtr && "MachineModuleInfo should always be available"); + MachineModuleInfo &MMI = *MMIPtr; + if (Personality == EHPersonality::MSVC_CXX) { + addCXXStateStores(F, MMI); + } // Reset per-function state. 
PersonalityFn = nullptr; @@ -152,17 +188,17 @@ bool WinEHStatePass::runOnFunction(Function &F) { /// EHRegistrationNode *Next; /// PEXCEPTION_ROUTINE Handler; /// }; -Type *WinEHStatePass::getEHRegistrationType() { - if (EHRegistrationTy) - return EHRegistrationTy; +Type *WinEHStatePass::getEHLinkRegistrationType() { + if (EHLinkRegistrationTy) + return EHLinkRegistrationTy; LLVMContext &Context = TheModule->getContext(); - EHRegistrationTy = StructType::create(Context, "EHRegistrationNode"); + EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode"); Type *FieldTys[] = { - EHRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next + EHLinkRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...) }; - EHRegistrationTy->setBody(FieldTys, false); - return EHRegistrationTy; + EHLinkRegistrationTy->setBody(FieldTys, false); + return EHLinkRegistrationTy; } /// The __CxxFrameHandler3 registration node: @@ -171,40 +207,21 @@ Type *WinEHStatePass::getEHRegistrationType() { /// EHRegistrationNode SubRecord; /// int32_t TryLevel; /// }; -Type *WinEHStatePass::getCXXEH3RegistrationType() { - if (CXXEH3RegistrationTy) - return CXXEH3RegistrationTy; +Type *WinEHStatePass::getCXXEHRegistrationType() { + if (CXXEHRegistrationTy) + return CXXEHRegistrationTy; LLVMContext &Context = TheModule->getContext(); Type *FieldTys[] = { Type::getInt8PtrTy(Context), // void *SavedESP - getEHRegistrationType(), // EHRegistrationNode SubRecord + getEHLinkRegistrationType(), // EHRegistrationNode SubRecord Type::getInt32Ty(Context) // int32_t TryLevel }; - CXXEH3RegistrationTy = + CXXEHRegistrationTy = StructType::create(FieldTys, "CXXExceptionRegistration"); - return CXXEH3RegistrationTy; -} - -/// The _except_handler3 registration node: -/// struct EH3ExceptionRegistration { -/// EHRegistrationNode SubRecord; -/// void *ScopeTable; -/// int32_t TryLevel; -/// }; -Type 
*WinEHStatePass::getSEH3RegistrationType() { - if (SEH3RegistrationTy) - return SEH3RegistrationTy; - LLVMContext &Context = TheModule->getContext(); - Type *FieldTys[] = { - getEHRegistrationType(), // EHRegistrationNode SubRecord - Type::getInt8PtrTy(Context), // void *ScopeTable - Type::getInt32Ty(Context) // int32_t TryLevel - }; - SEH3RegistrationTy = StructType::create(FieldTys, "EH3ExceptionRegistration"); - return SEH3RegistrationTy; + return CXXEHRegistrationTy; } -/// The _except_handler4 registration node: +/// The _except_handler3/4 registration node: /// struct EH4ExceptionRegistration { /// void *SavedESP; /// _EXCEPTION_POINTERS *ExceptionPointers; @@ -212,19 +229,19 @@ Type *WinEHStatePass::getSEH3RegistrationType() { /// int32_t EncodedScopeTable; /// int32_t TryLevel; /// }; -Type *WinEHStatePass::getSEH4RegistrationType() { - if (SEH4RegistrationTy) - return SEH4RegistrationTy; +Type *WinEHStatePass::getSEHRegistrationType() { + if (SEHRegistrationTy) + return SEHRegistrationTy; LLVMContext &Context = TheModule->getContext(); Type *FieldTys[] = { Type::getInt8PtrTy(Context), // void *SavedESP Type::getInt8PtrTy(Context), // void *ExceptionPointers - getEHRegistrationType(), // EHRegistrationNode SubRecord + getEHLinkRegistrationType(), // EHRegistrationNode SubRecord Type::getInt32Ty(Context), // int32_t EncodedScopeTable Type::getInt32Ty(Context) // int32_t TryLevel }; - SEH4RegistrationTy = StructType::create(FieldTys, "EH4ExceptionRegistration"); - return SEH4RegistrationTy; + SEHRegistrationTy = StructType::create(FieldTys, "SEHExceptionRegistration"); + return SEHRegistrationTy; } // Emit an exception registration record. 
These are stack allocations with the @@ -238,62 +255,63 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { StringRef PersonalityName = PersonalityFn->getName(); IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin()); Type *Int8PtrType = Builder.getInt8PtrTy(); - Value *SubRecord = nullptr; - if (PersonalityName == "__CxxFrameHandler3") { - Type *RegNodeTy = getCXXEH3RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); + if (Personality == EHPersonality::MSVC_CXX) { + RegNodeTy = getCXXEHRegistrationType(); + RegNode = Builder.CreateAlloca(RegNodeTy); // FIXME: We can skip this in -GS- mode, when we figure that out. // SavedESP = llvm.stacksave() Value *SP = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {}); Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -1 - Builder.CreateStore(Builder.getInt32(-1), - Builder.CreateStructGEP(RegNodeTy, RegNode, 2)); + StateFieldIndex = 2; + insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1); // Handler = __ehhandler$F Function *Trampoline = generateLSDAInEAXThunk(F); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 1); - linkExceptionRegistration(Builder, SubRecord, Trampoline); - } else if (PersonalityName == "_except_handler3") { - Type *RegNodeTy = getSEH3RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); - // TryLevel = -1 - Builder.CreateStore(Builder.getInt32(-1), - Builder.CreateStructGEP(RegNodeTy, RegNode, 2)); - // ScopeTable = llvm.x86.seh.lsda(F) - Value *LSDA = emitEHLSDA(Builder, F); - Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 0); - linkExceptionRegistration(Builder, SubRecord, PersonalityFn); - } else if (PersonalityName == "_except_handler4") { - Type *RegNodeTy = getSEH4RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); + Link = 
Builder.CreateStructGEP(RegNodeTy, RegNode, 1); + linkExceptionRegistration(Builder, Trampoline); + } else if (Personality == EHPersonality::MSVC_X86SEH) { + // If _except_handler4 is in use, some additional guard checks and prologue + // stuff is required. + bool UseStackGuard = (PersonalityName == "_except_handler4"); + RegNodeTy = getSEHRegistrationType(); + RegNode = Builder.CreateAlloca(RegNodeTy); // SavedESP = llvm.stacksave() Value *SP = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {}); Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); - // TryLevel = -2 - Builder.CreateStore(Builder.getInt32(-2), - Builder.CreateStructGEP(RegNodeTy, RegNode, 4)); - // FIXME: XOR the LSDA with __security_cookie. + // TryLevel = -2 / -1 + StateFieldIndex = 4; + insertStateNumberStore(RegNode, Builder.GetInsertPoint(), + UseStackGuard ? -2 : -1); // ScopeTable = llvm.x86.seh.lsda(F) Value *FI8 = Builder.CreateBitCast(F, Int8PtrType); Value *LSDA = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8); - Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 2); - linkExceptionRegistration(Builder, SubRecord, PersonalityFn); + Type *Int32Ty = Type::getInt32Ty(TheModule->getContext()); + LSDA = Builder.CreatePtrToInt(LSDA, Int32Ty); + // If using _except_handler4, xor the address of the table with + // __security_cookie. + if (UseStackGuard) { + Value *Cookie = + TheModule->getOrInsertGlobal("__security_cookie", Int32Ty); + Value *Val = Builder.CreateLoad(Int32Ty, Cookie); + LSDA = Builder.CreateXor(LSDA, Val); + } + Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 3)); + Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 2); + linkExceptionRegistration(Builder, PersonalityFn); } else { llvm_unreachable("unexpected personality function"); } - // FIXME: Insert an unlink before all returns. 
+ // Insert an unlink before all returns. for (BasicBlock &BB : *F) { TerminatorInst *T = BB.getTerminator(); if (!isa<ReturnInst>(T)) continue; Builder.SetInsertPoint(T); - unlinkExceptionRegistration(Builder, SubRecord); + unlinkExceptionRegistration(Builder); } } @@ -342,33 +360,122 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { } void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder, - Value *RegNode, Value *Handler) { - Type *RegNodeTy = getEHRegistrationType(); + Value *Handler) { + Type *LinkTy = getEHLinkRegistrationType(); // Handler = Handler Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy()); - Builder.CreateStore(Handler, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); + Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1)); // Next = [fs:00] Constant *FSZero = - Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257)); + Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257)); Value *Next = Builder.CreateLoad(FSZero); - Builder.CreateStore(Next, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); - // [fs:00] = RegNode - Builder.CreateStore(RegNode, FSZero); + Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0)); + // [fs:00] = Link + Builder.CreateStore(Link, FSZero); } -void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder, - Value *RegNode) { - // Clone RegNode into the current BB for better address mode folding. - if (auto *GEP = dyn_cast<GetElementPtrInst>(RegNode)) { +void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) { + // Clone Link into the current BB for better address mode folding. 
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Link)) { GEP = cast<GetElementPtrInst>(GEP->clone()); Builder.Insert(GEP); - RegNode = GEP; + Link = GEP; } - Type *RegNodeTy = getEHRegistrationType(); - // [fs:00] = RegNode->Next + Type *LinkTy = getEHLinkRegistrationType(); + // [fs:00] = Link->Next Value *Next = - Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); + Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0)); Constant *FSZero = - Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257)); + Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257)); Builder.CreateStore(Next, FSZero); } + +void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); + calculateWinCXXEHStateNumbers(&F, FuncInfo); + + // The base state for the parent is -1. + addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1); + + // Set up RegNodeEscapeIndex + int RegNodeEscapeIndex = escapeRegNode(F); + + // Only insert stores in catch handlers. + Constant *FI8 = + ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext())); + for (auto P : FuncInfo.HandlerBaseState) { + Function *Handler = const_cast<Function *>(P.first); + int BaseState = P.second; + IRBuilder<> Builder(&Handler->getEntryBlock(), + Handler->getEntryBlock().begin()); + // FIXME: Find and reuse such a call if present. + Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)}); + Value *RecoveredRegNode = Builder.CreateCall( + FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)}); + RecoveredRegNode = + Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0)); + addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState); + } +} + +/// Escape RegNode so that we can access it from child handlers. Find the call +/// to frameescape, if any, in the entry block and append RegNode to the list +/// of arguments. 
+int WinEHStatePass::escapeRegNode(Function &F) { + // Find the call to frameescape and extract its arguments. + IntrinsicInst *EscapeCall = nullptr; + for (Instruction &I : F.getEntryBlock()) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); + if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + EscapeCall = II; + break; + } + } + SmallVector<Value *, 8> Args; + if (EscapeCall) { + auto Ops = EscapeCall->arg_operands(); + Args.append(Ops.begin(), Ops.end()); + } + Args.push_back(RegNode); + + // Replace the call (if it exists) with new one. Otherwise, insert at the end + // of the entry block. + IRBuilder<> Builder(&F.getEntryBlock(), + EscapeCall ? EscapeCall : F.getEntryBlock().end()); + Builder.CreateCall(FrameEscape, Args); + if (EscapeCall) + EscapeCall->eraseFromParent(); + return Args.size() - 1; +} + +void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode, + WinEHFuncInfo &FuncInfo, + Function &F, int BaseState) { + // Iterate all the instructions and emit state number stores. + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (auto *CI = dyn_cast<CallInst>(&I)) { + // Possibly throwing call instructions have no actions to take after + // an unwind. Ensure they are in the -1 state. + if (CI->doesNotThrow()) + continue; + insertStateNumberStore(ParentRegNode, CI, BaseState); + } else if (auto *II = dyn_cast<InvokeInst>(&I)) { + // Look up the state number of the landingpad this unwinds to. + LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); + // FIXME: Why does this assertion fail? 
+ //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!"); + int State = FuncInfo.LandingPadStateMap[LPI]; + insertStateNumberStore(ParentRegNode, II, State); + } + } + } +} + +void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode, + Instruction *IP, int State) { + IRBuilder<> Builder(IP); + Value *StateField = + Builder.CreateStructGEP(RegNodeTy, ParentRegNode, StateFieldIndex); + Builder.CreateStore(Builder.getInt32(State), StateField); +} diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 36b3b02a707a..500c84d2a418 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -45,7 +45,8 @@ printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) { report_fatal_error("can't handle InlineJT32"); } -static void printExpr(const MCExpr *Expr, raw_ostream &OS) { +static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI, + raw_ostream &OS) { int Offset = 0; const MCSymbolRefExpr *SRE; @@ -60,7 +61,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { } assert(SRE->getKind() == MCSymbolRefExpr::VK_None); - OS << SRE->getSymbol(); + SRE->getSymbol().print(OS, MAI); if (Offset) { if (Offset > 0) @@ -83,5 +84,5 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { } assert(Op.isExpr() && "unknown operand kind in printOperand"); - printExpr(Op.getExpr(), O); + printExpr(Op.getExpr(), &MAI, O); } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index f2d2b37d6f21..3178a4edbb3b 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp @@ -8,12 +8,11 @@ //===----------------------------------------------------------------------===// #include "XCoreMCAsmInfo.h" -#include "llvm/ADT/StringRef.h" using namespace llvm; void XCoreMCAsmInfo::anchor() { } 
-XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) { +XCoreMCAsmInfo::XCoreMCAsmInfo(const Triple &TT) { SupportsDebugInformation = true; Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h index 26df211eecee..39581e424e8c 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h @@ -17,14 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class StringRef; - class Target; +class Triple; - class XCoreMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit XCoreMCAsmInfo(StringRef TT); - }; +class XCoreMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit XCoreMCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index ce0d39fe407f..f0e459620c9c 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -54,7 +54,7 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 23e24f2afd5a..702056d781d0 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -37,7 +37,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -100,7 +100,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { Twine(Sym->getName() + StringRef(".globound"))); OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Global); OutStreamer->EmitAssignment(SymGlob, - MCConstantExpr::Create(ATy->getNumElements(), + MCConstantExpr::create(ATy->getNumElements(), OutContext)); if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()) { @@ -157,7 +157,8 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned Size = TD->getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), + MCConstantExpr::create(Size, OutContext)); } OutStreamer->EmitLabel(GVSym); @@ -201,7 +202,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, MachineBasicBlock *MBB = JTBBs[i]; if (i > 0) O << ","; - O << *MBB->getSymbol(); + MBB->getSymbol()->print(O, MAI); } } @@ -217,17 +218,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); break; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case 
MachineOperand::MO_ConstantPoolIndex: O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); break; case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); + GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); break; default: llvm_unreachable("not implemented"); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index f56caec24d63..aa71241102ff 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1926,7 +1926,8 @@ static inline bool isImmUs4(int64_t val) /// by AM is legal for this target, for a load/store of the specified type. bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 22014ed4bac6..97f0494b6fe3 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -120,7 +120,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; private: const TargetMachine &TM; diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp index cffba5fee03f..03c5fa2e9c42 100644 --- a/lib/Target/XCore/XCoreMCInstLower.cpp +++ b/lib/Target/XCore/XCoreMCInstLower.cpp @@ -65,7 +65,7 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); if (!Offset) 
return MCOperand::createExpr(MCSym); @@ -73,8 +73,8 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::createExpr(Add); } |