author     Dimitry Andric <dim@FreeBSD.org>   2018-07-30 16:33:32 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2018-07-30 16:33:32 +0000
commit     51315c45ff5643a27f9c84b816db54ee870ba29b (patch)
tree       1d87443fa0e53d3e6b315ce25787e64be0906bf7 /contrib/llvm/lib/Target/BPF
parent     6dfd050075216be8538ae375a22d30db72916f7e (diff)
parent     eb11fae6d08f479c0799db45860a98af528fa6e7 (diff)
Merge llvm trunk r338150, and resolve conflicts.
Notes:
svn path=/projects/clang700-import/; revision=336916
Diffstat (limited to 'contrib/llvm/lib/Target/BPF')
26 files changed, 1082 insertions, 184 deletions
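Most of what follows wires up the new alu32 subtarget feature: i32 values get their own GPR32 (W) register class, calling conventions, load/store instructions and select pseudos, plus a target-controlled memcpy expansion and two new machine peephole passes. A minimal orientation sketch (editor's illustration, not part of the patch; selectValueRegClass is a hypothetical helper, while BPFSubtarget::getHasAlu32(), BPF::GPR32RegClass and BPF::GPRRegClass are all defined in the diff below):

    // Hypothetical helper illustrating the feature gate used throughout:
    const TargetRegisterClass *selectValueRegClass(const BPFSubtarget &STI,
                                                   MVT VT) {
      // With -mattr=+alu32, i32 values live in the 32-bit W registers;
      // otherwise everything is promoted into the 64-bit R registers.
      if (VT == MVT::i32 && STI.getHasAlu32())
        return &BPF::GPR32RegClass;
      return &BPF::GPRRegClass;
    }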
diff --git a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index deaa11325809..496f2befde58 100644 --- a/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/contrib/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -460,7 +460,7 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } else if (BPFOperand::isValidIdAtStart (Name)) Operands.push_back(BPFOperand::createToken(Name, NameLoc)); else - return true; + return Error(NameLoc, "invalid register/token name"); while (!getLexer().is(AsmToken::EndOfStatement)) { // Attempt to parse token as operator @@ -472,8 +472,10 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, continue; // Attempt to parse token as an immediate - if (parseImmediate(Operands) != MatchOperand_Success) - return true; + if (parseImmediate(Operands) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } } if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/contrib/llvm/lib/Target/BPF/BPF.h b/contrib/llvm/lib/Target/BPF/BPF.h index 4a0cb20357c8..76d3e1ca5f6f 100644 --- a/contrib/llvm/lib/Target/BPF/BPF.h +++ b/contrib/llvm/lib/Target/BPF/BPF.h @@ -17,6 +17,11 @@ namespace llvm { class BPFTargetMachine; FunctionPass *createBPFISelDag(BPFTargetMachine &TM); +FunctionPass *createBPFMIPeepholePass(); +FunctionPass *createBPFMIPreEmitPeepholePass(); + +void initializeBPFMIPeepholePass(PassRegistry&); +void initializeBPFMIPreEmitPeepholePass(PassRegistry&); } #endif diff --git a/contrib/llvm/lib/Target/BPF/BPF.td b/contrib/llvm/lib/Target/BPF/BPF.td index 2d0c22a3a516..877bd15f4f2b 100644 --- a/contrib/llvm/lib/Target/BPF/BPF.td +++ b/contrib/llvm/lib/Target/BPF/BPF.td @@ -26,6 +26,12 @@ def : Proc<"probe", []>; def DummyFeature : SubtargetFeature<"dummy", "isDummyMode", "true", "unused feature">; +def ALU32 : SubtargetFeature<"alu32", "HasAlu32", "true", + "Enable ALU32 instructions">; + +def DwarfRIS: SubtargetFeature<"dwarfris", "UseDwarfRIS", "true", + "Disable MCAsmInfo DwarfUsesRelocationsAcrossSections">; + def BPFInstPrinter : AsmWriter { string AsmWriterClassName = "InstPrinter"; bit isMCAsmWriter = 1; diff --git a/contrib/llvm/lib/Target/BPF/BPFCallingConv.td b/contrib/llvm/lib/Target/BPF/BPFCallingConv.td index 8cec6fa54698..637f9752ec42 100644 --- a/contrib/llvm/lib/Target/BPF/BPFCallingConv.td +++ b/contrib/llvm/lib/Target/BPF/BPFCallingConv.td @@ -26,4 +26,24 @@ def CC_BPF64 : CallingConv<[ CCAssignToStack<8, 8> ]>; +// Return-value convention when -mattr=+alu32 enabled +def RetCC_BPF32 : CallingConv<[ + CCIfType<[i32], CCAssignToRegWithShadow<[W0], [R0]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0], [W0]>> +]>; + +// Calling convention when -mattr=+alu32 enabled +def CC_BPF32 : CallingConv<[ + // Promote i8/i16/i32 args to i64 + CCIfType<[i32], CCAssignToRegWithShadow<[W1, W2, W3, W4, W5], + [R1, R2, R3, R4, R5]>>, + + // All arguments get passed in integer registers if there is space. 
+ CCIfType<[i64], CCAssignToRegWithShadow<[R1, R2, R3, R4, R5], + [W1, W2, W3, W4, W5]>>, + + // Could be assigned to the stack in 8-byte aligned units, but unsupported + CCAssignToStack<8, 8> +]>; + def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>; diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index 61b04d1f2a13..8b9bc08e144f 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -39,8 +39,14 @@ using namespace llvm; namespace { class BPFDAGToDAGISel : public SelectionDAGISel { + + /// Subtarget - Keep a pointer to the BPFSubtarget around so that we can + /// make the right decision when generating code for different subtargets. + const BPFSubtarget *Subtarget; + public: - explicit BPFDAGToDAGISel(BPFTargetMachine &TM) : SelectionDAGISel(TM) { + explicit BPFDAGToDAGISel(BPFTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(nullptr) { curr_func_ = nullptr; } @@ -48,6 +54,12 @@ public: return "BPF DAG->DAG Pattern Instruction Selection"; } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &MF.getSubtarget<BPFSubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + void PreprocessISelDAG() override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode, @@ -65,9 +77,9 @@ private: bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); // Node preprocessing cases - void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator I); + void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator &I); void PreprocessCopyToReg(SDNode *Node); - void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator I); + void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator &I); // Find constants from a constant structure typedef std::vector<unsigned char> val_vec_type; @@ -176,12 +188,9 @@ bool BPFDAGToDAGISel::SelectInlineAsmMemoryOperand( void BPFDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - // Dump information about the Node being selected - DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); - // If we have a custom node, we already have selected! 
if (Node->isMachineOpcode()) { - DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return; } @@ -241,7 +250,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { } void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, - SelectionDAG::allnodes_iterator I) { + SelectionDAG::allnodes_iterator &I) { union { uint8_t c[8]; uint16_t s; @@ -268,7 +277,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || OP1N->getNumOperands() == 0) return; - DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); const GlobalAddressSDNode *GADN = dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode()); @@ -278,7 +287,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && LDAddrNode->getNumOperands() > 0) { - DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); SDValue OP1 = LDAddrNode->getOperand(0); if (const GlobalAddressSDNode *GADN = @@ -301,8 +310,8 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, val = new_val.d; } - DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val - << '\n'); + LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant " + << val << '\n'); SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); // After replacement, the current node is dead, we need to @@ -418,8 +427,8 @@ bool BPFDAGToDAGISel::fillGenericConstant(const DataLayout &DL, if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { uint64_t val = CI->getZExtValue(); - DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " << val - << '\n'); + LLVM_DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " + << val << '\n'); if (Size > 8 || (Size & (Size - 1))) return false; @@ -508,17 +517,49 @@ void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) { break; } - DEBUG(dbgs() << "Find Load Value to VReg " - << TargetRegisterInfo::virtReg2Index(RegN->getReg()) << '\n'); + LLVM_DEBUG(dbgs() << "Find Load Value to VReg " + << TargetRegisterInfo::virtReg2Index(RegN->getReg()) + << '\n'); load_to_vreg_[RegN->getReg()] = mem_load_op; } void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, - SelectionDAG::allnodes_iterator I) { + SelectionDAG::allnodes_iterator &I) { ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1)); if (!MaskN) return; + // The Reg operand should be a virtual register, which is defined + // outside the current basic block. DAG combiner has done a pretty + // good job in removing truncating inside a single basic block except + // when the Reg operand comes from bpf_load_[byte | half | word] for + // which the generic optimizer doesn't understand their results are + // zero extended. 
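  // An illustrative case (editor's sketch, not patch content): for IR such as
  //   %v = call i64 @llvm.bpf.load.byte(...)
  //   %m = and i64 %v, 255
  // the AND is redundant because the intrinsic already returns its byte
  // zero extended, which is what the INTRINSIC_W_CHAIN handling below
  // detects before replacing the mask node with its input.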
+ SDValue BaseV = Node->getOperand(0); + if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) { + unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue(); + uint64_t MaskV = MaskN->getZExtValue(); + + if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || + (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || + (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) + return; + + LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; + Node->dump(); dbgs() << '\n'); + + I--; + CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); + I++; + CurDAG->DeleteNode(Node); + + return; + } + + // Multiple basic blocks case. + if (BaseV.getOpcode() != ISD::CopyFromReg) + return; + unsigned match_load_op = 0; switch (MaskN->getZExtValue()) { default: @@ -534,19 +575,12 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, break; } - // The Reg operand should be a virtual register, which is defined - // outside the current basic block. DAG combiner has done a pretty - // good job in removing truncating inside a single basic block. - SDValue BaseV = Node->getOperand(0); - if (BaseV.getOpcode() != ISD::CopyFromReg) - return; - const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1)); if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) return; unsigned AndOpReg = RegN->getReg(); - DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n'); + LLVM_DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n'); // Examine the PHI insns in the MachineBasicBlock to found out the // definitions of this virtual register. At this stage (DAG2DAG @@ -576,8 +610,8 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, // %2 = PHI %0, <%bb.1>, %1, <%bb.3> // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3) // The AND operation can be removed if both %0 in %bb.1 and %1 in - // %bb.3 are defined with with a load matching the MaskN. - DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); + // %bb.3 are defined with a load matching the MaskN. + LLVM_DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); unsigned PrevReg = -1; for (unsigned i = 0; i < MII->getNumOperands(); ++i) { const MachineOperand &MOP = MII->getOperand(i); @@ -593,8 +627,8 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, } } - DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); + dbgs() << '\n'); I--; CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 3ea96e3148f2..9272cf692dc9 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -33,6 +33,10 @@ using namespace llvm; #define DEBUG_TYPE "bpf-lower" +static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order", + cl::Hidden, cl::init(false), + cl::desc("Expand memcpy into load/store pairs in order")); + static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( @@ -57,6 +61,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, // Set up the register classes. 
addRegisterClass(MVT::i64, &BPF::GPRRegClass); + if (STI.getHasAlu32()) + addRegisterClass(MVT::i32, &BPF::GPR32RegClass); // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); @@ -67,9 +73,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::SETCC, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); @@ -77,32 +80,39 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + for (auto VT : { MVT::i32, MVT::i64 }) { + if (VT == MVT::i32 && !STI.getHasAlu32()) + continue; - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::SHL_PARTS, VT, Expand); + setOperationAction(ISD::SRL_PARTS, VT, Expand); + setOperationAction(ISD::SRA_PARTS, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + + setOperationAction(ISD::SETCC, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Custom); + } - setOperationAction(ISD::ROTR, MVT::i64, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + if (STI.getHasAlu32()) { + setOperationAction(ISD::BSWAP, MVT::i32, Promote); + setOperationAction(ISD::BR_CC, MVT::i32, Promote); + } setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); @@ -126,12 +136,33 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setMinFunctionAlignment(3); setPrefFunctionAlignment(3); - // inline memcpy() for kernel to see explicit copy - MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 128; - MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 128; - MaxStoresPerMemmove = 
MaxStoresPerMemmoveOptSize = 128;
+  if (BPFExpandMemcpyInOrder) {
+    // LLVM generic code will try to expand memcpy into load/store pairs at
+    // this stage, which is before quite a few IR optimization passes, so the
+    // loads and stores could potentially be moved apart from each other,
+    // which would cause trouble for the memcpy pattern matcher inside kernel
+    // eBPF JIT compilers.
+    //
+    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
+    // expansion of memcpy to a later stage in the IR optimization pipeline so
+    // those load/store pairs won't be touched and can be kept in order.
+    // Hence, we set MaxStoresPerMem* to zero to disable the generic
+    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
+    // expander EmitTargetCodeForMemcpy.
+    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
+    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
+    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
+  } else {
+    // inline memcpy() for kernel to see explicit copy
+    unsigned CommonMaxStores =
+      STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
+
+    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
+    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
+    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
+  }

   // CPU/Feature control
+  HasAlu32 = STI.getHasAlu32();
   HasJmpExt = STI.getHasJmpExt();
 }
@@ -189,26 +220,29 @@ SDValue BPFTargetLowering::LowerFormalArguments(
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeFormalArguments(Ins, CC_BPF64);
+  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

   for (auto &VA : ArgLocs) {
     if (VA.isRegLoc()) {
       // Arguments passed in registers
       EVT RegVT = VA.getLocVT();
-      switch (RegVT.getSimpleVT().SimpleTy) {
+      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
+      switch (SimpleTy) {
       default: {
         errs() << "LowerFormalArguments Unhandled argument type: "
                << RegVT.getEVTString() << '\n';
         llvm_unreachable(0);
       }
+      case MVT::i32:
       case MVT::i64:
-        unsigned VReg = RegInfo.createVirtualRegister(&BPF::GPRRegClass);
+        unsigned VReg = RegInfo.createVirtualRegister(SimpleTy == MVT::i64 ?
+                                                      &BPF::GPRRegClass :
+                                                      &BPF::GPR32RegClass);
         RegInfo.addLiveIn(VA.getLocReg(), VReg);
         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

-        // If this is an 8/16/32-bit value, it is really passed promoted to 64
-        // bits. Insert an assert[sz]ext to capture this, then truncate to the
-        // right size.
+        // If this is a value that has been promoted to a wider type, insert
+        // an assert[sz]ext to capture this, then truncate to the right size.
         if (VA.getLocInfo() == CCValAssign::SExt)
           ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                  DAG.getValueType(VA.getValVT()));
@@ -220,6 +254,8 @@ SDValue BPFTargetLowering::LowerFormalArguments(
           ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

         InVals.push_back(ArgValue);
+
+        break;
       }
     } else {
       fail(DL, DAG, "defined with too many args");
@@ -264,7 +300,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

-  CCInfo.AnalyzeCallOperands(Outs, CC_BPF64);
+  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ?
CC_BPF32 : CC_BPF64);

   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -388,7 +424,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   }

   // Analyze return values.
-  CCInfo.AnalyzeReturn(Outs, RetCC_BPF64);
+  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

   SDValue Flag;
   SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -432,7 +468,7 @@ SDValue BPFTargetLowering::LowerCallResult(
     return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
   }

-  CCInfo.AnalyzeCallResult(Ins, RetCC_BPF64);
+  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

   // Copy all of the result registers out of their specified physreg.
   for (auto &Val : RVLocs) {
@@ -485,8 +521,7 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   if (!getHasJmpExt())
     NegateCC(LHS, RHS, CC);

-  SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i64);
-
+  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

@@ -507,6 +542,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "BPFISD::BR_CC";
   case BPFISD::Wrapper:
     return "BPFISD::Wrapper";
+  case BPFISD::MEMCPY:
+    return "BPFISD::MEMCPY";
   }
   return nullptr;
 }
@@ -523,14 +560,90 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
   return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
 }

+unsigned
+BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
+                                 unsigned Reg, bool isSigned) const {
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
+  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
+  MachineFunction *F = BB->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+
+  MachineRegisterInfo &RegInfo = F->getRegInfo();
+  unsigned PromotedReg0 = RegInfo.createVirtualRegister(RC);
+  unsigned PromotedReg1 = RegInfo.createVirtualRegister(RC);
+  unsigned PromotedReg2 = RegInfo.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
+    .addReg(PromotedReg0).addImm(32);
+  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
+    .addReg(PromotedReg1).addImm(32);
+
+  return PromotedReg2;
+}
+
+MachineBasicBlock *
+BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
+                                                     MachineBasicBlock *BB)
+                                                     const {
+  MachineFunction *MF = MI.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstrBuilder MIB(*MF, MI);
+  unsigned ScratchReg;
+
+  // This function does custom insertion during lowering BPFISD::MEMCPY which
+  // only has two register operands from memcpy semantics, the copy source
+  // address and the copy destination address.
+  //
+  // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
+  // a third scratch register to serve as the destination register of the load
+  // and the source register of the store.
+  //
+  // The scratch register here carries the Define | Dead | EarlyClobber flags.
+  // The EarlyClobber flag has the semantic property that the operand it is
+  // attached to is clobbered before the rest of the inputs are read. Hence it
+  // must be unique among the operands to the instruction. The Define flag is
+  // needed to convince the machine verifier that an Undef value isn't a
+  // problem, as we are loading memory into it anyway.
The Dead flag is needed as the + // value in scratch isn't supposed to be used by any other instruction. + ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass); + MIB.addReg(ScratchReg, + RegState::Define | RegState::Dead | RegState::EarlyClobber); + + return BB; +} + MachineBasicBlock * BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - bool isSelectOp = MI.getOpcode() == BPF::Select; + unsigned Opc = MI.getOpcode(); + bool isSelectRROp = (Opc == BPF::Select || + Opc == BPF::Select_64_32 || + Opc == BPF::Select_32 || + Opc == BPF::Select_32_64); + + bool isMemcpyOp = Opc == BPF::MEMCPY; + +#ifndef NDEBUG + bool isSelectRIOp = (Opc == BPF::Select_Ri || + Opc == BPF::Select_Ri_64_32 || + Opc == BPF::Select_Ri_32 || + Opc == BPF::Select_Ri_32_64); + + + assert((isSelectRROp || isSelectRIOp || isMemcpyOp) && + "Unexpected instr type to insert"); +#endif + + if (isMemcpyOp) + return EmitInstrWithCustomInserterMemcpy(MI, BB); - assert((isSelectOp || MI.getOpcode() == BPF::Select_Ri) && "Unexpected instr type to insert"); + bool is32BitCmp = (Opc == BPF::Select_32 || + Opc == BPF::Select_32_64 || + Opc == BPF::Select_Ri_32 || + Opc == BPF::Select_Ri_32_64); // To "insert" a SELECT instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg @@ -561,56 +674,72 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB->addSuccessor(Copy1MBB); // Insert Branch if Flag - unsigned LHS = MI.getOperand(1).getReg(); int CC = MI.getOperand(3).getImm(); int NewCC; switch (CC) { case ISD::SETGT: - NewCC = isSelectOp ? BPF::JSGT_rr : BPF::JSGT_ri; + NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri; break; case ISD::SETUGT: - NewCC = isSelectOp ? BPF::JUGT_rr : BPF::JUGT_ri; + NewCC = isSelectRROp ? BPF::JUGT_rr : BPF::JUGT_ri; break; case ISD::SETGE: - NewCC = isSelectOp ? BPF::JSGE_rr : BPF::JSGE_ri; + NewCC = isSelectRROp ? BPF::JSGE_rr : BPF::JSGE_ri; break; case ISD::SETUGE: - NewCC = isSelectOp ? BPF::JUGE_rr : BPF::JUGE_ri; + NewCC = isSelectRROp ? BPF::JUGE_rr : BPF::JUGE_ri; break; case ISD::SETEQ: - NewCC = isSelectOp ? BPF::JEQ_rr : BPF::JEQ_ri; + NewCC = isSelectRROp ? BPF::JEQ_rr : BPF::JEQ_ri; break; case ISD::SETNE: - NewCC = isSelectOp ? BPF::JNE_rr : BPF::JNE_ri; + NewCC = isSelectRROp ? BPF::JNE_rr : BPF::JNE_ri; break; case ISD::SETLT: - NewCC = isSelectOp ? BPF::JSLT_rr : BPF::JSLT_ri; + NewCC = isSelectRROp ? BPF::JSLT_rr : BPF::JSLT_ri; break; case ISD::SETULT: - NewCC = isSelectOp ? BPF::JULT_rr : BPF::JULT_ri; + NewCC = isSelectRROp ? BPF::JULT_rr : BPF::JULT_ri; break; case ISD::SETLE: - NewCC = isSelectOp ? BPF::JSLE_rr : BPF::JSLE_ri; + NewCC = isSelectRROp ? BPF::JSLE_rr : BPF::JSLE_ri; break; case ISD::SETULE: - NewCC = isSelectOp ? BPF::JULE_rr : BPF::JULE_ri; + NewCC = isSelectRROp ? BPF::JULE_rr : BPF::JULE_ri; break; default: report_fatal_error("unimplemented select CondCode " + Twine(CC)); } - if (isSelectOp) - BuildMI(BB, DL, TII.get(NewCC)) - .addReg(LHS) - .addReg(MI.getOperand(2).getReg()) - .addMBB(Copy1MBB); - else { + + unsigned LHS = MI.getOperand(1).getReg(); + bool isSignedCmp = (CC == ISD::SETGT || + CC == ISD::SETGE || + CC == ISD::SETLT || + CC == ISD::SETLE); + + // eBPF at the moment only has 64-bit comparison. 
Any 32-bit comparison needs
+  // to be promoted; however, if the 32-bit comparison operands are
+  // destination registers then they are already implicitly zero-extended,
+  // and there is no need for an explicit zero-extend sequence for them.
+  //
+  // We simply do the extension for all situations in this method, and try to
+  // remove the unnecessary ones in the BPFMIPeephole pass.
+  if (is32BitCmp)
+    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
+
+  if (isSelectRROp) {
+    unsigned RHS = MI.getOperand(2).getReg();
+
+    if (is32BitCmp)
+      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
+
+    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
+  } else {
     int64_t imm32 = MI.getOperand(2).getImm();
     // sanity check before we build J*_ri instruction.
     assert (isInt<32>(imm32));
     BuildMI(BB, DL, TII.get(NewCC))
-        .addReg(LHS)
-        .addImm(imm32)
-        .addMBB(Copy1MBB);
+        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
   }

   // Copy0MBB:
@@ -634,3 +763,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   MI.eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
 }
+
+EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+                                          EVT VT) const {
+  return getHasAlu32() ? MVT::i32 : MVT::i64;
+}
+
+MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+                                              EVT VT) const {
+  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
index 6ca2594a7e88..0aa8b9ac57ac 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -28,7 +28,8 @@ enum NodeType : unsigned {
   CALL,
   SELECT_CC,
   BR_CC,
-  Wrapper
+  Wrapper,
+  MEMCPY
 };
 }
@@ -54,10 +55,17 @@ public:
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *BB) const override;

+  bool getHasAlu32() const { return HasAlu32; }
   bool getHasJmpExt() const { return HasJmpExt; }

+  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+                         EVT VT) const override;
+
+  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
+
 private:
   // Control Instruction Selection Features
+  bool HasAlu32;
   bool HasJmpExt;

   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -100,6 +108,14 @@ private:
                                  Type *Ty) const override {
     return true;
   }
+
+  unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
+                         bool isSigned) const;
+
+  MachineBasicBlock * EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
+                                                        MachineBasicBlock *BB)
+                                                        const;
+
 };
 }
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 5351cfa95020..4d47debdaa74 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -36,10 +36,92 @@ void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   if (BPF::GPRRegClass.contains(DestReg, SrcReg))
     BuildMI(MBB, I, DL, get(BPF::MOV_rr), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc));
+  else if (BPF::GPR32RegClass.contains(DestReg, SrcReg))
+    BuildMI(MBB, I, DL, get(BPF::MOV_rr_32), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else
     llvm_unreachable("Impossible reg-to-reg copy");
 }

+void BPFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
+  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  uint64_t CopyLen = MI->getOperand(2).getImm();
+  uint64_t Alignment = MI->getOperand(3).getImm();
+  unsigned ScratchReg =
MI->getOperand(4).getReg(); + MachineBasicBlock *BB = MI->getParent(); + DebugLoc dl = MI->getDebugLoc(); + unsigned LdOpc, StOpc; + + switch (Alignment) { + case 1: + LdOpc = BPF::LDB; + StOpc = BPF::STB; + break; + case 2: + LdOpc = BPF::LDH; + StOpc = BPF::STH; + break; + case 4: + LdOpc = BPF::LDW; + StOpc = BPF::STW; + break; + case 8: + LdOpc = BPF::LDD; + StOpc = BPF::STD; + break; + default: + llvm_unreachable("unsupported memcpy alignment"); + } + + unsigned IterationNum = CopyLen >> Log2_64(Alignment); + for(unsigned I = 0; I < IterationNum; ++I) { + BuildMI(*BB, MI, dl, get(LdOpc)) + .addReg(ScratchReg, RegState::Define).addReg(SrcReg) + .addImm(I * Alignment); + BuildMI(*BB, MI, dl, get(StOpc)) + .addReg(ScratchReg, RegState::Kill).addReg(DstReg) + .addImm(I * Alignment); + } + + unsigned BytesLeft = CopyLen & (Alignment - 1); + unsigned Offset = IterationNum * Alignment; + bool Hanging4Byte = BytesLeft & 0x4; + bool Hanging2Byte = BytesLeft & 0x2; + bool Hanging1Byte = BytesLeft & 0x1; + if (Hanging4Byte) { + BuildMI(*BB, MI, dl, get(BPF::LDW)) + .addReg(ScratchReg, RegState::Define).addReg(SrcReg).addImm(Offset); + BuildMI(*BB, MI, dl, get(BPF::STW)) + .addReg(ScratchReg, RegState::Kill).addReg(DstReg).addImm(Offset); + Offset += 4; + } + if (Hanging2Byte) { + BuildMI(*BB, MI, dl, get(BPF::LDH)) + .addReg(ScratchReg, RegState::Define).addReg(SrcReg).addImm(Offset); + BuildMI(*BB, MI, dl, get(BPF::STH)) + .addReg(ScratchReg, RegState::Kill).addReg(DstReg).addImm(Offset); + Offset += 2; + } + if (Hanging1Byte) { + BuildMI(*BB, MI, dl, get(BPF::LDB)) + .addReg(ScratchReg, RegState::Define).addReg(SrcReg).addImm(Offset); + BuildMI(*BB, MI, dl, get(BPF::STB)) + .addReg(ScratchReg, RegState::Kill).addReg(DstReg).addImm(Offset); + } + + BB->erase(MI); +} + +bool BPFInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + if (MI.getOpcode() == BPF::MEMCPY) { + expandMEMCPY(MI); + return true; + } + + return false; +} + void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool IsKill, int FI, @@ -54,6 +136,11 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) .addImm(0); + else if (RC == &BPF::GPR32RegClass) + BuildMI(MBB, I, DL, get(BPF::STW32)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addImm(0); else llvm_unreachable("Can't store this register to stack slot"); } @@ -69,6 +156,8 @@ void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (RC == &BPF::GPRRegClass) BuildMI(MBB, I, DL, get(BPF::LDD), DestReg).addFrameIndex(FI).addImm(0); + else if (RC == &BPF::GPR32RegClass) + BuildMI(MBB, I, DL, get(BPF::LDW32), DestReg).addFrameIndex(FI).addImm(0); else llvm_unreachable("Can't load this register from stack slot"); } @@ -83,7 +172,7 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; // Working from the bottom, when we see a non-terminator @@ -158,7 +247,7 @@ unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB, while (I != MBB.begin()) { --I; - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; if (I->getOpcode() != BPF::JMP) break; diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h index f591f48a89a6..fb65a86a6d18 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h +++ 
b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h @@ -34,6 +34,8 @@ public: const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -55,6 +57,9 @@ public: MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded = nullptr) const override; +private: + void expandMEMCPY(MachineBasicBlock::iterator) const; + }; } diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index 126d55fc28de..aaef5fb706e0 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -28,6 +28,10 @@ def SDT_BPFBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, SDTCisVT<3, OtherVT>]>; def SDT_BPFWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def SDT_BPFMEMCPY : SDTypeProfile<0, 4, [SDTCisVT<0, i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>, + SDTCisVT<3, i64>]>; def BPFcall : SDNode<"BPFISD::CALL", SDT_BPFCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, @@ -43,8 +47,13 @@ def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC, [SDNPInGlue]>; def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>; +def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPMayStore, SDNPMayLoad]>; def BPFIsLittleEndian : Predicate<"CurDAG->getDataLayout().isLittleEndian()">; def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">; +def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">; +def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">; def brtarget : Operand<OtherVT> { let PrintMethod = "printBrTargetOperand"; @@ -57,6 +66,8 @@ def u64imm : Operand<i64> { def i64immSExt32 : PatLeaf<(i64 imm), [{return isInt<32>(N->getSExtValue()); }]>; +def i32immSExt32 : PatLeaf<(i32 imm), + [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. 
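// Illustration (not from the patch): ADDRri below matches a base register
// plus a constant offset, so a field access at offset 12 selects to a single
// memory operand printed as, e.g.,
//   r0 = *(u32 *)(r1 + 12)
// in the assembler syntax used by this backend.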
def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>; @@ -218,7 +229,7 @@ multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> { (outs GPR32:$dst), (ins GPR32:$src2, i32imm:$imm), "$dst "#OpcodeStr#" $imm", - [(set GPR32:$dst, (OpNode GPR32:$src2, i32:$imm))]>; + [(set GPR32:$dst, (OpNode GPR32:$src2, i32immSExt32:$imm))]>; } let Constraints = "$dst = $src2" in { @@ -292,7 +303,7 @@ def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV, (outs GPR32:$dst), (ins i32imm:$imm), "$dst = $imm", - [(set GPR32:$dst, (i32 i32:$imm))]>; + [(set GPR32:$dst, (i32 i32immSExt32:$imm))]>; } def FI_ri @@ -347,9 +358,11 @@ class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern> class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode> : STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>; -def STW : STOREi64<BPF_W, "u32", truncstorei32>; -def STH : STOREi64<BPF_H, "u16", truncstorei16>; -def STB : STOREi64<BPF_B, "u8", truncstorei8>; +let Predicates = [BPFNoALU32] in { + def STW : STOREi64<BPF_W, "u32", truncstorei32>; + def STH : STOREi64<BPF_H, "u16", truncstorei16>; + def STB : STOREi64<BPF_B, "u8", truncstorei8>; +} def STD : STOREi64<BPF_DW, "u64", store>; // LOAD instructions @@ -371,9 +384,13 @@ class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern> class LOADi64<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> : LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>; -def LDW : LOADi64<BPF_W, "u32", zextloadi32>; -def LDH : LOADi64<BPF_H, "u16", zextloadi16>; -def LDB : LOADi64<BPF_B, "u8", zextloadi8>; + +let Predicates = [BPFNoALU32] in { + def LDW : LOADi64<BPF_W, "u32", zextloadi32>; + def LDH : LOADi64<BPF_H, "u16", zextloadi16>; + def LDB : LOADi64<BPF_B, "u8", zextloadi8>; +} + def LDD : LOADi64<BPF_DW, "u64", load>; class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern> @@ -456,7 +473,7 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1, } // ADJCALLSTACKDOWN/UP pseudo insns -let Defs = [R11], Uses = [R11] in { +let Defs = [R11], Uses = [R11], isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), "#ADJCALLSTACKDOWN $amt1 $amt2", [(BPFcallseq_start timm:$amt1, timm:$amt2)]>; @@ -465,7 +482,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(BPFcallseq_end timm:$amt1, timm:$amt2)]>; } -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, isCodeGenOnly = 1 in { def Select : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", @@ -476,6 +493,36 @@ let usesCustomInserter = 1 in { "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", [(set i64:$dst, (BPFselectcc i64:$lhs, (i64immSExt32:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; + def Select_64_32 : Pseudo<(outs GPR32:$dst), + (ins GPR:$lhs, GPR:$rhs, i64imm:$imm, GPR32:$src, GPR32:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i32:$dst, + (BPFselectcc i64:$lhs, i64:$rhs, (i64 imm:$imm), i32:$src, i32:$src2))]>; + def Select_Ri_64_32 : Pseudo<(outs GPR32:$dst), + (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR32:$src, GPR32:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i32:$dst, + (BPFselectcc i64:$lhs, (i64immSExt32:$rhs), (i64 imm:$imm), i32:$src, i32:$src2))]>; + def Select_32 : Pseudo<(outs GPR32:$dst), + (ins GPR32:$lhs, GPR32:$rhs, i32imm:$imm, GPR32:$src, GPR32:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? 
$src : $src2", + [(set i32:$dst, + (BPFselectcc i32:$lhs, i32:$rhs, (i32 imm:$imm), i32:$src, i32:$src2))]>; + def Select_Ri_32 : Pseudo<(outs GPR32:$dst), + (ins GPR32:$lhs, i32imm:$rhs, i32imm:$imm, GPR32:$src, GPR32:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i32:$dst, + (BPFselectcc i32:$lhs, (i32immSExt32:$rhs), (i32 imm:$imm), i32:$src, i32:$src2))]>; + def Select_32_64 : Pseudo<(outs GPR:$dst), + (ins GPR32:$lhs, GPR32:$rhs, i32imm:$imm, GPR:$src, GPR:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i64:$dst, + (BPFselectcc i32:$lhs, i32:$rhs, (i32 imm:$imm), i64:$src, i64:$src2))]>; + def Select_Ri_32_64 : Pseudo<(outs GPR:$dst), + (ins GPR32:$lhs, i32imm:$rhs, i32imm:$imm, GPR:$src, GPR:$src2), + "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", + [(set i64:$dst, + (BPFselectcc i32:$lhs, (i32immSExt32:$rhs), (i32 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register @@ -492,9 +539,11 @@ def : Pat<(BPFcall imm:$dst), (JAL imm:$dst)>; def : Pat<(BPFcall GPR:$dst), (JALX GPR:$dst)>; // Loads -def : Pat<(extloadi8 ADDRri:$src), (i64 (LDB ADDRri:$src))>; -def : Pat<(extloadi16 ADDRri:$src), (i64 (LDH ADDRri:$src))>; -def : Pat<(extloadi32 ADDRri:$src), (i64 (LDW ADDRri:$src))>; +let Predicates = [BPFNoALU32] in { + def : Pat<(i64 (extloadi8 ADDRri:$src)), (i64 (LDB ADDRri:$src))>; + def : Pat<(i64 (extloadi16 ADDRri:$src)), (i64 (LDH ADDRri:$src))>; + def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>; +} // Atomics class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> @@ -581,3 +630,102 @@ def LD_ABS_W : LOAD_ABS<BPF_W, "u32", int_bpf_load_word>; def LD_IND_B : LOAD_IND<BPF_B, "u8", int_bpf_load_byte>; def LD_IND_H : LOAD_IND<BPF_H, "u16", int_bpf_load_half>; def LD_IND_W : LOAD_IND<BPF_W, "u32", int_bpf_load_word>; + +let isCodeGenOnly = 1 in { + def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV, + (outs GPR:$dst), (ins GPR32:$src), + "$dst = $src", []>; +} + +def : Pat<(i64 (sext GPR32:$src)), + (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; + +def : Pat<(i64 (zext GPR32:$src)), + (SRL_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; + +// For i64 -> i32 truncation, use the 32-bit subregister directly. +def : Pat<(i32 (trunc GPR:$src)), + (i32 (EXTRACT_SUBREG GPR:$src, sub_32))>; + +// For i32 -> i64 anyext, we don't care about the high bits. 
+def : Pat<(i64 (anyext GPR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; + +class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern> + : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value, + (outs), + (ins GPR32:$src, MEMri:$addr), + "*("#OpcodeStr#" *)($addr) = $src", + Pattern> { + bits<4> src; + bits<20> addr; + + let Inst{51-48} = addr{19-16}; // base reg + let Inst{55-52} = src; + let Inst{47-32} = addr{15-0}; // offset + let BPFClass = BPF_STX; +} + +class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode> + : STORE32<Opc, OpcodeStr, [(OpNode i32:$src, ADDRri:$addr)]>; + +let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { + def STW32 : STOREi32<BPF_W, "u32", store>; + def STH32 : STOREi32<BPF_H, "u16", truncstorei16>; + def STB32 : STOREi32<BPF_B, "u8", truncstorei8>; +} + +class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern> + : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value, + (outs GPR32:$dst), + (ins MEMri:$addr), + "$dst = *("#OpcodeStr#" *)($addr)", + Pattern> { + bits<4> dst; + bits<20> addr; + + let Inst{51-48} = dst; + let Inst{55-52} = addr{19-16}; + let Inst{47-32} = addr{15-0}; + let BPFClass = BPF_LDX; +} + +class LOADi32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode> + : LOAD32<SizeOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>; + +let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { + def LDW32 : LOADi32<BPF_W, "u32", load>; + def LDH32 : LOADi32<BPF_H, "u16", zextloadi16>; + def LDB32 : LOADi32<BPF_B, "u8", zextloadi8>; +} + +let Predicates = [BPFHasALU32] in { + def : Pat<(truncstorei8 GPR:$src, ADDRri:$dst), + (STB32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>; + def : Pat<(truncstorei16 GPR:$src, ADDRri:$dst), + (STH32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>; + def : Pat<(truncstorei32 GPR:$src, ADDRri:$dst), + (STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>; + def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>; + def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>; + def : Pat<(i64 (zextloadi8 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>; + def : Pat<(i64 (zextloadi16 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>; + def : Pat<(i64 (zextloadi32 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>; + def : Pat<(i64 (extloadi8 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>; + def : Pat<(i64 (extloadi16 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>; + def : Pat<(i64 (extloadi32 ADDRri:$src)), + (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>; +} + +let usesCustomInserter = 1, isCodeGenOnly = 1 in { + def MEMCPY : Pseudo< + (outs), + (ins GPR:$dst, GPR:$src, i64imm:$len, i64imm:$align, variable_ops), + "#memcpy dst: $dst, src: $src, len: $len, align: $align", + [(BPFmemcpy GPR:$dst, GPR:$src, imm:$len, imm:$align)]>; +} diff --git a/contrib/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/contrib/llvm/lib/Target/BPF/BPFMIPeephole.cpp new file mode 100644 index 000000000000..9e984d0facfb --- /dev/null +++ b/contrib/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -0,0 +1,284 @@ +//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code sequences
+// at the MachineInstruction layer.
+//
+// Currently, there are two optimizations implemented:
+// - One pre-RA MachineSSA pass to eliminate type promotion sequences, which
+//   zero extend 32-bit subregisters to 64-bit registers, if the compiler
+//   can prove the subregister is defined by 32-bit operations, in which
+//   case the upper half of the underlying 64-bit register was zeroed
+//   implicitly.
+//
+// - One post-RA PreEmit pass to do final cleanup on some redundant
+//   instructions generated due to bad RA on subregisters.
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-zext-elim"
+
+STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
+
+namespace {
+
+struct BPFMIPeephole : public MachineFunctionPass {
+
+  static char ID;
+  const BPFInstrInfo *TII;
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+
+  BPFMIPeephole() : MachineFunctionPass(ID) {
+    initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Initialize class variables.
+  void initialize(MachineFunction &MFParm);
+
+  bool isMovFrom32Def(MachineInstr *MovMI);
+  bool eliminateZExtSeq(void);
+
+public:
+
+  // Main entry point for this pass.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    initialize(MF);
+
+    return eliminateZExtSeq();
+  }
+};
+
+// Initialize class variables.
+void BPFMIPeephole::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  MRI = &MF->getRegInfo();
+  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+  LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n");
+}
+
+bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+{
+  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+
+  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
+  LLVM_DEBUG(DefInsn->dump());
+
+  if (!DefInsn)
+    return false;
+
+  if (DefInsn->isPHI()) {
+    for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
+      MachineOperand &opnd = DefInsn->getOperand(i);
+
+      if (!opnd.isReg())
+        return false;
+
+      MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+      // quick check on PHI incoming definitions.
+      if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
+        return false;
+    }
+  }
+
+  if (DefInsn->getOpcode() == BPF::COPY) {
+    MachineOperand &opnd = DefInsn->getOperand(1);
+
+    if (!opnd.isReg())
+      return false;
+
+    unsigned Reg = opnd.getReg();
+    if ((TargetRegisterInfo::isVirtualRegister(Reg) &&
+         MRI->getRegClass(Reg) == &BPF::GPRRegClass))
+      return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
+
+  return true;
+}
+
+bool BPFMIPeephole::eliminateZExtSeq(void) {
+  MachineInstr* ToErase = nullptr;
+  bool Eliminated = false;
+
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      // If the previous instruction was marked for elimination, remove it now.
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
+      //
+      //   MOV_32_64 rB, wA
+      //   SLL_ri rB, rB, 32
+      //   SRL_ri rB, rB, 32
+      if (MI.getOpcode() == BPF::SRL_ri &&
+          MI.getOperand(2).getImm() == 32) {
+        unsigned DstReg = MI.getOperand(0).getReg();
+        unsigned ShfReg = MI.getOperand(1).getReg();
+        MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
+
+        LLVM_DEBUG(dbgs() << "Starting SRL found:");
+        LLVM_DEBUG(MI.dump());
+
+        if (!SllMI ||
+            SllMI->isPHI() ||
+            SllMI->getOpcode() != BPF::SLL_ri ||
+            SllMI->getOperand(2).getImm() != 32)
+          continue;
+
+        LLVM_DEBUG(dbgs() << "  SLL found:");
+        LLVM_DEBUG(SllMI->dump());
+
+        MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
+        if (!MovMI ||
+            MovMI->isPHI() ||
+            MovMI->getOpcode() != BPF::MOV_32_64)
+          continue;
+
+        LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
+        LLVM_DEBUG(MovMI->dump());
+
+        unsigned SubReg = MovMI->getOperand(1).getReg();
+        if (!isMovFrom32Def(MovMI)) {
+          LLVM_DEBUG(dbgs()
+                     << "  One ZExt elim sequence failed qualifying elim.\n");
+          continue;
+        }
+
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
+          .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
+
+        SllMI->eraseFromParent();
+        MovMI->eraseFromParent();
+        // MI is the right shift, we can't erase it in its own iteration.
+        // Mark it in ToErase, and erase it in the next iteration.
+        ToErase = &MI;
+        ZExtElemNum++;
+        Eliminated = true;
+      }
+    }
+  }
+
+  return Eliminated;
+}
+
+} // end default namespace
+
+INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
+                "BPF MachineSSA Peephole Optimization", false, false)
+
+char BPFMIPeephole::ID = 0;
+FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
+
+STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
+
+namespace {
+
+struct BPFMIPreEmitPeephole : public MachineFunctionPass {
+
+  static char ID;
+  MachineFunction *MF;
+  const TargetRegisterInfo *TRI;
+
+  BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
+    initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Initialize class variables.
+  void initialize(MachineFunction &MFParm);
+
+  bool eliminateRedundantMov(void);
+
+public:
+
+  // Main entry point for this pass.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    initialize(MF);
+
+    return eliminateRedundantMov();
+  }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+  LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
+}
+
+bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
+  MachineInstr* ToErase = nullptr;
+  bool Eliminated = false;
+
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      // If the previous instruction was marked for elimination, remove it now.
+      if (ToErase) {
+        LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
+        LLVM_DEBUG(ToErase->dump());
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      // Eliminate identical move:
+      //
+      //   MOV rA, rA
+      //
+      // This is particularly possible to happen when sub-register support is
+      // enabled. The special type cast insn MOV_32_64 involves different
+      // register classes on src (i32) and dst (i64), and RA could generate
+      // useless instructions due to this.
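      // A worked case (editor's illustration, not patch content): after RA,
      //   MOV_32_64 r2, w2
      // writes to the register whose 32-bit subregister is its own source,
      // so dst_sub == src in the check below and the move can be erased.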
+      if (MI.getOpcode() == BPF::MOV_32_64) {
+        unsigned dst = MI.getOperand(0).getReg();
+        unsigned dst_sub = TRI->getSubReg(dst, BPF::sub_32);
+        unsigned src = MI.getOperand(1).getReg();
+
+        if (dst_sub != src)
+          continue;
+
+        ToErase = &MI;
+        RedundantMovElemNum++;
+        Eliminated = true;
+      }
+    }
+  }
+
+  return Eliminated;
+}
+
+} // end default namespace
+
+INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
+                "BPF PreEmit Peephole Optimization", false, false)
+
+char BPFMIPreEmitPeephole::ID = 0;
+FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
+{
+  return new BPFMIPreEmitPeephole();
+}
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 6f7067816098..635c11113151 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -37,8 +37,8 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  Reserved.set(BPF::R10); // R10 is read only frame pointer
-  Reserved.set(BPF::R11); // R11 is pseudo stack pointer
+  markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer
+  markSuperRegs(Reserved, BPF::W11); // [W|R]11 is pseudo stack pointer
   return Reserved;
 }
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
index 4202850e9eb9..bb0d6bcf5450 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.h
@@ -29,6 +29,8 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {

   BitVector getReservedRegs(const MachineFunction &MF) const override;

+  bool enableMultipleCopyHints() const override { return true; }
+
   void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..24d5f59bbfd7
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -0,0 +1,43 @@
+//===-- BPFSelectionDAGInfo.cpp - BPF SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BPFSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-selectiondag-info"
+
+SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  // Requires the copy size to be a constant.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+
+  unsigned CopyLen = ConstantSize->getZExtValue();
+  unsigned StoresNumEstimate = alignTo(CopyLen, Align) >> Log2_32(Align);
+  // Impose the same copy length limit as MaxStoresPerMemcpy.
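  // Worked numbers (editor's illustration, not patch content): CopyLen = 23
  // with Align = 4 gives StoresNumEstimate = alignTo(23, 4) >> Log2_32(4)
  // = 24 >> 2 = 6, well under the 128 limit, so this constant-size copy is
  // lowered to the BPFISD::MEMCPY node built below.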
+ if (StoresNumEstimate > getCommonMaxStoresPerMemFunc()) + return SDValue(); + + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + + Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src, + DAG.getConstant(CopyLen, dl, MVT::i64), + DAG.getConstant(Align, dl, MVT::i64)); + + return Dst.getValue(0); +} diff --git a/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h new file mode 100644 index 000000000000..19d3c5769573 --- /dev/null +++ b/contrib/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h @@ -0,0 +1,36 @@ +//===-- BPFSelectionDAGInfo.h - BPF SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the BPF subclass for SelectionDAGTargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_BPF_BPFSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_BPF_BPFSELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class BPFSelectionDAGInfo : public SelectionDAGTargetInfo { +public: + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + unsigned getCommonMaxStoresPerMemFunc() const { return 128; } + +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp index 42ca87f9ef67..56780bd9d46f 100644 --- a/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.cpp @@ -30,11 +30,14 @@ BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); initSubtargetFeatures(CPU, FS); + ParseSubtargetFeatures(CPU, FS); return *this; } void BPFSubtarget::initializeEnvironment() { HasJmpExt = false; + HasAlu32 = false; + UseDwarfRIS = false; } void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/contrib/llvm/lib/Target/BPF/BPFSubtarget.h b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h index fa1f24443bc3..60e56435fe4c 100644 --- a/contrib/llvm/lib/Target/BPF/BPFSubtarget.h +++ b/contrib/llvm/lib/Target/BPF/BPFSubtarget.h @@ -17,6 +17,7 @@ #include "BPFFrameLowering.h" #include "BPFISelLowering.h" #include "BPFInstrInfo.h" +#include "BPFSelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -33,7 +34,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo { BPFInstrInfo InstrInfo; BPFFrameLowering FrameLowering; BPFTargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + BPFSelectionDAGInfo TSInfo; private: void initializeEnvironment(); @@ -47,6 +48,12 @@ protected: // whether the cpu supports jmp ext bool HasJmpExt; + // whether the cpu supports alu32 instructions. + bool HasAlu32; + + // whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections + bool UseDwarfRIS; + public: // This constructor initializes the data members to match that // of the specified triple. @@ -59,6 +66,8 @@ public: // subtarget options. Definition of function is auto generated by tblgen. 
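// Illustration (not from the patch): a feature string such as
//   llc -march=bpf -mattr=+alu32,+dwarfris ...
// is consumed by the tblgen-generated ParseSubtargetFeatures declared below,
// which sets HasAlu32 / UseDwarfRIS so the getters report the requested
// features to the lowering and disassembler code.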
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool getHasJmpExt() const { return HasJmpExt; } + bool getHasAlu32() const { return HasAlu32; } + bool getUseDwarfRIS() const { return UseDwarfRIS; } const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; } const BPFFrameLowering *getFrameLowering() const override { @@ -67,7 +76,7 @@ public: const BPFTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + const BPFSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const TargetRegisterInfo *getRegisterInfo() const override { diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 60672fa2684b..84d89bff74fe 100644 --- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -13,6 +13,7 @@ #include "BPFTargetMachine.h" #include "BPF.h" +#include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -22,11 +23,18 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +static cl:: +opt<bool> DisableMIPeephole("disable-bpf-peephole", cl::Hidden, + cl::desc("Disable machine peepholes for BPF")); + extern "C" void LLVMInitializeBPFTarget() { // Register the target. RegisterTargetMachine<BPFTargetMachine> X(getTheBPFleTarget()); RegisterTargetMachine<BPFTargetMachine> Y(getTheBPFbeTarget()); RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget()); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeBPFMIPeepholePass(PR); } // DataLayout: little or big endian @@ -61,6 +69,9 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, TLOF(make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); + + BPFMCAsmInfo *MAI = static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo)); + MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS()); } namespace { // BPF Code Generator Pass Configuration Options. @@ -74,6 +85,8 @@ public: } bool addInstSelector() override; + void addMachineSSAOptimization() override; + void addPreEmitPass() override; }; } @@ -88,3 +101,21 @@ bool BPFPassConfig::addInstSelector() { return false; } + +void BPFPassConfig::addMachineSSAOptimization() { + // The default implementation must be called first as we want eBPF + // Peephole ran at last. + TargetPassConfig::addMachineSSAOptimization(); + + const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); + if (Subtarget->getHasAlu32() && !DisableMIPeephole) + addPass(createBPFMIPeepholePass()); +} + +void BPFPassConfig::addPreEmitPass() { + const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); + + if (getOptLevel() != CodeGenOpt::None) + if (Subtarget->getHasAlu32() && !DisableMIPeephole) + addPass(createBPFMIPreEmitPeepholePass()); +} diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index 6fc87d79c439..e7790ddb3d7e 100644 --- a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -35,6 +35,34 @@ namespace { /// A disassembler class for BPF. 
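The TargetPassConfig hooks above show where the two new machine passes slot into the pipeline: the SSA-level peephole runs after the generic machine SSA optimizations, and the pre-emit peephole runs last, gated on -mattr=+alu32, the optimization level, and the new -disable-bpf-peephole escape hatch. Both passes follow the usual MachineFunctionPass boilerplate; a condensed sketch, with hypothetical names standing in for the real BPFMIPeephole classes:

#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
// Hypothetical stand-in with the same shape as BPFMIPreEmitPeephole.
struct ExamplePeephole : public MachineFunctionPass {
  static char ID;
  ExamplePeephole() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override {
    bool Changed = false;
    // Walk MF here, erase or rewrite redundant instructions, and record
    // whether anything changed so the pass manager is told the truth.
    return Changed;
  }
};
} // end anonymous namespace

char ExamplePeephole::ID = 0;

// Factory that a TargetPassConfig hook would hand to addPass(...).
FunctionPass *createExamplePeepholePass() { return new ExamplePeephole(); }

The disassembler class that follows gains the matching decode-side support for the alu32 encodings.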
class BPFDisassembler : public MCDisassembler { public: + enum BPF_CLASS { + BPF_LD = 0x0, + BPF_LDX = 0x1, + BPF_ST = 0x2, + BPF_STX = 0x3, + BPF_ALU = 0x4, + BPF_JMP = 0x5, + BPF_RES = 0x6, + BPF_ALU64 = 0x7 + }; + + enum BPF_SIZE { + BPF_W = 0x0, + BPF_H = 0x1, + BPF_B = 0x2, + BPF_DW = 0x3 + }; + + enum BPF_MODE { + BPF_IMM = 0x0, + BPF_ABS = 0x1, + BPF_IND = 0x2, + BPF_MEM = 0x3, + BPF_LEN = 0x4, + BPF_MSH = 0x5, + BPF_XADD = 0x6 + }; + BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} ~BPFDisassembler() override = default; @@ -43,6 +71,10 @@ public: ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; + + uint8_t getInstClass(uint64_t Inst) const { return (Inst >> 56) & 0x7; }; + uint8_t getInstSize(uint64_t Inst) const { return (Inst >> 59) & 0x3; }; + uint8_t getInstMode(uint64_t Inst) const { return (Inst >> 61) & 0x7; }; }; } // end anonymous namespace @@ -141,8 +173,17 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, Result = readInstruction64(Bytes, Address, Size, Insn, IsLittleEndian); if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; - Result = decodeInstruction(DecoderTableBPF64, Instr, Insn, - Address, this, STI); + uint8_t InstClass = getInstClass(Insn); + if ((InstClass == BPF_LDX || InstClass == BPF_STX) && + getInstSize(Insn) != BPF_DW && + getInstMode(Insn) == BPF_MEM && + STI.getFeatureBits()[BPF::ALU32]) + Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address, + this, STI); + else + Result = decodeInstruction(DecoderTableBPF64, Instr, Insn, Address, this, + STI); + if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; switch (Instr.getOpcode()) { diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index 1f4ef098403d..20627da38817 100644 --- a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "BPFInstPrinter.h" -#include "BPF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 6593d9d018fd..6c255e9ef780 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -12,6 +12,7 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/EndianStream.h" #include <cassert> #include <cstdint> @@ -21,18 +22,16 @@ namespace { class BPFAsmBackend : public MCAsmBackend { public: - bool IsLittleEndian; - - BPFAsmBackend(bool IsLittleEndian) - : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} + BPFAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {} ~BPFAsmBackend() override = default; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsResolved) const override; + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; - std::unique_ptr<MCObjectWriter> - createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override; // No instruction requires 
relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -43,22 +42,25 @@ public: unsigned getNumFixupKinds() const override { return 1; } - bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + bool mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const override { + return false; + } void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, MCInst &Res) const override {} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; } // end anonymous namespace -bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { +bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { if ((Count % 8) != 0) return false; for (uint64_t i = 0; i < Count; i += 8) - OW->write64(0x15000000); + support::endian::write<uint64_t>(OS, 0x15000000, Endian); return true; } @@ -66,19 +68,17 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsResolved) const { + bool IsResolved, + const MCSubtargetInfo *STI) const { if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); - } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { - unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8; - - for (unsigned i = 0; i != Size; ++i) { - unsigned Idx = IsLittleEndian ? i : Size - i - 1; - Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8)); - } + } else if (Fixup.getKind() == FK_Data_4) { + support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian); + } else if (Fixup.getKind() == FK_Data_8) { + support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian); } else if (Fixup.getKind() == FK_PCRel_4) { Value = (uint32_t)((Value - 8) / 8); - if (IsLittleEndian) { + if (Endian == support::little) { Data[Fixup.getOffset() + 1] = 0x10; support::endian::write32le(&Data[Fixup.getOffset() + 4], Value); } else { @@ -88,31 +88,26 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } else { assert(Fixup.getKind() == FK_PCRel_2); Value = (uint16_t)((Value - 8) / 8); - if (IsLittleEndian) { - Data[Fixup.getOffset() + 2] = Value & 0xFF; - Data[Fixup.getOffset() + 3] = Value >> 8; - } else { - Data[Fixup.getOffset() + 2] = Value >> 8; - Data[Fixup.getOffset() + 3] = Value & 0xFF; - } + support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value, + Endian); } } -std::unique_ptr<MCObjectWriter> -BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { - return createBPFELFObjectWriter(OS, 0, IsLittleEndian); +std::unique_ptr<MCObjectTargetWriter> +BPFAsmBackend::createObjectTargetWriter() const { + return createBPFELFObjectWriter(0); } MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &) { - return new BPFAsmBackend(/*IsLittleEndian=*/true); + return new BPFAsmBackend(support::little); } MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &) { - return new BPFAsmBackend(/*IsLittleEndian=*/false); + return new BPFAsmBackend(support::big); } diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp 
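Two details in the asm-backend rewrite above are worth noting. First, writeNopData only accepts multiples of eight bytes because every BPF instruction is a fixed 8-byte word. Second, the hand-rolled little/big-endian byte stores in applyFixup are replaced by the runtime-endianness helpers from llvm/Support/Endian.h, so a single code path serves both the bpfel and bpfeb backends. A self-contained illustration of the helper, with a made-up buffer and value:

#include "llvm/Support/Endian.h"
#include <cstdint>
using namespace llvm;

// Writes Value into Data + Offset in the requested byte order, exactly as
// applyFixup above does for FK_Data_4 fixups.
void writeFixupDemo(char *Data, unsigned Offset, uint32_t Value, bool LE) {
  support::endian::write<uint32_t>(Data + Offset, Value,
                                   LE ? support::little : support::big);
}

The object-writer change below completes the same refactoring on the ELF side, where the writer no longer carries the output stream or the endianness itself.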
index f7de612dab15..134e890dfe49 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -54,9 +54,7 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } } -std::unique_ptr<MCObjectWriter> -llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, - bool IsLittleEndian) { - return createELFObjectWriter(llvm::make_unique<BPFELFObjectWriter>(OSABI), OS, - IsLittleEndian); +std::unique_ptr<MCObjectTargetWriter> +llvm::createBPFELFObjectWriter(uint8_t OSABI) { + return llvm::make_unique<BPFELFObjectWriter>(OSABI); } diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index fd7c97bf1f0a..171f7f607ff4 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -44,6 +44,10 @@ public: // line numbers, etc. CodePointerSize = 8; } + + void setDwarfUsesRelocationsAcrossSections(bool enable) { + DwarfUsesRelocationsAcrossSections = enable; + } }; } diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index b4ecfdee7bff..437f658caf6e 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -122,44 +122,35 @@ void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, computeAvailableFeatures(STI.getFeatureBits())); unsigned Opcode = MI.getOpcode(); - support::endian::Writer<support::little> LE(OS); - support::endian::Writer<support::big> BE(OS); + support::endian::Writer OSE(OS, + IsLittleEndian ? support::little : support::big); if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - LE.write<uint8_t>(Value >> 56); + OS << char(Value >> 56); if (IsLittleEndian) - LE.write<uint8_t>((Value >> 48) & 0xff); + OS << char((Value >> 48) & 0xff); else - LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); - LE.write<uint16_t>(0); - if (IsLittleEndian) - LE.write<uint32_t>(Value & 0xffffFFFF); - else - BE.write<uint32_t>(Value & 0xffffFFFF); + OS << char(SwapBits((Value >> 48) & 0xff)); + OSE.write<uint16_t>(0); + OSE.write<uint32_t>(Value & 0xffffFFFF); const MCOperand &MO = MI.getOperand(1); uint64_t Imm = MO.isImm() ? 
MO.getImm() : 0; - LE.write<uint8_t>(0); - LE.write<uint8_t>(0); - LE.write<uint16_t>(0); - if (IsLittleEndian) - LE.write<uint32_t>(Imm >> 32); - else - BE.write<uint32_t>(Imm >> 32); + OSE.write<uint8_t>(0); + OSE.write<uint8_t>(0); + OSE.write<uint16_t>(0); + OSE.write<uint32_t>(Imm >> 32); } else { // Get instruction encoding and emit it uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - LE.write<uint8_t>(Value >> 56); - if (IsLittleEndian) { - LE.write<uint8_t>((Value >> 48) & 0xff); - LE.write<uint16_t>((Value >> 32) & 0xffff); - LE.write<uint32_t>(Value & 0xffffFFFF); - } else { - LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); - BE.write<uint16_t>((Value >> 32) & 0xffff); - BE.write<uint32_t>(Value & 0xffffFFFF); - } + OS << char(Value >> 56); + if (IsLittleEndian) + OS << char((Value >> 48) & 0xff); + else + OS << char(SwapBits((Value >> 48) & 0xff)); + OSE.write<uint16_t>((Value >> 32) & 0xffff); + OSE.write<uint32_t>(Value & 0xffffFFFF); } } diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index cbf1ea7d7fb8..834b57527882 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -52,10 +52,10 @@ static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&MAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll) { - return createELFStreamer(Ctx, std::move(MAB), OS, std::move(Emitter), + return createELFStreamer(Ctx, std::move(MAB), std::move(OW), std::move(Emitter), RelaxAll); } diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index a6dac3abca02..6d2f0a1601e6 100644 --- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -24,7 +24,7 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCObjectWriter; +class MCObjectTargetWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; @@ -52,9 +52,7 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options); -std::unique_ptr<MCObjectWriter> createBPFELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, - bool IsLittleEndian); +std::unique_ptr<MCObjectTargetWriter> createBPFELFObjectWriter(uint8_t OSABI); } // Defines symbolic names for BPF registers. This defines a mapping from |
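A closing sketch for the MC-layer change in the code emitter above: instead of keeping separate little- and big-endian Writer objects and choosing between them at every field, the encoder now builds one support::endian::Writer with the target's byte order and streams each multi-byte field through it. A condensed, hedged rendering of that pattern (the value is arbitrary, and the big-endian register-nibble swap done by SwapBits is omitted):

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
using namespace llvm;

// Emits one 8-byte BPF instruction word, mirroring the non-LD_imm64 path
// of encodeInstruction above.
void emitWord(raw_ostream &OS, uint64_t Value, bool IsLittleEndian) {
  support::endian::Writer W(OS,
                            IsLittleEndian ? support::little : support::big);
  OS << char(Value >> 56);                   // opcode byte, order-independent
  OS << char((Value >> 48) & 0xff);          // register byte (BE swap omitted)
  W.write<uint16_t>((Value >> 32) & 0xffff); // 16-bit offset field
  W.write<uint32_t>(Value & 0xffffFFFF);     // 32-bit immediate field
}

For LD_imm64 the same pattern is applied twice, since that pseudo-instruction occupies two 8-byte words.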