author:    Dimitry Andric <dim@FreeBSD.org> 2018-07-30 16:33:32 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2018-07-30 16:33:32 +0000
commit:    51315c45ff5643a27f9c84b816db54ee870ba29b
tree:      1d87443fa0e53d3e6b315ce25787e64be0906bf7
parent:    6dfd050075216be8538ae375a22d30db72916f7e
parent:    eb11fae6d08f479c0799db45860a98af528fa6e7

Merge llvm trunk r338150, and resolve conflicts.

Notes: svn path=/projects/clang700-import/; revision=336916
Diffstat (limited to 'contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp')
 contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp | 257 ++++++++++++++++-------
 1 file changed, 198 insertions(+), 59 deletions(-)
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 3ea96e3148f2..9272cf692dc9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-lower"
+static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
+ cl::Hidden, cl::init(false),
+ cl::desc("Expand memcpy into load/store pairs in order"));
+
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
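For context: a hidden cl::opt such as the one added above is registered with LLVM's command-line parser automatically, so it can be passed straight to llc. A minimal sketch of an invocation, where only the flag name comes from this patch and the file names are hypothetical:

    llc -march=bpf -bpf-expand-memcpy-in-order prog.ll -o prog.s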
@@ -57,6 +61,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
// Set up the register classes.
addRegisterClass(MVT::i64, &BPF::GPRRegClass);
+ if (STI.getHasAlu32())
+ addRegisterClass(MVT::i32, &BPF::GPR32RegClass);
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
@@ -67,9 +73,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -77,32 +80,39 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ for (auto VT : { MVT::i32, MVT::i64 }) {
+ if (VT == MVT::i32 && !STI.getHasAlu32())
+ continue;
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::SHL_PARTS, VT, Expand);
+ setOperationAction(ISD::SRL_PARTS, VT, Expand);
+ setOperationAction(ISD::SRA_PARTS, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ }
- setOperationAction(ISD::ROTR, MVT::i64, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ if (STI.getHasAlu32()) {
+ setOperationAction(ISD::BSWAP, MVT::i32, Promote);
+ setOperationAction(ISD::BR_CC, MVT::i32, Promote);
+ }
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
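A note on the three actions used above: Expand lets generic legalization rewrite the node, Promote widens the operation to a larger type (here i32 to i64), and Custom routes the node to the backend's LowerOperation hook. That hook already exists in this file; abridged for context (not part of this diff), it dispatches roughly as follows:

    SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
      switch (Op.getOpcode()) {
      case ISD::BR_CC:
        return LowerBR_CC(Op, DAG);
      case ISD::GlobalAddress:
        return LowerGlobalAddress(Op, DAG);
      case ISD::SELECT_CC:
        return LowerSELECT_CC(Op, DAG);
      default:
        llvm_unreachable("unimplemented operand");
      }
    }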
@@ -126,12 +136,33 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(3);
setPrefFunctionAlignment(3);
- // inline memcpy() for kernel to see explicit copy
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 128;
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 128;
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128;
+ if (BPFExpandMemcpyInOrder) {
+ // LLVM generic code will try to expand memcpy into load/store pairs at
+ // this stage, which is before quite a few optimization passes; the loads
+ // and stores could therefore be moved apart from each other, which causes
+ // trouble for the memcpy pattern matchers inside kernel eBPF JIT
+ // compilers.
+ //
+ // When -bpf-expand-memcpy-in-order is specified, we want to defer the
+ // expansion of memcpy to a later stage in the pipeline so those load/store
+ // pairs won't be touched and can be kept in order. Hence, we set
+ // MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
+ // code path, and ask LLVM to use the target expander EmitTargetCodeForMemcpy.
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
+ } else {
+ // Inline memcpy() so that the kernel can see the explicit copy.
+ unsigned CommonMaxStores =
+ STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
+ }
// CPU/Feature control
+ HasAlu32 = STI.getHasAlu32();
HasJmpExt = STI.getHasJmpExt();
}
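The target expander named in the comment above lives in the companion BPFSelectionDAGInfo change, which is outside this file's diff. A sketch of its likely shape against the LLVM 7-era SelectionDAGTargetInfo hook, details abridged:

    SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
        SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst,
        SDValue Src, SDValue Size, unsigned Align, bool isVolatile,
        bool AlwaysInline, MachinePointerInfo DstPtrInfo,
        MachinePointerInfo SrcPtrInfo) const {
      // Only constant-size copies within the common store budget are handled;
      // returning an empty SDValue falls back to the generic/libcall path.
      ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
      if (!ConstantSize)
        return SDValue();

      unsigned CopyLen = ConstantSize->getZExtValue();
      if (alignTo(CopyLen, Align) / Align > getCommonMaxStoresPerMemFunc())
        return SDValue();

      // Emit a single BPFISD::MEMCPY node; the custom inserter below and a
      // later pseudo expansion turn it into in-order load/store pairs.
      SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      return DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src,
                         DAG.getConstant(CopyLen, dl, MVT::i64),
                         DAG.getConstant(Align, dl, MVT::i64));
    }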
@@ -189,26 +220,29 @@ SDValue BPFTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_BPF64);
+ CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);
for (auto &VA : ArgLocs) {
if (VA.isRegLoc()) {
// Arguments passed in registers
EVT RegVT = VA.getLocVT();
- switch (RegVT.getSimpleVT().SimpleTy) {
+ MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
+ switch (SimpleTy) {
default: {
errs() << "LowerFormalArguments Unhandled argument type: "
<< RegVT.getEVTString() << '\n';
llvm_unreachable(0);
}
+ case MVT::i32:
case MVT::i64:
- unsigned VReg = RegInfo.createVirtualRegister(&BPF::GPRRegClass);
+ unsigned VReg = RegInfo.createVirtualRegister(SimpleTy == MVT::i64 ?
+ &BPF::GPRRegClass :
+ &BPF::GPR32RegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
+ // If this is a value that has been promoted to a wider type, insert an
+ // assert[sz]ext to capture this, then truncate to the right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
@@ -220,6 +254,8 @@ SDValue BPFTargetLowering::LowerFormalArguments(
ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
InVals.push_back(ArgValue);
+
+ break;
}
} else {
fail(DL, DAG, "defined with too many args");
@@ -264,7 +300,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CC_BPF64);
+ CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -388,7 +424,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
// Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_BPF64);
+ CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -432,7 +468,7 @@ SDValue BPFTargetLowering::LowerCallResult(
return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
}
- CCInfo.AnalyzeCallResult(Ins, RetCC_BPF64);
+ CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
// Copy all of the result registers out of their specified physreg.
for (auto &Val : RVLocs) {
@@ -485,8 +521,7 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
if (!getHasJmpExt())
NegateCC(LHS, RHS, CC);
- SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i64);
-
+ SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
@@ -507,6 +542,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "BPFISD::BR_CC";
case BPFISD::Wrapper:
return "BPFISD::Wrapper";
+ case BPFISD::MEMCPY:
+ return "BPFISD::MEMCPY";
}
return nullptr;
}
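The new node name implies a matching enumerator in the BPFISD namespace in BPFISelLowering.h; for context, the enum after this commit would look roughly like:

    namespace BPFISD {
    enum NodeType : unsigned {
      FIRST_NUMBER = ISD::BUILTIN_OP_END,
      RET_FLAG,
      CALL,
      SELECT_CC,
      BR_CC,
      Wrapper,
      MEMCPY
    };
    }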
@@ -523,14 +560,90 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}
+unsigned
+BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned Reg, bool isSigned) const {
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
+ int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
+ MachineFunction *F = BB->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned PromotedReg0 = RegInfo.createVirtualRegister(RC);
+ unsigned PromotedReg1 = RegInfo.createVirtualRegister(RC);
+ unsigned PromotedReg2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+ BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
+ .addReg(PromotedReg0).addImm(32);
+ BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
+ .addReg(PromotedReg1).addImm(32);
+
+ return PromotedReg2;
+}
+
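EmitSubregExt materializes a 32-to-64-bit extension as a left shift by 32 followed by a right shift by 32, arithmetic (SRA_ri) for the signed case and logical (SRL_ri) for the unsigned case. In C terms, on a typical two's-complement target, the pair computes the following (a sketch for illustration, not part of the patch):

    #include <stdint.h>

    // MOV_32_64 + SLL_ri 32 + SRA_ri 32: sign-extend the low 32 bits.
    static inline int64_t sext32(uint64_t r) {
      return (int64_t)(r << 32) >> 32;
    }

    // MOV_32_64 + SLL_ri 32 + SRL_ri 32: zero-extend the low 32 bits.
    static inline uint64_t zext32(uint64_t r) {
      return (r << 32) >> 32;
    }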
+MachineBasicBlock *
+BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
+ MachineBasicBlock *BB)
+ const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+ unsigned ScratchReg;
+
+ // This function does custom insertion when lowering BPFISD::MEMCPY, which
+ // from memcpy semantics has only two register operands: the copy source
+ // address and the copy destination address.
+ //
+ // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
+ // third scratch register to serve as the destination register of the loads
+ // and the source register of the stores.
+ //
+ // The scratch register carries the Define | Dead | EarlyClobber flags.
+ // The EarlyClobber flag has the semantic property that the operand it is
+ // attached to is clobbered before the rest of the inputs are read. Hence it
+ // must be unique among the operands to the instruction. The Define flag is
+ // needed to convince the machine verifier that an Undef value isn't a
+ // problem, as we are loading memory into it anyway. The Dead flag is needed
+ // as the value in the scratch register isn't supposed to be used by any
+ // other instruction.
+ ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
+ MIB.addReg(ScratchReg,
+ RegState::Define | RegState::Dead | RegState::EarlyClobber);
+
+ return BB;
+}
+
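The scratch operand added above is consumed when the MEMCPY pseudo is finally expanded into real loads and stores, which happens in the companion BPFInstrInfo change rather than in this file. A simplified sketch of the per-word expansion for an 8-byte-aligned copy (register and opcode plumbing abridged):

    // For each 8-byte chunk: scratch = load [src + off]; store [dst + off] = scratch.
    for (unsigned Offset = 0; Offset < CopyLen; Offset += 8) {
      BuildMI(*BB, MI, DL, TII.get(BPF::LDD))
          .addReg(ScratchReg, RegState::Define | RegState::EarlyClobber)
          .addReg(SrcReg).addImm(Offset);
      BuildMI(*BB, MI, DL, TII.get(BPF::STD))
          .addReg(ScratchReg, RegState::Kill)
          .addReg(DstReg).addImm(Offset);
    }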
MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- bool isSelectOp = MI.getOpcode() == BPF::Select;
+ unsigned Opc = MI.getOpcode();
+ bool isSelectRROp = (Opc == BPF::Select ||
+ Opc == BPF::Select_64_32 ||
+ Opc == BPF::Select_32 ||
+ Opc == BPF::Select_32_64);
+
+ bool isMemcpyOp = Opc == BPF::MEMCPY;
+
+#ifndef NDEBUG
+ bool isSelectRIOp = (Opc == BPF::Select_Ri ||
+ Opc == BPF::Select_Ri_64_32 ||
+ Opc == BPF::Select_Ri_32 ||
+ Opc == BPF::Select_Ri_32_64);
+
+ assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
+ "Unexpected instr type to insert");
+#endif
+
+ if (isMemcpyOp)
+ return EmitInstrWithCustomInserterMemcpy(MI, BB);
- assert((isSelectOp || MI.getOpcode() == BPF::Select_Ri) && "Unexpected instr type to insert");
+ bool is32BitCmp = (Opc == BPF::Select_32 ||
+ Opc == BPF::Select_32_64 ||
+ Opc == BPF::Select_Ri_32 ||
+ Opc == BPF::Select_Ri_32_64);
// To "insert" a SELECT instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
@@ -561,56 +674,72 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB->addSuccessor(Copy1MBB);
// Insert Branch if Flag
- unsigned LHS = MI.getOperand(1).getReg();
int CC = MI.getOperand(3).getImm();
int NewCC;
switch (CC) {
case ISD::SETGT:
- NewCC = isSelectOp ? BPF::JSGT_rr : BPF::JSGT_ri;
+ NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri;
break;
case ISD::SETUGT:
- NewCC = isSelectOp ? BPF::JUGT_rr : BPF::JUGT_ri;
+ NewCC = isSelectRROp ? BPF::JUGT_rr : BPF::JUGT_ri;
break;
case ISD::SETGE:
- NewCC = isSelectOp ? BPF::JSGE_rr : BPF::JSGE_ri;
+ NewCC = isSelectRROp ? BPF::JSGE_rr : BPF::JSGE_ri;
break;
case ISD::SETUGE:
- NewCC = isSelectOp ? BPF::JUGE_rr : BPF::JUGE_ri;
+ NewCC = isSelectRROp ? BPF::JUGE_rr : BPF::JUGE_ri;
break;
case ISD::SETEQ:
- NewCC = isSelectOp ? BPF::JEQ_rr : BPF::JEQ_ri;
+ NewCC = isSelectRROp ? BPF::JEQ_rr : BPF::JEQ_ri;
break;
case ISD::SETNE:
- NewCC = isSelectOp ? BPF::JNE_rr : BPF::JNE_ri;
+ NewCC = isSelectRROp ? BPF::JNE_rr : BPF::JNE_ri;
break;
case ISD::SETLT:
- NewCC = isSelectOp ? BPF::JSLT_rr : BPF::JSLT_ri;
+ NewCC = isSelectRROp ? BPF::JSLT_rr : BPF::JSLT_ri;
break;
case ISD::SETULT:
- NewCC = isSelectOp ? BPF::JULT_rr : BPF::JULT_ri;
+ NewCC = isSelectRROp ? BPF::JULT_rr : BPF::JULT_ri;
break;
case ISD::SETLE:
- NewCC = isSelectOp ? BPF::JSLE_rr : BPF::JSLE_ri;
+ NewCC = isSelectRROp ? BPF::JSLE_rr : BPF::JSLE_ri;
break;
case ISD::SETULE:
- NewCC = isSelectOp ? BPF::JULE_rr : BPF::JULE_ri;
+ NewCC = isSelectRROp ? BPF::JULE_rr : BPF::JULE_ri;
break;
default:
report_fatal_error("unimplemented select CondCode " + Twine(CC));
}
- if (isSelectOp)
- BuildMI(BB, DL, TII.get(NewCC))
- .addReg(LHS)
- .addReg(MI.getOperand(2).getReg())
- .addMBB(Copy1MBB);
- else {
+
+ unsigned LHS = MI.getOperand(1).getReg();
+ bool isSignedCmp = (CC == ISD::SETGT ||
+ CC == ISD::SETGE ||
+ CC == ISD::SETLT ||
+ CC == ISD::SETLE);
+
+ // eBPF at the moment only has 64-bit comparisons. Any 32-bit comparison
+ // needs to be promoted; however, if the 32-bit comparison operands are
+ // destination registers, then they are already implicitly zero-extended
+ // and there is no need for an explicit zero-extend sequence for them.
+ //
+ // We simply do the extension for all situations in this method, and rely
+ // on the BPFMIPeephole pass to remove the unnecessary ones.
+ if (is32BitCmp)
+ LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
+
+ if (isSelectRROp) {
+ unsigned RHS = MI.getOperand(2).getReg();
+
+ if (is32BitCmp)
+ RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
+
+ BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
+ } else {
int64_t imm32 = MI.getOperand(2).getImm();
// Sanity check before we build the J*_ri instruction.
assert (isInt<32>(imm32));
BuildMI(BB, DL, TII.get(NewCC))
- .addReg(LHS)
- .addImm(imm32)
- .addMBB(Copy1MBB);
+ .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
}
// Copy0MBB:
@@ -634,3 +763,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
+
+EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+ EVT VT) const {
+ return getHasAlu32() ? MVT::i32 : MVT::i64;
+}
+
+MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT VT) const {
+ return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
+}
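These two overrides pair with declarations in BPFISelLowering.h; for context, against the LLVM 7 TargetLowering interface they would be declared as:

    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;
    MVT getScalarShiftAmountTy(const DataLayout &DL, EVT VT) const override;

With ALU32 enabled, setcc results and shift amounts stay i32 so that 32-bit ALU instructions can be selected for them.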