path: root/sys/dev/mlx5/mlx5_en
Diffstat (limited to 'sys/dev/mlx5/mlx5_en')
-rw-r--r--  sys/dev/mlx5/mlx5_en/en.h                   946
-rw-r--r--  sys/dev/mlx5/mlx5_en/en_rl.h                174
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c     1203
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c  1487
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_main.c        3901
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_rl.c          1542
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_rx.c           550
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_tx.c           666
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c          53
9 files changed, 10522 insertions, 0 deletions
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
new file mode 100644
index 000000000000..73f0268ca270
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -0,0 +1,946 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MLX5_EN_H_
+#define _MLX5_EN_H_
+
+#include <linux/kmod.h>
+#include <linux/page.h>
+#include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/udp.h>
+#include <net/ethernet.h>
+#include <sys/buf_ring.h>
+#include <sys/kthread.h>
+
+#include "opt_rss.h"
+
+#ifdef RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+
+#include <machine/bus.h>
+
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/qp.h>
+#include <dev/mlx5/cq.h>
+#include <dev/mlx5/port.h>
+#include <dev/mlx5/vport.h>
+#include <dev/mlx5/diagnostics.h>
+
+#include <dev/mlx5/mlx5_core/wq.h>
+#include <dev/mlx5/mlx5_core/transobj.h>
+#include <dev/mlx5/mlx5_core/mlx5_core.h>
+
+#define IEEE_8021QAZ_MAX_TCS 8
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xe
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xe
+
+#define MLX5E_MAX_RX_SEGS 7
+
+#ifndef MLX5E_MAX_RX_BYTES
+#define MLX5E_MAX_RX_BYTES MCLBYTES
+#endif
+
+#if (MLX5E_MAX_RX_SEGS == 1)
+/* FreeBSD HW LRO is limited by 16KB - the size of max mbuf */
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES
+#else
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \
+ MIN(65535, MLX5E_MAX_RX_SEGS * MLX5E_MAX_RX_BYTES)
+#endif
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
+#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7
+#define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE
+#define MLX5E_HW2SW_MTU(hwmtu) \
+ ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
+#define MLX5E_SW2HW_MTU(swmtu) \
+ ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
+#define MLX5E_SW2MB_MTU(swmtu) \
+ (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN)
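+/*
+ * Example: with the standard FreeBSD Ethernet header sizes
+ * (ETHER_HDR_LEN = 14, ETHER_VLAN_ENCAP_LEN = 4, ETHER_CRC_LEN = 4),
+ * MLX5E_SW2HW_MTU(1500) = 1522 and MLX5E_HW2SW_MTU(1522) = 1500.
+ */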
+#define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */
+#define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet
+ * jumbo frames */
+
+#define MLX5E_BUDGET_MAX 8192 /* RX and TX */
+#define MLX5E_RX_BUDGET_MAX 256
+#define MLX5E_SQ_BF_BUDGET 16
+#define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */
+
+#define MLX5E_MAX_TX_NUM_TC 8 /* units */
+#define MLX5E_MAX_TX_HEADER 128 /* bytes */
+#define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */
+#define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */
+#define MLX5E_MAX_TX_MBUF_FRAGS \
+ ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \
+ (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS) - \
+ 1 /* the maximum value of the DS counter is 0x3F and not 0x40 */) /* units */
+#define MLX5E_MAX_TX_INLINE \
+ (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \
+ sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */
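+/*
+ * MLX5E_MAX_TX_INLINE is the number of header bytes which can be
+ * inlined into a send WQE: the MLX5E_MAX_TX_HEADER budget minus the
+ * control and Ethernet segments, with the size of inline_hdr_start
+ * added back because the inline data starts at that field inside the
+ * Ethernet segment.
+ */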
+
+#define MLX5E_100MB (100000)
+#define MLX5E_1GB (1000000)
+
+MALLOC_DECLARE(M_MLX5EN);
+
+struct mlx5_core_dev;
+struct mlx5e_cq;
+
+typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *);
+
+#define MLX5E_STATS_COUNT(a,b,c,d) a
+#define MLX5E_STATS_VAR(a,b,c,d) b;
+#define MLX5E_STATS_DESC(a,b,c,d) c, d,
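+/*
+ * The MLX5E_*_STATS(m) lists below follow the X-macro pattern.
+ * For example, MLX5E_VPORT_STATS(MLX5E_STATS_VAR) emits one
+ * "u64 <name>;" field per counter, MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
+ * emits the sysctl name/description string pairs, and the *_NUM
+ * macros sum the "+1" arguments to count the entries at compile time.
+ */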
+
+#define MLX5E_VPORT_STATS(m) \
+ /* HW counters */ \
+ m(+1, u64 rx_packets, "rx_packets", "Received packets") \
+ m(+1, u64 rx_bytes, "rx_bytes", "Received bytes") \
+ m(+1, u64 tx_packets, "tx_packets", "Transmitted packets") \
+ m(+1, u64 tx_bytes, "tx_bytes", "Transmitted bytes") \
+ m(+1, u64 rx_error_packets, "rx_error_packets", "Received error packets") \
+ m(+1, u64 rx_error_bytes, "rx_error_bytes", "Received error bytes") \
+ m(+1, u64 tx_error_packets, "tx_error_packets", "Transmitted error packets") \
+ m(+1, u64 tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \
+ m(+1, u64 rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \
+ m(+1, u64 rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \
+ m(+1, u64 tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \
+ m(+1, u64 tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \
+ m(+1, u64 rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \
+ m(+1, u64 rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \
+ m(+1, u64 tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \
+ m(+1, u64 tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \
+ m(+1, u64 rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \
+ m(+1, u64 rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \
+ m(+1, u64 tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \
+ m(+1, u64 tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \
+ m(+1, u64 rx_out_of_buffer, "rx_out_of_buffer", "Receive out-of-buffer events (no receive WQEs available)") \
+ /* SW counters */ \
+ m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \
+ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \
+ m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \
+ m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \
+ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
+ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
+ m(+1, u64 rx_csum_good, "rx_csum_good", "Received checksum valid packets") \
+ m(+1, u64 rx_csum_none, "rx_csum_none", "Received no checksum packets") \
+ m(+1, u64 tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \
+ m(+1, u64 tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \
+ m(+1, u64 tx_defragged, "tx_defragged", "Transmit queue defragged") \
+ m(+1, u64 rx_wqe_err, "rx_wqe_err", "Receive WQE errors")
+
+#define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_vport_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_VPORT_STATS(MLX5E_STATS_VAR)
+ u32 rx_out_of_buffer_prev;
+};
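+/*
+ * The zero-length "arg" member aliases the counters that follow it,
+ * allowing the sysctl code to treat this structure as a plain u64
+ * array indexed 0..MLX5E_VPORT_STATS_NUM-1.  The other statistics
+ * structures below use the same layout trick.
+ */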
+
+#define MLX5E_PPORT_IEEE802_3_STATS(m) \
+ m(+1, u64 frames_tx, "frames_tx", "Frames transmitted") \
+ m(+1, u64 frames_rx, "frames_rx", "Frames received") \
+ m(+1, u64 check_seq_err, "check_seq_err", "Sequence errors") \
+ m(+1, u64 alignment_err, "alignment_err", "Alignment errors") \
+ m(+1, u64 octets_tx, "octets_tx", "Bytes transmitted") \
+ m(+1, u64 octets_received, "octets_received", "Bytes received") \
+ m(+1, u64 multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \
+ m(+1, u64 broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \
+ m(+1, u64 multicast_rx, "multicast_rx", "Multicast received") \
+ m(+1, u64 broadcast_rx, "broadcast_rx", "Broadcast received") \
+ m(+1, u64 in_range_len_errors, "in_range_len_errors", "In range length errors") \
+ m(+1, u64 out_of_range_len, "out_of_range_len", "Out of range length errors") \
+ m(+1, u64 too_long_errors, "too_long_errors", "Too long errors") \
+ m(+1, u64 symbol_err, "symbol_err", "Symbol errors") \
+ m(+1, u64 mac_control_tx, "mac_control_tx", "MAC control transmitted") \
+ m(+1, u64 mac_control_rx, "mac_control_rx", "MAC control received") \
+ m(+1, u64 unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \
+ m(+1, u64 pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \
+ m(+1, u64 pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted")
+
+#define MLX5E_PPORT_RFC2819_STATS(m) \
+ m(+1, u64 drop_events, "drop_events", "Dropped events") \
+ m(+1, u64 octets, "octets", "Octets") \
+ m(+1, u64 pkts, "pkts", "Packets") \
+ m(+1, u64 broadcast_pkts, "broadcast_pkts", "Broadcast packets") \
+ m(+1, u64 multicast_pkts, "multicast_pkts", "Multicast packets") \
+ m(+1, u64 crc_align_errors, "crc_align_errors", "CRC alignment errors") \
+ m(+1, u64 undersize_pkts, "undersize_pkts", "Undersized packets") \
+ m(+1, u64 oversize_pkts, "oversize_pkts", "Oversized packets") \
+ m(+1, u64 fragments, "fragments", "Fragments") \
+ m(+1, u64 jabbers, "jabbers", "Jabbers") \
+ m(+1, u64 collisions, "collisions", "Collisions")
+
+#define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
+ m(+1, u64 p64octets, "p64octets", "Bytes") \
+ m(+1, u64 p65to127octets, "p65to127octets", "Bytes") \
+ m(+1, u64 p128to255octets, "p128to255octets", "Bytes") \
+ m(+1, u64 p256to511octets, "p256to511octets", "Bytes") \
+ m(+1, u64 p512to1023octets, "p512to1023octets", "Bytes") \
+ m(+1, u64 p1024to1518octets, "p1024to1518octets", "Bytes") \
+ m(+1, u64 p1519to2047octets, "p1519to2047octets", "Bytes") \
+ m(+1, u64 p2048to4095octets, "p2048to4095octets", "Bytes") \
+ m(+1, u64 p4096to8191octets, "p4096to8191octets", "Bytes") \
+ m(+1, u64 p8192to10239octets, "p8192to10239octets", "Bytes")
+
+#define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
+ m(+1, u64 in_octets, "in_octets", "In octets") \
+ m(+1, u64 in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \
+ m(+1, u64 in_discards, "in_discards", "In discards") \
+ m(+1, u64 in_errors, "in_errors", "In errors") \
+ m(+1, u64 in_unknown_protos, "in_unknown_protos", "In unknown protocols") \
+ m(+1, u64 out_octets, "out_octets", "Out octets") \
+ m(+1, u64 out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \
+ m(+1, u64 out_discards, "out_discards", "Out discards") \
+ m(+1, u64 out_errors, "out_errors", "Out errors") \
+ m(+1, u64 in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \
+ m(+1, u64 in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \
+ m(+1, u64 out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \
+ m(+1, u64 out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets")
+
+#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \
+ m(+1, u64 time_since_last_clear, "time_since_last_clear", \
+ "Time since the last counters clear event (msec)") \
+ m(+1, u64 symbol_errors, "symbol_errors", "Symbol errors") \
+ m(+1, u64 sync_headers_errors, "sync_headers_errors", "Sync header error counter") \
+ m(+1, u64 bip_errors_lane0, "edpl_bip_errors_lane0", \
+ "Indicates the number of PRBS errors on lane 0") \
+ m(+1, u64 bip_errors_lane1, "edpl_bip_errors_lane1", \
+ "Indicates the number of PRBS errors on lane 1") \
+ m(+1, u64 bip_errors_lane2, "edpl_bip_errors_lane2", \
+ "Indicates the number of PRBS errors on lane 2") \
+ m(+1, u64 bip_errors_lane3, "edpl_bip_errors_lane3", \
+ "Indicates the number of PRBS errors on lane 3") \
+ m(+1, u64 fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \
+ "FEC correctable block counter lane 0") \
+ m(+1, u64 fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \
+ "FEC correctable block counter lane 1") \
+ m(+1, u64 fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \
+ "FEC correctable block counter lane 2") \
+ m(+1, u64 fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \
+ "FEC correctable block counter lane 3") \
+ m(+1, u64 rs_corrected_blocks, "rs_corrected_blocks", \
+ "FEC correcable block counter") \
+ m(+1, u64 rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \
+ "FEC uncorrecable block counter") \
+ m(+1, u64 rs_no_errors_blocks, "rs_no_errors_blocks", \
+ "The number of RS-FEC blocks received that had no errors") \
+ m(+1, u64 rs_single_error_blocks, "rs_single_error_blocks", \
+ "The number of corrected RS-FEC blocks received that had" \
+ "exactly 1 error symbol") \
+ m(+1, u64 rs_corrected_symbols_total, "rs_corrected_symbols_total", \
+ "Port FEC corrected symbol counter") \
+ m(+1, u64 rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \
+ "FEC corrected symbol counter lane 0") \
+ m(+1, u64 rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \
+ "FEC corrected symbol counter lane 1") \
+ m(+1, u64 rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \
+ "FEC corrected symbol counter lane 2") \
+ m(+1, u64 rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \
+ "FEC corrected symbol counter lane 3")
+
+/* Per priority statistics for PFC */
+#define MLX5E_PPORT_PER_PRIO_STATS_SUB(m,n,p) \
+ m(n, p, +1, u64, rx_octets, "rx_octets", "Received octets") \
+ m(n, p, +1, u64, reserved_0, "reserved_0", "Reserved") \
+ m(n, p, +1, u64, reserved_1, "reserved_1", "Reserved") \
+ m(n, p, +1, u64, reserved_2, "reserved_2", "Reserved") \
+ m(n, p, +1, u64, rx_frames, "rx_frames", "Received frames") \
+ m(n, p, +1, u64, tx_octets, "tx_octets", "Transmitted octets") \
+ m(n, p, +1, u64, reserved_3, "reserved_3", "Reserved") \
+ m(n, p, +1, u64, reserved_4, "reserved_4", "Reserved") \
+ m(n, p, +1, u64, reserved_5, "reserved_5", "Reserved") \
+ m(n, p, +1, u64, tx_frames, "tx_frames", "Transmitted frames") \
+ m(n, p, +1, u64, rx_pause, "rx_pause", "Received pause frames") \
+ m(n, p, +1, u64, rx_pause_duration, "rx_pause_duration", \
+ "Received pause duration") \
+ m(n, p, +1, u64, tx_pause, "tx_pause", "Transmitted pause frames") \
+ m(n, p, +1, u64, tx_pause_duration, "tx_pause_duration", \
+ "Transmitted pause duration") \
+ m(n, p, +1, u64, rx_pause_transition, "rx_pause_transition", \
+ "Received pause transitions") \
+ m(n, p, +1, u64, rx_discards, "rx_discards", "Discarded received frames") \
+ m(n, p, +1, u64, device_stall_minor_watermark, \
+ "device_stall_minor_watermark", "Device stall minor watermark") \
+ m(n, p, +1, u64, device_stall_critical_watermark, \
+ "device_stall_critical_watermark", "Device stall critical watermark")
+
+#define MLX5E_PPORT_PER_PRIO_STATS_PREFIX(m,p,c,t,f,s,d) \
+ m(c, t pri_##p##_##f, "prio" #p "_" s, "Priority " #p " - " d)
+
+#define MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO 8
+
+#define MLX5E_PPORT_PER_PRIO_STATS(m) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,0) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,1) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,2) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,3) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,4) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,5) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,6) \
+ MLX5E_PPORT_PER_PRIO_STATS_SUB(MLX5E_PPORT_PER_PRIO_STATS_PREFIX,m,7)
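+/*
+ * Expansion example (illustrative): for priority 0 the first entry
+ * above becomes "u64 pri_0_rx_octets;" via MLX5E_STATS_VAR, and the
+ * sysctl pair ("prio0_rx_octets", "Priority 0 - Received octets")
+ * via MLX5E_STATS_DESC.
+ */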
+
+/*
+ * Make sure to update mlx5e_update_pport_counters()
+ * when adding a new MLX5E_PPORT_STATS block
+ */
+#define MLX5E_PPORT_STATS(m) \
+ MLX5E_PPORT_PER_PRIO_STATS(m) \
+ MLX5E_PPORT_IEEE802_3_STATS(m) \
+ MLX5E_PPORT_RFC2819_STATS(m)
+
+#define MLX5E_PORT_STATS_DEBUG(m) \
+ MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
+ MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
+ MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m)
+
+#define MLX5E_PPORT_IEEE802_3_STATS_NUM \
+ (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_RFC2819_STATS_NUM \
+ (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_STATS_NUM \
+ (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT))
+
+#define MLX5E_PPORT_PER_PRIO_STATS_NUM \
+ (0 MLX5E_PPORT_PER_PRIO_STATS(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PORT_STATS_DEBUG_NUM \
+ (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT))
+
+struct mlx5e_pport_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_PPORT_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_port_stats_debug {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR)
+};
+
+#define MLX5E_RQ_STATS(m) \
+ m(+1, u64 packets, "packets", "Received packets") \
+ m(+1, u64 csum_none, "csum_none", "Received no checksum packets") \
+ m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \
+ m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \
+ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
+ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
+ m(+1, u64 wqe_err, "wqe_err", "Receive WQE errors")
+
+#define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_rq_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_RQ_STATS(MLX5E_STATS_VAR)
+};
+
+#define MLX5E_SQ_STATS(m) \
+ m(+1, u64 packets, "packets", "Transmitted packets") \
+ m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \
+ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \
+ m(+1, u64 csum_offload_none, "csum_offload_none", "Transmitted packets with no checksum offload") \
+ m(+1, u64 defragged, "defragged", "Transmitted packets requiring defragmentation") \
+ m(+1, u64 dropped, "dropped", "Dropped transmitted packets") \
+ m(+1, u64 nop, "nop", "Transmitted NOP requests")
+
+#define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_sq_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_SQ_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_stats {
+ struct mlx5e_vport_stats vport;
+ struct mlx5e_pport_stats pport;
+ struct mlx5e_port_stats_debug port_stats_debug;
+};
+
+struct mlx5e_rq_param {
+ u32 rqc [MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc [MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc [MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_params {
+ u8 log_sq_size;
+ u8 log_rq_size;
+ u16 num_channels;
+ u8 default_vlan_prio;
+ u8 num_tc;
+ u8 rx_cq_moderation_mode;
+ u8 tx_cq_moderation_mode;
+ u16 rx_cq_moderation_usec;
+ u16 rx_cq_moderation_pkts;
+ u16 tx_cq_moderation_usec;
+ u16 tx_cq_moderation_pkts;
+ u16 min_rx_wqes;
+ bool hw_lro_en;
+ bool cqe_zipping_en;
+ u32 lro_wqe_sz;
+ u16 rx_hash_log_tbl_sz;
+ u32 tx_pauseframe_control __aligned(4);
+ u32 rx_pauseframe_control __aligned(4);
+ u32 tx_priority_flow_control __aligned(4);
+ u32 rx_priority_flow_control __aligned(4);
+ u16 tx_max_inline;
+ u8 tx_min_inline_mode;
+ u8 channels_rsss;
+};
+
+#define MLX5E_PARAMS(m) \
+ m(+1, u64 tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \
+ m(+1, u64 rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \
+ m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
+ m(+1, u64 rx_queue_size, "rx_queue_size", "Default receive queue size") \
+ m(+1, u64 channels, "channels", "Default number of channels") \
+ m(+1, u64 channels_rsss, "channels_rsss", "Default channels receive side scaling stride") \
+ m(+1, u64 coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \
+ m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
+ m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
+ m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
+ m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+ m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
+ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
+ m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+ m(+1, u64 tx_bufring_disable, "tx_bufring_disable", "0: Enable bufring 1: Disable bufring") \
+ m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \
+ m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \
+ m(+1, u64 hw_lro, "hw_lro", "Set to enable HW LRO") \
+ m(+1, u64 cqe_zipping, "cqe_zipping", "0: CQE zipping disabled 1: CQE zipping enabled") \
+ m(+1, u64 modify_tx_dma, "modify_tx_dma", "0: Enable TX 1: Disable TX") \
+ m(+1, u64 modify_rx_dma, "modify_rx_dma", "0: Enable RX 1: Disable RX") \
+ m(+1, u64 diag_pci_enable, "diag_pci_enable", "0: Disabled 1: Enabled") \
+ m(+1, u64 diag_general_enable, "diag_general_enable", "0: Disabled 1: Enabled") \
+ m(+1, u64 hw_mtu, "hw_mtu", "Current hardware MTU value") \
+ m(+1, u64 mc_local_lb, "mc_local_lb", "0: Local multicast loopback enabled 1: Disabled") \
+ m(+1, u64 uc_local_lb, "uc_local_lb", "0: Local unicast loopback enabled 1: Disabled")
+
+
+#define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT))
+
+struct mlx5e_params_ethtool {
+ u64 arg [0];
+ MLX5E_PARAMS(MLX5E_STATS_VAR)
+ u64 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 prio_tc[IEEE_8021QAZ_MAX_TCS];
+ u8 dscp2prio[MLX5_MAX_SUPPORTED_DSCP];
+ u8 trust_state;
+};
+
+/* EEPROM Standards for plug-in modules */
+#ifndef MLX5E_ETH_MODULE_SFF_8472
+#define MLX5E_ETH_MODULE_SFF_8472 0x1
+#define MLX5E_ETH_MODULE_SFF_8472_LEN 128
+#endif
+
+#ifndef MLX5E_ETH_MODULE_SFF_8636
+#define MLX5E_ETH_MODULE_SFF_8636 0x2
+#define MLX5E_ETH_MODULE_SFF_8636_LEN 256
+#endif
+
+#ifndef MLX5E_ETH_MODULE_SFF_8436
+#define MLX5E_ETH_MODULE_SFF_8436 0x3
+#define MLX5E_ETH_MODULE_SFF_8436_LEN 256
+#endif
+
+/* EEPROM I2C Addresses */
+#define MLX5E_I2C_ADDR_LOW 0x50
+#define MLX5E_I2C_ADDR_HIGH 0x51
+
+#define MLX5E_EEPROM_LOW_PAGE 0x0
+#define MLX5E_EEPROM_HIGH_PAGE 0x3
+
+#define MLX5E_EEPROM_HIGH_PAGE_OFFSET 128
+#define MLX5E_EEPROM_PAGE_LENGTH 256
+
+#define MLX5E_EEPROM_INFO_BYTES 0x3
+
+struct mlx5e_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per HW polling */
+ struct mlx5_core_cq mcq;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_wq_ctrl wq_ctrl;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+struct mlx5e_rq_mbuf {
+ bus_dmamap_t dma_map;
+ caddr_t data;
+ struct mbuf *mbuf;
+};
+
+struct mlx5e_rq {
+ /* data path */
+ struct mlx5_wq_ll wq;
+ struct mtx mtx;
+ bus_dma_tag_t dma_tag;
+ u32 wqe_sz;
+ u32 nsegs;
+ struct mlx5e_rq_mbuf *mbuf;
+ struct ifnet *ifp;
+ struct mlx5e_rq_stats stats;
+ struct mlx5e_cq cq;
+ struct lro_ctrl lro;
+ volatile int enabled;
+ int ix;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+ u32 rqn;
+ struct mlx5e_channel *channel;
+ struct callout watchdog;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+struct mlx5e_sq_mbuf {
+ bus_dmamap_t dma_map;
+ struct mbuf *mbuf;
+ u32 num_bytes;
+ u32 num_wqebbs;
+};
+
+enum {
+ MLX5E_SQ_READY,
+ MLX5E_SQ_FULL
+};
+
+struct mlx5e_sq {
+ /* data path */
+ struct mtx lock;
+ bus_dma_tag_t dma_tag;
+ struct mtx comp_lock;
+
+ /* dirtied @completion */
+ u16 cc;
+
+ /* dirtied @xmit */
+ u16 pc __aligned(MLX5E_CACHELINE_SIZE);
+ u16 bf_offset;
+ u16 cev_counter; /* completion event counter */
+ u16 cev_factor; /* completion event factor */
+ u16 cev_next_state; /* next completion event state */
+#define MLX5E_CEV_STATE_INITIAL 0 /* timer not started */
+#define MLX5E_CEV_STATE_SEND_NOPS 1 /* send NOPs */
+#define MLX5E_CEV_STATE_HOLD_NOPS 2 /* don't send NOPs yet */
+ u16 stopped; /* set if SQ is stopped */
+ struct callout cev_callout;
+ union {
+ u32 d32[2];
+ u64 d64;
+ } doorbell;
+ struct mlx5e_sq_stats stats;
+
+ struct mlx5e_cq cq;
+ struct task sq_task;
+ struct taskqueue *sq_tq;
+
+ /* pointers to per packet info: write@xmit, read@completion */
+ struct mlx5e_sq_mbuf *mbuf;
+ struct buf_ring *br;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ struct mlx5_uar uar;
+ struct ifnet *ifp;
+ u32 sqn;
+ u32 bf_buf_size;
+ u32 mkey_be;
+ u16 max_inline;
+ u8 min_inline_mode;
+ u8 vlan_inline_cap;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_priv *priv;
+ int tc;
+ unsigned int queue_state;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
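+/*
+ * Free-space check for the send queue ring: "cc" (consumer counter)
+ * and "pc" (producer counter) are free-running 16-bit counters, so
+ * (cc - pc) masked with sz_m1 yields the number of unused entries.
+ * For example, with a 16-entry ring, pc = 5 and cc = 2 leave 13 free
+ * entries.  The cc == pc case (empty ring) is handled separately
+ * because the masked difference is then zero.
+ */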
+static inline bool
+mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
+{
+ u16 cc = sq->cc;
+ u16 pc = sq->pc;
+
+ return ((sq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
+}
+
+struct mlx5e_channel {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC];
+ struct ifnet *ifp;
+ u32 mkey_be;
+ u8 num_tc;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ int ix;
+ int cpu;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+enum mlx5e_traffic_types {
+ MLX5E_TT_IPV4_TCP,
+ MLX5E_TT_IPV6_TCP,
+ MLX5E_TT_IPV4_UDP,
+ MLX5E_TT_IPV6_UDP,
+ MLX5E_TT_IPV4_IPSEC_AH,
+ MLX5E_TT_IPV6_IPSEC_AH,
+ MLX5E_TT_IPV4_IPSEC_ESP,
+ MLX5E_TT_IPV6_IPSEC_ESP,
+ MLX5E_TT_IPV4,
+ MLX5E_TT_IPV6,
+ MLX5E_TT_ANY,
+ MLX5E_NUM_TT,
+};
+
+enum {
+ MLX5E_RQT_SPREADING = 0,
+ MLX5E_RQT_DEFAULT_RQ = 1,
+ MLX5E_NUM_RQT = 2,
+};
+
+struct mlx5_flow_rule;
+
+struct mlx5e_eth_addr_info {
+ u8 addr [ETH_ALEN + 2];
+ u32 tt_vec;
+ /* flow table rule per traffic type */
+ struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT];
+};
+
+#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE)
+
+struct mlx5e_eth_addr_hash_node;
+
+struct mlx5e_eth_addr_hash_head {
+ struct mlx5e_eth_addr_hash_node *lh_first;
+};
+
+struct mlx5e_eth_addr_db {
+ struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE];
+ struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE];
+ struct mlx5e_eth_addr_info broadcast;
+ struct mlx5e_eth_addr_info allmulti;
+ struct mlx5e_eth_addr_info promisc;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+enum {
+ MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+ MLX5E_STATE_OPENED,
+};
+
+enum {
+ MLX5_BW_NO_LIMIT = 0,
+ MLX5_100_MBPS_UNIT = 3,
+ MLX5_GBPS_UNIT = 4,
+};
+
+struct mlx5e_vlan_db {
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ struct mlx5_flow_rule *active_vlans_ft_rule[VLAN_N_VID];
+ struct mlx5_flow_rule *untagged_ft_rule;
+ struct mlx5_flow_rule *any_cvlan_ft_rule;
+ struct mlx5_flow_rule *any_svlan_ft_rule;
+ bool filter_disabled;
+};
+
+struct mlx5e_flow_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+};
+
+struct mlx5e_flow_tables {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5e_flow_table vlan;
+ struct mlx5e_flow_table main;
+ struct mlx5e_flow_table inner_rss;
+};
+
+#ifdef RATELIMIT
+#include "en_rl.h"
+#endif
+
+#define MLX5E_TSTMP_PREC 10
+
+struct mlx5e_clbr_point {
+ uint64_t base_curr;
+ uint64_t base_prev;
+ uint64_t clbr_hw_prev;
+ uint64_t clbr_hw_curr;
+ u_int clbr_gen;
+};
+
+struct mlx5e_priv {
+ struct mlx5_core_dev *mdev; /* must be first */
+
+ /* priv data path fields - start */
+ int order_base_2_num_channels;
+ int queue_mapping_channel_mask;
+ int num_tc;
+ int default_vlan_prio;
+ /* priv data path fields - end */
+
+ unsigned long state;
+ int gone;
+#define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock)
+#define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock)
+#define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock)
+ struct sx state_lock; /* Protects Interface state */
+ struct mlx5_uar cq_uar;
+ u32 pdn;
+ u32 tdn;
+ struct mlx5_core_mr mr;
+
+ struct mlx5e_channel *volatile *channel;
+ u32 tisn[MLX5E_MAX_TX_NUM_TC];
+ u32 rqtn;
+ u32 tirn[MLX5E_NUM_TT];
+
+ struct mlx5e_flow_tables fts;
+ struct mlx5e_eth_addr_db eth_addr;
+ struct mlx5e_vlan_db vlan;
+
+ struct mlx5e_params params;
+ struct mlx5e_params_ethtool params_ethtool;
+ union mlx5_core_pci_diagnostics params_pci;
+ union mlx5_core_general_diagnostics params_general;
+ struct mtx async_events_mtx; /* sync hw events */
+ struct work_struct update_stats_work;
+ struct work_struct update_carrier_work;
+ struct work_struct set_rx_mode_work;
+ MLX5_DECLARE_DOORBELL_LOCK(doorbell_lock)
+
+ struct ifnet *ifp;
+ struct sysctl_ctx_list sysctl_ctx;
+ struct sysctl_oid *sysctl_ifnet;
+ struct sysctl_oid *sysctl_hw;
+ int sysctl_debug;
+ struct mlx5e_stats stats;
+ struct sysctl_ctx_list sysctl_ctx_channel_debug;
+ int counter_set_id;
+
+ struct workqueue_struct *wq;
+
+ eventhandler_tag vlan_detach;
+ eventhandler_tag vlan_attach;
+ struct ifmedia media;
+ int media_status_last;
+ int media_active_last;
+
+ struct callout watchdog;
+#ifdef RATELIMIT
+ struct mlx5e_rl_priv_data rl;
+#endif
+
+ struct callout tstmp_clbr;
+ int clbr_done;
+ int clbr_curr;
+ struct mlx5e_clbr_point clbr_points[2];
+ u_int clbr_gen;
+};
+
+#define MLX5E_NET_IP_ALIGN 2
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+};
+
+struct mlx5e_rx_wqe {
+ struct mlx5_wqe_srq_next_seg next;
+ struct mlx5_wqe_data_seg data[];
+};
+
+/* the size of the structure above must be a power of two */
+CTASSERT(powerof2(sizeof(struct mlx5e_rx_wqe)));
+
+struct mlx5e_eeprom {
+ int lock_bit;
+ int i2c_addr;
+ int page_num;
+ int device_addr;
+ int module_num;
+ int len;
+ int type;
+ int page_valid;
+ u32 *data;
+};
+
+/*
+ * This structure contains rate limit extension to the IEEE 802.1Qaz ETS
+ * managed object.
+ * Values are 64 bits long and specified in Kbps to enable usage over both
+ * slow and very fast networks.
+ *
+ * @tc_maxrate: maximal tc tx bandwidth indexed by traffic class
+ */
+struct ieee_maxrate {
+ __u64 tc_maxrate[IEEE_8021QAZ_MAX_TCS];
+};
+
+
+#define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL)
+
+int mlx5e_xmit(struct ifnet *, struct mbuf *);
+
+int mlx5e_open_locked(struct ifnet *);
+int mlx5e_close_locked(struct ifnet *);
+
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
+void mlx5e_rx_cq_comp(struct mlx5_core_cq *);
+void mlx5e_tx_cq_comp(struct mlx5_core_cq *);
+struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
+void mlx5e_tx_que(void *context, int pending);
+
+int mlx5e_open_flow_table(struct mlx5e_priv *priv);
+void mlx5e_close_flow_table(struct mlx5e_priv *priv);
+void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv);
+void mlx5e_set_rx_mode_work(struct work_struct *work);
+
+void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16);
+void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16);
+void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
+void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
+int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv);
+void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv);
+
+static inline void
+mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe, int bf_sz)
+{
+ u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
+
+ /* ensure wqe is visible to device before updating doorbell record */
+ wmb();
+
+ *sq->wq.db = cpu_to_be32(sq->pc);
+
+ /*
+ * Ensure the doorbell record is visible to device before ringing
+ * the doorbell:
+ */
+ wmb();
+
+ if (bf_sz) {
+ __iowrite64_copy(sq->uar.bf_map + ofst, wqe, bf_sz);
+
+ /* flush the write-combining mapped buffer */
+ wmb();
+
+ } else {
+ mlx5_write64(wqe, sq->uar.map + ofst,
+ MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
+ }
+
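+ /*
+ * Alternate between the two halves of the BlueFlame area for the
+ * next doorbell; this assumes bf_buf_size is half of the total
+ * BlueFlame register size, as set up during UAR allocation.
+ */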
+ sq->bf_offset ^= sq->bf_buf_size;
+}
+
+static inline void
+mlx5e_cq_arm(struct mlx5e_cq *cq, spinlock_t *dblock)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, dblock, cq->wq.cc);
+}
+
+extern const struct ethtool_ops mlx5e_ethtool_ops;
+void mlx5e_create_ethtool(struct mlx5e_priv *);
+void mlx5e_create_stats(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, const char *,
+ const char **, unsigned, u64 *);
+void mlx5e_send_nop(struct mlx5e_sq *, u32);
+void mlx5e_sq_cev_timeout(void *);
+int mlx5e_refresh_channel_params(struct mlx5e_priv *);
+int mlx5e_open_cq(struct mlx5e_priv *, struct mlx5e_cq_param *,
+ struct mlx5e_cq *, mlx5e_cq_comp_t *, int eq_ix);
+void mlx5e_close_cq(struct mlx5e_cq *);
+void mlx5e_free_sq_db(struct mlx5e_sq *);
+int mlx5e_alloc_sq_db(struct mlx5e_sq *);
+int mlx5e_enable_sq(struct mlx5e_sq *, struct mlx5e_sq_param *, int tis_num);
+int mlx5e_modify_sq(struct mlx5e_sq *, int curr_state, int next_state);
+void mlx5e_disable_sq(struct mlx5e_sq *);
+void mlx5e_drain_sq(struct mlx5e_sq *);
+void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value);
+void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value);
+void mlx5e_resume_sq(struct mlx5e_sq *sq);
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
+
+#endif /* _MLX5_EN_H_ */
diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h
new file mode 100644
index 000000000000..4e2c6c539857
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/en_rl.h
@@ -0,0 +1,174 @@
+/*-
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __MLX5_EN_RL_H__
+#define __MLX5_EN_RL_H__
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/interrupt.h>
+#include <sys/unistd.h>
+
+#include <sys/queue.h>
+
+#define MLX5E_RL_MAX_WORKERS 128 /* limited by Toeplitz hash */
+#define MLX5E_RL_MAX_TX_RATES (64 * 1024) /* software limit */
+#define MLX5E_RL_DEF_SQ_PER_WORKER (12 * 1024) /* software limit */
+#define MLX5E_RL_MAX_SQS (120 * 1024) /* software limit */
+
+#define MLX5E_RL_TX_COAL_USEC_DEFAULT 32
+#define MLX5E_RL_TX_COAL_PKTS_DEFAULT 4
+#define MLX5E_RL_TX_COAL_MODE_DEFAULT 0
+#define MLX5E_RL_TX_COMP_FACT_DEFAULT 1
+
+#define MLX5E_RL_WORKER_LOCK(rlw) mtx_lock(&(rlw)->mtx)
+#define MLX5E_RL_WORKER_UNLOCK(rlw) mtx_unlock(&(rlw)->mtx)
+
+#define MLX5E_RL_RLOCK(rl) sx_slock(&(rl)->rl_sxlock)
+#define MLX5E_RL_RUNLOCK(rl) sx_sunlock(&(rl)->rl_sxlock)
+
+#define MLX5E_RL_WLOCK(rl) sx_xlock(&(rl)->rl_sxlock)
+#define MLX5E_RL_WUNLOCK(rl) sx_xunlock(&(rl)->rl_sxlock)
+
+#define MLX5E_RL_PARAMS(m) \
+ m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
+ m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining TX packets") \
+ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of TX packets to join") \
+ m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+ m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \
+ m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \
+ m(+1, u64 tx_worker_threads_max, "tx_worker_threads_max", "Max number of TX worker threads") \
+ m(+1, u64 tx_worker_threads_def, "tx_worker_threads_def", "Default number of TX worker threads") \
+ m(+1, u64 tx_channels_per_worker_max, "tx_channels_per_worker_max", "Max number of TX channels per worker") \
+ m(+1, u64 tx_channels_per_worker_def, "tx_channels_per_worker_def", "Default number of TX channels per worker") \
+ m(+1, u64 tx_rates_max, "tx_rates_max", "Max number of TX rates") \
+ m(+1, u64 tx_rates_def, "tx_rates_def", "Default number of TX rates") \
+ m(+1, u64 tx_limit_min, "tx_limit_min", "Minimum TX rate in bits/s") \
+ m(+1, u64 tx_limit_max, "tx_limit_max", "Maximum TX rate in bits/s") \
+ m(+1, u64 tx_burst_size, "tx_burst_size", "Current burst size in number of packets. A value of zero means use firmware default.") \
+ m(+1, u64 tx_burst_size_max, "tx_burst_size_max", "Maximum burst size in number of packets") \
+ m(+1, u64 tx_burst_size_min, "tx_burst_size_min", "Minimum burst size in number of packets")
+
+#define MLX5E_RL_PARAMS_NUM (0 MLX5E_RL_PARAMS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_STATS(m) \
+ m(+1, u64 tx_allocate_resource_failure, "tx_allocate_resource_failure", "Number of times firmware resource allocation failed") \
+ m(+1, u64 tx_add_new_rate_failure, "tx_add_new_rate_failure", "Number of times adding a new firmware rate failed") \
+ m(+1, u64 tx_modify_rate_failure, "tx_modify_rate_failure", "Number of times modifying a firmware rate failed") \
+ m(+1, u64 tx_active_connections, "tx_active_connections", "Number of active connections") \
+ m(+1, u64 tx_open_queues, "tx_open_queues", "Number of open TX queues") \
+ m(+1, u64 tx_available_resource_failure, "tx_available_resource_failure", "Number of times TX resources were not available")
+
+#define MLX5E_RL_STATS_NUM (0 MLX5E_RL_STATS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_TABLE_PARAMS(m) \
+ m(+1, u64 tx_limit_add, "tx_limit_add", "Add TX rate limit in bits/s to empty slot") \
+ m(+1, u64 tx_limit_clr, "tx_limit_clr", "Clear all TX rates in table") \
+ m(+1, u64 tx_allowed_deviation, "tx_allowed_deviation", "Relative rate deviation allowed in 1/1000") \
+ m(+1, u64 tx_allowed_deviation_min, "tx_allowed_deviation_min", "Minimum allowed rate deviation in 1/1000") \
+ m(+1, u64 tx_allowed_deviation_max, "tx_allowed_deviation_max", "Maximum allowed rate deviation in 1/1000")
+
+#define MLX5E_RL_TABLE_PARAMS_NUM (0 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_COUNT))
+
+#define MLX5E_RL_PARAMS_INDEX(n) \
+ (__offsetof(struct mlx5e_rl_params, n) / sizeof(uint64_t))
+
+struct mlx5e_priv;
+
+/* Indicates channel's state */
+enum {
+ MLX5E_RL_ST_FREE,
+ MLX5E_RL_ST_USED,
+ MLX5E_RL_ST_MODIFY,
+ MLX5E_RL_ST_DESTROY,
+};
+
+struct mlx5e_rl_stats {
+ u64 arg [0];
+ MLX5E_RL_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_rl_params {
+ u64 arg [0];
+ MLX5E_RL_PARAMS(MLX5E_STATS_VAR)
+ u64 table_arg [0];
+ MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_rl_channel_param {
+ struct mlx5e_sq_param sq;
+ struct mlx5e_cq_param cq;
+};
+
+struct mlx5e_rl_channel {
+ struct m_snd_tag m_snd_tag;
+ STAILQ_ENTRY(mlx5e_rl_channel) entry;
+ struct mlx5e_sq * volatile sq;
+ struct mlx5e_rl_worker *worker;
+ uint64_t new_rate;
+ uint64_t init_rate;
+ uint64_t last_rate;
+ uint16_t last_burst;
+ uint16_t state;
+};
+
+struct mlx5e_rl_worker {
+ struct mtx mtx;
+ struct cv cv;
+ STAILQ_HEAD(, mlx5e_rl_channel) index_list_head;
+ STAILQ_HEAD(, mlx5e_rl_channel) process_head;
+ struct mlx5e_priv *priv;
+ struct mlx5e_rl_channel *channels;
+ unsigned worker_done;
+};
+
+struct mlx5e_rl_priv_data {
+ struct sx rl_sxlock;
+ struct sysctl_ctx_list ctx;
+ struct mlx5e_rl_channel_param chan_param;
+ struct mlx5e_rl_params param;
+ struct mlx5e_rl_stats stats;
+ struct mlx5_uar sq_uar;
+ struct mlx5e_rl_worker *workers;
+ struct mlx5e_priv *priv;
+ uint64_t *rate_limit_table;
+ unsigned opened;
+ uint32_t tisn;
+};
+
+int mlx5e_rl_init(struct mlx5e_priv *priv);
+void mlx5e_rl_cleanup(struct mlx5e_priv *priv);
+if_snd_tag_alloc_t mlx5e_rl_snd_tag_alloc;
+if_snd_tag_modify_t mlx5e_rl_snd_tag_modify;
+if_snd_tag_query_t mlx5e_rl_snd_tag_query;
+if_snd_tag_free_t mlx5e_rl_snd_tag_free;
+
+#endif /* __MLX5_EN_RL_H__ */
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
new file mode 100644
index 000000000000..85b1fe85617f
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
@@ -0,0 +1,1203 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <net/sff8472.h>
+
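+/*
+ * Register a sysctl node containing "num" read-only 64-bit counters.
+ * The "desc" array holds two strings per counter (sysctl name followed
+ * by description), typically generated with MLX5E_STATS_DESC, while
+ * "arg" points to the matching array of u64 counters.
+ */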
+void
+mlx5e_create_stats(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *parent, const char *buffer,
+ const char **desc, unsigned num, u64 * arg)
+{
+ struct sysctl_oid *node;
+ unsigned x;
+
+ sysctl_ctx_init(ctx);
+
+ node = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO,
+ buffer, CTLFLAG_RD, NULL, "Statistics");
+ if (node == NULL)
+ return;
+ for (x = 0; x != num; x++) {
+ SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]);
+ }
+}
+
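+/*
+ * Illustrative example of the limit computed below: with a 4096-entry
+ * send queue and MLX5_SEND_WQE_MAX_WQEBBS equal to 16 (the usual
+ * value), tx_completion_fact_max becomes 4096 / (2 * 16) = 128.
+ */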
+static void
+mlx5e_ethtool_sync_tx_completion_fact(struct mlx5e_priv *priv)
+{
+ /*
+ * Limit the maximum distance between completion events to
+ * half of the currently set TX queue size.
+ *
+ * The maximum number of queue entries a single IP packet can
+ * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
+ *
+ * The worst case max value is then given as below:
+ */
+ uint64_t max = priv->params_ethtool.tx_queue_size /
+ (2 * MLX5_SEND_WQE_MAX_WQEBBS);
+
+ /*
+ * Update the maximum completion factor value in case the
+ * tx_queue_size field changed. Ensure we don't overflow
+ * 16-bits.
+ */
+ if (max < 1)
+ max = 1;
+ else if (max > 65535)
+ max = 65535;
+ priv->params_ethtool.tx_completion_fact_max = max;
+
+ /*
+ * Verify that the current TX completion factor is within the
+ * given limits:
+ */
+ if (priv->params_ethtool.tx_completion_fact < 1)
+ priv->params_ethtool.tx_completion_fact = 1;
+ else if (priv->params_ethtool.tx_completion_fact > max)
+ priv->params_ethtool.tx_completion_fact = max;
+}
+
+static int
+mlx5e_getmaxrate(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ int err;
+ int i;
+
+ PRIV_LOCK(priv);
+ err = -mlx5_query_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit);
+ if (err)
+ goto done;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ switch (max_bw_unit[i]) {
+ case MLX5_100_MBPS_UNIT:
+ priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_100MB;
+ break;
+ case MLX5_GBPS_UNIT:
+ priv->params_ethtool.max_bw_value[i] = max_bw_value[i] * MLX5E_1GB;
+ break;
+ case MLX5_BW_NO_LIMIT:
+ priv->params_ethtool.max_bw_value[i] = 0;
+ break;
+ default:
+ priv->params_ethtool.max_bw_value[i] = -1;
+ WARN_ONCE(true, "non-supported BW unit");
+ break;
+ }
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+static int
+mlx5e_get_dscp(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, qcam_reg) == 0 ||
+ MLX5_CAP_QCAM_REG(mdev, qpts) == 0 ||
+ MLX5_CAP_QCAM_REG(mdev, qpdpm) == 0)
+ return (EOPNOTSUPP);
+
+ PRIV_LOCK(priv);
+ err = -mlx5_query_dscp2prio(mdev, priv->params_ethtool.dscp2prio);
+ if (err)
+ goto done;
+
+ err = -mlx5_query_trust_state(mdev, &priv->params_ethtool.trust_state);
+ if (err)
+ goto done;
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+static int
+mlx5e_tc_maxrate_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ int prio_index = arg2;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ int i, err;
+ u64 bw_val;
+ u64 result = priv->params_ethtool.max_bw_value[prio_index];
+ const u64 upper_limit_mbps = 255 * MLX5E_100MB;
+ const u64 upper_limit_gbps = 255 * MLX5E_1GB;
+
+ PRIV_LOCK(priv);
+ err = sysctl_handle_64(oidp, &result, 0, req);
+ if (err || !req->newptr ||
+ result == priv->params_ethtool.max_bw_value[prio_index])
+ goto done;
+
+ if (result % MLX5E_100MB) {
+ err = ERANGE;
+ goto done;
+ }
+
+ memset(max_bw_value, 0, sizeof(max_bw_value));
+ memset(max_bw_unit, 0, sizeof(max_bw_unit));
+
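+ /*
+ * Pick the smallest unit that can represent each rate: values up to
+ * 255 * MLX5E_100MB use 100 Mbps units, larger values up to
+ * 255 * MLX5E_1GB use 1 Gbps units.  For example, a limit of
+ * 100 * MLX5E_100MB (10 Gbps) becomes max_bw_value = 100 with
+ * MLX5_100_MBPS_UNIT.
+ */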
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ bw_val = (i == prio_index) ? result : priv->params_ethtool.max_bw_value[i];
+
+ if (!bw_val) {
+ max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+ } else if (bw_val > upper_limit_gbps) {
+ result = 0;
+ max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+ } else if (bw_val <= upper_limit_mbps) {
+ max_bw_value[i] = howmany(bw_val, MLX5E_100MB);
+ max_bw_unit[i] = MLX5_100_MBPS_UNIT;
+ } else {
+ max_bw_value[i] = howmany(bw_val, MLX5E_1GB);
+ max_bw_unit[i] = MLX5_GBPS_UNIT;
+ }
+ }
+
+ err = -mlx5_modify_port_tc_rate_limit(mdev, max_bw_value, max_bw_unit);
+ if (err)
+ goto done;
+
+ priv->params_ethtool.max_bw_value[prio_index] = result;
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+static int
+mlx5e_get_prio_tc(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err = 0;
+ int i;
+
+ PRIV_LOCK(priv);
+ if (!MLX5_CAP_GEN(priv->mdev, ets)) {
+ PRIV_UNLOCK(priv);
+ return (EOPNOTSUPP);
+ }
+
+ for (i = 0; i <= mlx5_max_tc(priv->mdev); i++) {
+ err = -mlx5_query_port_prio_tc(mdev, i, &(priv->params_ethtool.prio_tc[i]));
+ if (err)
+ break;
+ }
+
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+static int
+mlx5e_prio_to_tc_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ int prio_index = arg2;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+ uint8_t result = priv->params_ethtool.prio_tc[prio_index];
+
+ PRIV_LOCK(priv);
+ err = sysctl_handle_8(oidp, &result, 0, req);
+ if (err || !req->newptr ||
+ result == priv->params_ethtool.prio_tc[prio_index])
+ goto done;
+
+ if (result > mlx5_max_tc(mdev)) {
+ err = ERANGE;
+ goto done;
+ }
+
+ err = -mlx5_set_port_prio_tc(mdev, prio_index, result);
+ if (err)
+ goto done;
+
+ priv->params_ethtool.prio_tc[prio_index] = result;
+
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+static int
+mlx5e_trust_state_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+ u8 result;
+
+ PRIV_LOCK(priv);
+ result = priv->params_ethtool.trust_state;
+ err = sysctl_handle_8(oidp, &result, 0, req);
+ if (err || !req->newptr ||
+ result == priv->params_ethtool.trust_state)
+ goto done;
+
+ switch (result) {
+ case MLX5_QPTS_TRUST_PCP:
+ case MLX5_QPTS_TRUST_DSCP:
+ break;
+ case MLX5_QPTS_TRUST_BOTH:
+ if (!MLX5_CAP_QCAM_FEATURE(mdev, qpts_trust_both)) {
+ err = EOPNOTSUPP;
+ goto done;
+ }
+ break;
+ default:
+ err = ERANGE;
+ goto done;
+ }
+
+ err = -mlx5_set_trust_state(mdev, result);
+ if (err)
+ goto done;
+
+ priv->params_ethtool.trust_state = result;
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
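+/*
+ * Each "dscp2prio" sysctl node exposes one eighth of the DSCP table:
+ * assuming MLX5_MAX_SUPPORTED_DSCP is 64, the sizeof()/8 slices below
+ * cover 8 consecutive DSCP code points starting at prio_index.
+ */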
+static int
+mlx5e_dscp_prio_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ int prio_index = arg2;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ uint8_t dscp2prio[MLX5_MAX_SUPPORTED_DSCP];
+ uint8_t x;
+ int err;
+
+ PRIV_LOCK(priv);
+ err = SYSCTL_OUT(req, priv->params_ethtool.dscp2prio + prio_index,
+ sizeof(priv->params_ethtool.dscp2prio) / 8);
+ if (err || !req->newptr)
+ goto done;
+
+ memcpy(dscp2prio, priv->params_ethtool.dscp2prio, sizeof(dscp2prio));
+ err = SYSCTL_IN(req, dscp2prio + prio_index, sizeof(dscp2prio) / 8);
+ if (err)
+ goto done;
+ for (x = 0; x != MLX5_MAX_SUPPORTED_DSCP; x++) {
+ if (dscp2prio[x] > 7) {
+ err = ERANGE;
+ goto done;
+ }
+ }
+ err = -mlx5_set_dscp2prio(mdev, dscp2prio);
+ if (err)
+ goto done;
+
+ /* update local array */
+ memcpy(priv->params_ethtool.dscp2prio, dscp2prio,
+ sizeof(priv->params_ethtool.dscp2prio));
+done:
+ PRIV_UNLOCK(priv);
+ return (err);
+}
+
+#define MLX5_PARAM_OFFSET(n) \
+ __offsetof(struct mlx5e_priv, params_ethtool.n)
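+/*
+ * MLX5_PARAM_OFFSET(n) yields the byte offset of an ethtool parameter
+ * within struct mlx5e_priv.  The handler below receives the parameter
+ * as an index (arg2) into the u64 arg[] array and switches on
+ * MLX5_PARAM_OFFSET(arg[arg2]), so each case can be written using the
+ * parameter's field name.
+ */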
+
+static int
+mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ uint64_t value;
+ int mode_modify;
+ int was_opened;
+ int error;
+
+ PRIV_LOCK(priv);
+ value = priv->params_ethtool.arg[arg2];
+ if (req != NULL) {
+ error = sysctl_handle_64(oidp, &value, 0, req);
+ if (error || req->newptr == NULL ||
+ value == priv->params_ethtool.arg[arg2])
+ goto done;
+
+ /* assign new value */
+ priv->params_ethtool.arg[arg2] = value;
+ } else {
+ error = 0;
+ }
+ /* check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);
+
+ switch (MLX5_PARAM_OFFSET(arg[arg2])) {
+ case MLX5_PARAM_OFFSET(rx_coalesce_usecs):
+ /* import RX coal time */
+ if (priv->params_ethtool.rx_coalesce_usecs < 1)
+ priv->params_ethtool.rx_coalesce_usecs = 0;
+ else if (priv->params_ethtool.rx_coalesce_usecs >
+ MLX5E_FLD_MAX(cqc, cq_period)) {
+ priv->params_ethtool.rx_coalesce_usecs =
+ MLX5E_FLD_MAX(cqc, cq_period);
+ }
+ priv->params.rx_cq_moderation_usec =
+ priv->params_ethtool.rx_coalesce_usecs;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_refresh_channel_params(priv);
+ break;
+
+ case MLX5_PARAM_OFFSET(rx_coalesce_pkts):
+ /* import RX coal pkts */
+ if (priv->params_ethtool.rx_coalesce_pkts < 1)
+ priv->params_ethtool.rx_coalesce_pkts = 0;
+ else if (priv->params_ethtool.rx_coalesce_pkts >
+ MLX5E_FLD_MAX(cqc, cq_max_count)) {
+ priv->params_ethtool.rx_coalesce_pkts =
+ MLX5E_FLD_MAX(cqc, cq_max_count);
+ }
+ priv->params.rx_cq_moderation_pkts =
+ priv->params_ethtool.rx_coalesce_pkts;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_refresh_channel_params(priv);
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_coalesce_usecs):
+ /* import TX coal time */
+ if (priv->params_ethtool.tx_coalesce_usecs < 1)
+ priv->params_ethtool.tx_coalesce_usecs = 0;
+ else if (priv->params_ethtool.tx_coalesce_usecs >
+ MLX5E_FLD_MAX(cqc, cq_period)) {
+ priv->params_ethtool.tx_coalesce_usecs =
+ MLX5E_FLD_MAX(cqc, cq_period);
+ }
+ priv->params.tx_cq_moderation_usec =
+ priv->params_ethtool.tx_coalesce_usecs;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_refresh_channel_params(priv);
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_coalesce_pkts):
+ /* import TX coal pkts */
+ if (priv->params_ethtool.tx_coalesce_pkts < 1)
+ priv->params_ethtool.tx_coalesce_pkts = 0;
+ else if (priv->params_ethtool.tx_coalesce_pkts >
+ MLX5E_FLD_MAX(cqc, cq_max_count)) {
+ priv->params_ethtool.tx_coalesce_pkts =
+ MLX5E_FLD_MAX(cqc, cq_max_count);
+ }
+ priv->params.tx_cq_moderation_pkts =
+ priv->params_ethtool.tx_coalesce_pkts;
+
+ /* avoid bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_refresh_channel_params(priv);
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_queue_size):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import TX queue size */
+ if (priv->params_ethtool.tx_queue_size <
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
+ priv->params_ethtool.tx_queue_size =
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ } else if (priv->params_ethtool.tx_queue_size >
+ priv->params_ethtool.tx_queue_size_max) {
+ priv->params_ethtool.tx_queue_size =
+ priv->params_ethtool.tx_queue_size_max;
+ }
+ /* store actual TX queue size */
+ priv->params.log_sq_size =
+ order_base_2(priv->params_ethtool.tx_queue_size);
+ priv->params_ethtool.tx_queue_size =
+ 1 << priv->params.log_sq_size;
+
+ /* verify TX completion factor */
+ mlx5e_ethtool_sync_tx_completion_fact(priv);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(rx_queue_size):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import RX queue size */
+ if (priv->params_ethtool.rx_queue_size <
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
+ priv->params_ethtool.rx_queue_size =
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
+ } else if (priv->params_ethtool.rx_queue_size >
+ priv->params_ethtool.rx_queue_size_max) {
+ priv->params_ethtool.rx_queue_size =
+ priv->params_ethtool.rx_queue_size_max;
+ }
+ /* store actual RX queue size */
+ priv->params.log_rq_size =
+ order_base_2(priv->params_ethtool.rx_queue_size);
+ priv->params_ethtool.rx_queue_size =
+ 1 << priv->params.log_rq_size;
+
+ /* update least number of RX WQEs */
+ priv->params.min_rx_wqes = min(
+ priv->params_ethtool.rx_queue_size - 1,
+ MLX5E_PARAMS_DEFAULT_MIN_RX_WQES);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(channels_rsss):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import number of channels */
+ if (priv->params_ethtool.channels_rsss < 1)
+ priv->params_ethtool.channels_rsss = 1;
+ else if (priv->params_ethtool.channels_rsss > 128)
+ priv->params_ethtool.channels_rsss = 128;
+
+ priv->params.channels_rsss = priv->params_ethtool.channels_rsss;
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(channels):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import number of channels */
+ if (priv->params_ethtool.channels < 1)
+ priv->params_ethtool.channels = 1;
+ else if (priv->params_ethtool.channels >
+ (u64) priv->mdev->priv.eq_table.num_comp_vectors) {
+ priv->params_ethtool.channels =
+ (u64) priv->mdev->priv.eq_table.num_comp_vectors;
+ }
+ priv->params.num_channels = priv->params_ethtool.channels;
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(rx_coalesce_mode):
+ /* network interface must be down */
+ if (was_opened != 0 && mode_modify == 0)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import RX coalesce mode */
+ if (priv->params_ethtool.rx_coalesce_mode != 0)
+ priv->params_ethtool.rx_coalesce_mode = 1;
+ priv->params.rx_cq_moderation_mode =
+ priv->params_ethtool.rx_coalesce_mode;
+
+ /* restart network interface, if any */
+ if (was_opened != 0) {
+ if (mode_modify == 0)
+ mlx5e_open_locked(priv->ifp);
+ else
+ error = mlx5e_refresh_channel_params(priv);
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_coalesce_mode):
+ /* network interface must be down */
+ if (was_opened != 0 && mode_modify == 0)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import TX coalesce mode */
+ if (priv->params_ethtool.tx_coalesce_mode != 0)
+ priv->params_ethtool.tx_coalesce_mode = 1;
+ priv->params.tx_cq_moderation_mode =
+ priv->params_ethtool.tx_coalesce_mode;
+
+ /* restart network interface, if any */
+ if (was_opened != 0) {
+ if (mode_modify == 0)
+ mlx5e_open_locked(priv->ifp);
+ else
+ error = mlx5e_refresh_channel_params(priv);
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(hw_lro):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import HW LRO mode */
+ if (priv->params_ethtool.hw_lro != 0) {
+ if ((priv->ifp->if_capenable & IFCAP_LRO) &&
+ MLX5_CAP_ETH(priv->mdev, lro_cap)) {
+ priv->params.hw_lro_en = 1;
+ priv->params_ethtool.hw_lro = 1;
+ } else {
+ priv->params.hw_lro_en = 0;
+ priv->params_ethtool.hw_lro = 0;
+ error = EINVAL;
+
+ if_printf(priv->ifp, "Can't enable HW LRO: "
+ "The HW or SW LRO feature is disabled\n");
+ }
+ } else {
+ priv->params.hw_lro_en = 0;
+ }
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(cqe_zipping):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import CQE zipping mode */
+ if (priv->params_ethtool.cqe_zipping &&
+ MLX5_CAP_GEN(priv->mdev, cqe_compression)) {
+ priv->params.cqe_zipping_en = true;
+ priv->params_ethtool.cqe_zipping = 1;
+ } else {
+ priv->params.cqe_zipping_en = false;
+ priv->params_ethtool.cqe_zipping = 0;
+ }
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_bufring_disable):
+ /* range-check the input value */
+ priv->params_ethtool.tx_bufring_disable =
+ priv->params_ethtool.tx_bufring_disable ? 1 : 0;
+
+ /* reconfigure the sendqueues, if any */
+ if (was_opened) {
+ mlx5e_close_locked(priv->ifp);
+ mlx5e_open_locked(priv->ifp);
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(tx_completion_fact):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* verify parameter */
+ mlx5e_ethtool_sync_tx_completion_fact(priv);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+ break;
+
+ case MLX5_PARAM_OFFSET(modify_tx_dma):
+ /* check if network interface is opened */
+ if (was_opened) {
+ priv->params_ethtool.modify_tx_dma =
+ priv->params_ethtool.modify_tx_dma ? 1 : 0;
+ /* modify tx according to value */
+ mlx5e_modify_tx_dma(priv, value != 0);
+ } else {
+ /* if closed, force-enable TX */
+ priv->params_ethtool.modify_tx_dma = 0;
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(modify_rx_dma):
+ /* check if network interface is opened */
+ if (was_opened) {
+ priv->params_ethtool.modify_rx_dma =
+ priv->params_ethtool.modify_rx_dma ? 1 : 0;
+ /* modify rx according to value */
+ mlx5e_modify_rx_dma(priv, value != 0);
+ } else {
+ /* if closed, force-enable RX */
+ priv->params_ethtool.modify_rx_dma = 0;
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(diag_pci_enable):
+ priv->params_ethtool.diag_pci_enable =
+ priv->params_ethtool.diag_pci_enable ? 1 : 0;
+
+ error = -mlx5_core_set_diagnostics_full(priv->mdev,
+ priv->params_ethtool.diag_pci_enable,
+ priv->params_ethtool.diag_general_enable);
+ break;
+
+ case MLX5_PARAM_OFFSET(diag_general_enable):
+ priv->params_ethtool.diag_general_enable =
+ priv->params_ethtool.diag_general_enable ? 1 : 0;
+
+ error = -mlx5_core_set_diagnostics_full(priv->mdev,
+ priv->params_ethtool.diag_pci_enable,
+ priv->params_ethtool.diag_general_enable);
+ break;
+
+ case MLX5_PARAM_OFFSET(mc_local_lb):
+ priv->params_ethtool.mc_local_lb =
+ priv->params_ethtool.mc_local_lb ? 1 : 0;
+
+ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
+ error = mlx5_nic_vport_modify_local_lb(priv->mdev,
+ MLX5_LOCAL_MC_LB, priv->params_ethtool.mc_local_lb);
+ } else {
+ error = EOPNOTSUPP;
+ }
+ break;
+
+ case MLX5_PARAM_OFFSET(uc_local_lb):
+ priv->params_ethtool.uc_local_lb =
+ priv->params_ethtool.uc_local_lb ? 1 : 0;
+
+ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
+ error = mlx5_nic_vport_modify_local_lb(priv->mdev,
+ MLX5_LOCAL_UC_LB, priv->params_ethtool.uc_local_lb);
+ } else {
+ error = EOPNOTSUPP;
+ }
+ break;
+
+ default:
+ break;
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+/*
+ * Read the first three bytes of the eeprom in order to get the needed info
+ * for the whole reading.
+ * Byte 0 - Identifier byte
+ * Byte 1 - Revision byte
+ * Byte 2 - Status byte
+ */
+static int
+mlx5e_get_eeprom_info(struct mlx5e_priv *priv, struct mlx5e_eeprom *eeprom)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ u32 data = 0;
+ int size_read = 0;
+ int ret;
+
+ ret = mlx5_query_module_num(dev, &eeprom->module_num);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed query module error=%d\n",
+ __func__, __LINE__, ret);
+ return (ret);
+ }
+
+ /* Read the first three bytes to get Identifier, Revision and Status */
+ ret = mlx5_query_eeprom(dev, eeprom->i2c_addr, eeprom->page_num,
+ eeprom->device_addr, MLX5E_EEPROM_INFO_BYTES, eeprom->module_num, &data,
+ &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed query eeprom module error=0x%x\n",
+ __func__, __LINE__, ret);
+ return (ret);
+ }
+
+ switch (data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) {
+ case SFF_8024_ID_QSFP:
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
+ break;
+ case SFF_8024_ID_QSFPPLUS:
+ case SFF_8024_ID_QSFP28:
+ if ((data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) == SFF_8024_ID_QSFP28 ||
+ ((data & MLX5_EEPROM_REVISION_ID_BYTE_MASK) >> 8) >= 0x3) {
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8636;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8636_LEN;
+ } else {
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
+ }
+ if ((data & MLX5_EEPROM_PAGE_3_VALID_BIT_MASK) == 0)
+ eeprom->page_valid = 1;
+ break;
+ case SFF_8024_ID_SFP:
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8472;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ if_printf(priv->ifp, "%s:%d: Not recognized cable type = 0x%x(%s)\n",
+ __func__, __LINE__, data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK,
+ sff_8024_id[data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK]);
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/* Read both low and high pages of the eeprom */
+static int
+mlx5e_get_eeprom(struct mlx5e_priv *priv, struct mlx5e_eeprom *ee)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ int size_read = 0;
+ int ret;
+
+ if (ee->len == 0)
+ return (EINVAL);
+
+ /* Read low page of the eeprom */
+ while (ee->device_addr < ee->len) {
+ ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num, ee->device_addr,
+ ee->len - ee->device_addr, ee->module_num,
+ ee->data + (ee->device_addr / 4), &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
+ "error = 0x%02x\n", __func__, __LINE__, ret);
+ return (ret);
+ }
+ ee->device_addr += size_read;
+ }
+
+ /* Read high page of the eeprom */
+ if (ee->page_valid) {
+ ee->device_addr = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
+ ee->page_num = MLX5E_EEPROM_HIGH_PAGE;
+ size_read = 0;
+ while (ee->device_addr < MLX5E_EEPROM_PAGE_LENGTH) {
+ ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num,
+ ee->device_addr, MLX5E_EEPROM_PAGE_LENGTH - ee->device_addr,
+ ee->module_num, ee->data + (ee->len / 4) +
+ ((ee->device_addr - MLX5E_EEPROM_HIGH_PAGE_OFFSET) / 4),
+ &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
+ "error = 0x%02x\n", __func__, __LINE__, ret);
+ return (ret);
+ }
+ ee->device_addr += size_read;
+ }
+ }
+ return (0);
+}
+
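+/*
+ * Dump the EEPROM contents to the console as a hex table, sixteen
+ * bytes per row, followed by upper page 0x03 when it is present.
+ */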
+static void
+mlx5e_print_eeprom(struct mlx5e_eeprom *eeprom)
+{
+ int row;
+ int index_in_row;
+ int byte_to_write = 0;
+ int line_length = 16;
+
+ printf("\nOffset\t\tValues\n");
+ printf("------\t\t------");
+ while (byte_to_write < eeprom->len) {
+ printf("\n0x%04X\t\t", byte_to_write);
+ for (index_in_row = 0; index_in_row < line_length; index_in_row++) {
+ printf("%02X ", ((u8 *)eeprom->data)[byte_to_write]);
+ byte_to_write++;
+ }
+ }
+
+ if (eeprom->page_valid) {
+ row = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
+ printf("\n\nUpper Page 0x03\n");
+ printf("\nOffset\t\tValues\n");
+ printf("------\t\t------");
+ while (row < MLX5E_EEPROM_PAGE_LENGTH) {
+ printf("\n0x%04X\t\t", row);
+ for (index_in_row = 0; index_in_row < line_length; index_in_row++) {
+ printf("%02X ", ((u8 *)eeprom->data)[byte_to_write]);
+ byte_to_write++;
+ row++;
+ }
+ }
+ }
+}
+
+/*
+ * Sysctl handler that reads the cable EEPROM module information. The
+ * first three bytes are inspected to determine the module type and
+ * length, then the full contents are read and printed to dmesg.
+ */
+static int
+mlx5e_read_eeprom(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ struct mlx5e_eeprom eeprom;
+ int error;
+ int result = 0;
+
+ PRIV_LOCK(priv);
+ error = sysctl_handle_int(oidp, &result, 0, req);
+ if (error || !req->newptr)
+ goto done;
+
+ /* Check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+
+ if (result == 1) {
+ eeprom.i2c_addr = MLX5E_I2C_ADDR_LOW;
+ eeprom.device_addr = 0;
+ eeprom.page_num = MLX5E_EEPROM_LOW_PAGE;
+ eeprom.page_valid = 0;
+
+ /* Read the first three bytes to get the identifying info */
+ error = mlx5e_get_eeprom_info(priv, &eeprom);
+ if (error) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom's "
+ "initial information\n", __func__, __LINE__);
+ error = 0;
+ goto done;
+ }
+ /*
+ * Allocate needed length buffer and additional space for
+ * page 0x03
+ */
+ eeprom.data = malloc(eeprom.len + MLX5E_EEPROM_PAGE_LENGTH,
+ M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* Read the whole eeprom information */
+ error = mlx5e_get_eeprom(priv, &eeprom);
+ if (error) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom\n",
+ __func__, __LINE__);
+ error = 0;
+ /*
+ * Continue printing partial information in case of
+ * an error
+ */
+ }
+ mlx5e_print_eeprom(&eeprom);
+ free(eeprom.data, M_MLX5EN);
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static const char *mlx5e_params_desc[] = {
+ MLX5E_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_port_stats_debug_desc[] = {
+ MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_DESC)
+};
+
+static int
+mlx5e_ethtool_debug_channel_info(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv;
+ struct sbuf sb;
+ struct mlx5e_channel *c;
+ struct mlx5e_sq *sq;
+ struct mlx5e_rq *rq;
+ int error, i, tc;
+
+ priv = arg1;
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ if (sbuf_new_for_sysctl(&sb, NULL, 128, req) == NULL)
+ return (ENOMEM);
+ sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
+
+ PRIV_LOCK(priv);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ goto out;
+ for (i = 0; i < priv->params.num_channels; i++) {
+ c = priv->channel[i];
+ rq = &c->rq;
+ sbuf_printf(&sb, "channel %d rq %d cq %d\n",
+ c->ix, rq->rqn, rq->cq.mcq.cqn);
+ for (tc = 0; tc < c->num_tc; tc++) {
+ sq = &c->sq[tc];
+ sbuf_printf(&sb, "channel %d tc %d sq %d cq %d\n",
+ c->ix, tc, sq->sqn, sq->cq.mcq.cqn);
+ }
+ }
+out:
+ PRIV_UNLOCK(priv);
+ error = sbuf_finish(&sb);
+ sbuf_delete(&sb);
+ return (error);
+}
+
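+/*
+ * Sysctl handler toggling the extended debug statistics: enabling it
+ * creates the per-port debug counters and the "hw_ctx_debug" node,
+ * and disabling it frees both sysctl contexts again.
+ */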
+static int
+mlx5e_ethtool_debug_stats(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ int error, sys_debug;
+
+ sys_debug = priv->sysctl_debug;
+ error = sysctl_handle_int(oidp, &priv->sysctl_debug, 0, req);
+ if (error != 0 || !req->newptr)
+ return (error);
+ priv->sysctl_debug = priv->sysctl_debug != 0;
+ if (sys_debug == priv->sysctl_debug)
+ return (0);
+
+ PRIV_LOCK(priv);
+ if (priv->sysctl_debug) {
+ mlx5e_create_stats(&priv->stats.port_stats_debug.ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), "debug_stats",
+ mlx5e_port_stats_debug_desc, MLX5E_PORT_STATS_DEBUG_NUM,
+ priv->stats.port_stats_debug.arg);
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx_channel_debug,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "hw_ctx_debug",
+ CTLFLAG_RD | CTLFLAG_MPSAFE | CTLTYPE_STRING, priv, 0,
+ mlx5e_ethtool_debug_channel_info, "S", "");
+ } else {
+ sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
+ sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
+ }
+ PRIV_UNLOCK(priv);
+ return (0);
+}
+
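+/*
+ * Create a "diagnostics" sysctl node and register one read-only
+ * counter for every PCI and general diagnostics counter that the
+ * device reports as supported.
+ */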
+static void
+mlx5e_create_diagnostics(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_diagnostics_entry entry;
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *node;
+ int x;
+
+ /* sysctl context we are using */
+ ctx = &priv->sysctl_ctx;
+
+ /* create root node */
+ node = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "diagnostics", CTLFLAG_RD, NULL, "Diagnostics");
+ if (node == NULL)
+ return;
+
+ /* create PCI diagnostics */
+ for (x = 0; x != MLX5_CORE_PCI_DIAGNOSTICS_NUM; x++) {
+ entry = mlx5_core_pci_diagnostics_table[x];
+ if (mlx5_core_supports_diagnostics(priv->mdev, entry.counter_id) == 0)
+ continue;
+ SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ entry.desc, CTLFLAG_RD, priv->params_pci.array + x,
+ "PCI diagnostics counter");
+ }
+
+ /* create general diagnostics */
+ for (x = 0; x != MLX5_CORE_GENERAL_DIAGNOSTICS_NUM; x++) {
+ entry = mlx5_core_general_diagnostics_table[x];
+ if (mlx5_core_supports_diagnostics(priv->mdev, entry.counter_id) == 0)
+ continue;
+ SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ entry.desc, CTLFLAG_RD, priv->params_general.array + x,
+ "General diagnostics counter");
+ }
+}
+
+void
+mlx5e_create_ethtool(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct sysctl_oid *node, *qos_node;
+ const char *pnameunit;
+ unsigned x;
+ int i;
+
+ /* set some defaults */
+ priv->params_ethtool.tx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
+ priv->params_ethtool.rx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ priv->params_ethtool.tx_queue_size = 1 << priv->params.log_sq_size;
+ priv->params_ethtool.rx_queue_size = 1 << priv->params.log_rq_size;
+ priv->params_ethtool.channels = priv->params.num_channels;
+ priv->params_ethtool.channels_rsss = priv->params.channels_rsss;
+ priv->params_ethtool.coalesce_pkts_max = MLX5E_FLD_MAX(cqc, cq_max_count);
+ priv->params_ethtool.coalesce_usecs_max = MLX5E_FLD_MAX(cqc, cq_period);
+ priv->params_ethtool.rx_coalesce_mode = priv->params.rx_cq_moderation_mode;
+ priv->params_ethtool.rx_coalesce_usecs = priv->params.rx_cq_moderation_usec;
+ priv->params_ethtool.rx_coalesce_pkts = priv->params.rx_cq_moderation_pkts;
+ priv->params_ethtool.tx_coalesce_mode = priv->params.tx_cq_moderation_mode;
+ priv->params_ethtool.tx_coalesce_usecs = priv->params.tx_cq_moderation_usec;
+ priv->params_ethtool.tx_coalesce_pkts = priv->params.tx_cq_moderation_pkts;
+ priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
+ priv->params_ethtool.cqe_zipping = priv->params.cqe_zipping_en;
+ mlx5e_ethtool_sync_tx_completion_fact(priv);
+
+ /* get default values for local loopback, if any */
+ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
+ int err;
+ u8 val;
+
+ err = mlx5_nic_vport_query_local_lb(priv->mdev, MLX5_LOCAL_MC_LB, &val);
+ if (err == 0)
+ priv->params_ethtool.mc_local_lb = val;
+
+ err = mlx5_nic_vport_query_local_lb(priv->mdev, MLX5_LOCAL_UC_LB, &val);
+ if (err == 0)
+ priv->params_ethtool.uc_local_lb = val;
+ }
+
+ /* create root node */
+ node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "conf", CTLFLAG_RW, NULL, "Configuration");
+ if (node == NULL)
+ return;
+ for (x = 0; x != MLX5E_PARAMS_NUM; x++) {
+ /* check for read-only parameter */
+ if (strstr(mlx5e_params_desc[2 * x], "_max") != NULL ||
+ strstr(mlx5e_params_desc[2 * x], "_mtu") != NULL) {
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU",
+ mlx5e_params_desc[2 * x + 1]);
+ } else {
+#if (__FreeBSD_version < 1100000)
+ char path[64];
+#endif
+ /*
+ * NOTE: In FreeBSD-11 and newer the
+ * CTLFLAG_RWTUN flag will take care of
+ * loading default sysctl value from the
+ * kernel environment, if any:
+ */
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU",
+ mlx5e_params_desc[2 * x + 1]);
+
+#if (__FreeBSD_version < 1100000)
+ /* compute path for sysctl */
+ snprintf(path, sizeof(path), "dev.mce.%d.conf.%s",
+ device_get_unit(priv->mdev->pdev->dev.bsddev),
+ mlx5e_params_desc[2 * x]);
+
+ /* try to fetch tunable, if any */
+ if (TUNABLE_QUAD_FETCH(path, &priv->params_ethtool.arg[x]))
+ mlx5e_ethtool_handler(NULL, priv, x, NULL);
+#endif
+ }
+ }
+
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "debug_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv,
+ 0, &mlx5e_ethtool_debug_stats, "I", "Extended debug statistics");
+
+ pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev);
+
+ SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(node),
+ OID_AUTO, "device_name", CTLFLAG_RD,
+ __DECONST(void *, pnameunit), 0,
+ "PCI device name");
+
+ /* EEPROM support */
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "eeprom_info",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx5e_read_eeprom, "I", "EEPROM information");
+
+ /* Diagnostics support */
+ mlx5e_create_diagnostics(priv);
+
+ /* create qos node */
+ qos_node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
+ SYSCTL_CHILDREN(node), OID_AUTO,
+ "qos", CTLFLAG_RW, NULL, "Quality Of Service configuration");
+ if (qos_node == NULL)
+ return;
+
+ /* Priority rate limit support */
+ if (mlx5e_getmaxrate(priv))
+ return;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ char name[32];
+ snprintf(name, sizeof(name), "tc_%d_max_rate", i);
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node),
+ OID_AUTO, name, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ priv, i, mlx5e_tc_maxrate_handler, "QU",
+ "Max rate for priority, specified in kilobits, where kilo=1000, \
+ max_rate must be divisible by 100000");
+ }
+
+ if (mlx5e_get_prio_tc(priv))
+ return;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ char name[32];
+ snprintf(name, sizeof(name), "prio_%d_to_tc", i);
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node),
+ OID_AUTO, name, CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ priv, i, mlx5e_prio_to_tc_handler, "CU",
+ "Set priority to traffic class");
+ }
+
+ /* DSCP support */
+ if (mlx5e_get_dscp(priv) == 0) {
+ for (i = 0; i != MLX5_MAX_SUPPORTED_DSCP; i += 8) {
+ char name[32];
+ snprintf(name, sizeof(name), "dscp_%d_%d_prio", i, i + 7);
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node),
+ OID_AUTO, name, CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+ priv, i, mlx5e_dscp_prio_handler, "CU",
+ "Set DSCP to priority mapping, 0..7");
+ }
+#define A "Set trust state, 1:PCP 2:DSCP"
+#define B " 3:BOTH"
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(qos_node),
+ OID_AUTO, "trust_state", CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+ priv, 0, mlx5e_trust_state_handler, "CU",
+ MLX5_CAP_QCAM_FEATURE(mdev, qpts_trust_both) ?
+ A B : A);
+#undef B
+#undef A
+ }
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
new file mode 100644
index 000000000000..2d1e456518e8
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
@@ -0,0 +1,1487 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#include <linux/list.h>
+#include <dev/mlx5/fs.h>
+
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
+enum {
+ MLX5E_FULLMATCH = 0,
+ MLX5E_ALLMULTI = 1,
+ MLX5E_PROMISC = 2,
+};
+
+enum {
+ MLX5E_UC = 0,
+ MLX5E_MC_IPV4 = 1,
+ MLX5E_MC_IPV6 = 2,
+ MLX5E_MC_OTHER = 3,
+};
+
+enum {
+ MLX5E_ACTION_NONE = 0,
+ MLX5E_ACTION_ADD = 1,
+ MLX5E_ACTION_DEL = 2,
+};
+
+struct mlx5e_eth_addr_hash_node {
+ LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist;
+ u8 action;
+ struct mlx5e_eth_addr_info ai;
+};
+
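+/*
+ * Hash Ethernet addresses by their least significant byte when
+ * indexing the per-interface unicast and multicast hash buckets.
+ */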
+static inline int
+mlx5e_hash_eth_addr(const u8 * addr)
+{
+ return (addr[5]);
+}
+
+static void
+mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash,
+ const u8 * addr)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ int ix = mlx5e_hash_eth_addr(addr);
+
+ LIST_FOREACH(hn, &hash[ix], hlist) {
+ if (bcmp(hn->ai.addr, addr, ETHER_ADDR_LEN) == 0) {
+ if (hn->action == MLX5E_ACTION_DEL)
+ hn->action = MLX5E_ACTION_NONE;
+ return;
+ }
+ }
+
+ hn = malloc(sizeof(*hn), M_MLX5EN, M_NOWAIT | M_ZERO);
+ if (hn == NULL)
+ return;
+
+ ether_addr_copy(hn->ai.addr, addr);
+ hn->action = MLX5E_ACTION_ADD;
+
+ LIST_INSERT_HEAD(&hash[ix], hn, hlist);
+}
+
+static void
+mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn)
+{
+ LIST_REMOVE(hn, hlist);
+ free(hn, M_MLX5EN);
+}
+
+static void
+mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai)
+{
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_ESP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_ESP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_AH))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_AH))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_ANY))
+ mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]);
+}
+
+static int
+mlx5e_get_eth_addr_type(const u8 * addr)
+{
+ if (ETHER_IS_MULTICAST(addr) == 0)
+ return (MLX5E_UC);
+
+ if ((addr[0] == 0x01) &&
+ (addr[1] == 0x00) &&
+ (addr[2] == 0x5e) &&
+ !(addr[3] & 0x80))
+ return (MLX5E_MC_IPV4);
+
+ if ((addr[0] == 0x33) &&
+ (addr[1] == 0x33))
+ return (MLX5E_MC_IPV6);
+
+ return (MLX5E_MC_OTHER);
+}
+
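+/*
+ * Compute the bitmask of traffic types (MLX5E_TT_*) that need a
+ * steering rule for the given rule type and, for full-match rules,
+ * for the class of the address (unicast, IPv4 or IPv6 multicast).
+ */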
+static u32
+mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type)
+{
+ int eth_addr_type;
+ u32 ret;
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ eth_addr_type = mlx5e_get_eth_addr_type(ai->addr);
+ switch (eth_addr_type) {
+ case MLX5E_UC:
+ ret =
+ (1 << MLX5E_TT_IPV4_TCP) |
+ (1 << MLX5E_TT_IPV6_TCP) |
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+
+ case MLX5E_MC_IPV4:
+ ret =
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ 0;
+ break;
+
+ case MLX5E_MC_IPV6:
+ ret =
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV6) |
+ 0;
+ break;
+
+ default:
+ ret =
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+ }
+ break;
+
+ case MLX5E_ALLMULTI:
+ ret =
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+
+ default: /* MLX5E_PROMISC */
+ ret =
+ (1 << MLX5E_TT_IPV4_TCP) |
+ (1 << MLX5E_TT_IPV6_TCP) |
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+ }
+
+ return (ret);
+}
+
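+/*
+ * Install one flow rule into the main table for every traffic type
+ * returned by mlx5e_get_tt_vec(), steering matching packets to the
+ * TIR of that traffic type. On failure all rules added so far for
+ * this address are removed again.
+ */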
+static int
+mlx5e_add_eth_addr_rule_sub(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai, int type,
+ u32 *mc, u32 *mv)
+{
+ struct mlx5_flow_destination dest;
+ u8 mc_enable = 0;
+ struct mlx5_flow_rule **rule_p;
+ struct mlx5_flow_table *ft = priv->fts.main.t;
+ u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers.dmac_47_16);
+ u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv,
+ outer_headers.dmac_47_16);
+ u32 *tirn = priv->tirn;
+ u32 tt_vec;
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ mc_enable = MLX5_MATCH_OUTER_HEADERS;
+ memset(mc_dmac, 0xff, ETH_ALEN);
+ ether_addr_copy(mv_dmac, ai->addr);
+ break;
+
+ case MLX5E_ALLMULTI:
+ mc_enable = MLX5_MATCH_OUTER_HEADERS;
+ mc_dmac[0] = 0x01;
+ mv_dmac[0] = 0x01;
+ break;
+
+ case MLX5E_PROMISC:
+ break;
+ default:
+ break;
+ }
+
+ tt_vec = mlx5e_get_tt_vec(ai, type);
+
+ if (tt_vec & BIT(MLX5E_TT_ANY)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_ANY];
+ dest.tir_num = tirn[MLX5E_TT_ANY];
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_ANY);
+ }
+
+ mc_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+
+ if (tt_vec & BIT(MLX5E_TT_IPV4)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV4];
+ dest.tir_num = tirn[MLX5E_TT_IPV4];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV4);
+ }
+
+ if (tt_vec & BIT(MLX5E_TT_IPV6)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV6];
+ dest.tir_num = tirn[MLX5E_TT_IPV6];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV6);
+ }
+
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP);
+
+ if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP];
+ dest.tir_num = tirn[MLX5E_TT_IPV4_UDP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP);
+ }
+
+ if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP];
+ dest.tir_num = tirn[MLX5E_TT_IPV6_UDP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP);
+ }
+
+ MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP);
+
+ if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP];
+ dest.tir_num = tirn[MLX5E_TT_IPV4_TCP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP);
+ }
+
+ if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP];
+ dest.tir_num = tirn[MLX5E_TT_IPV6_TCP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+
+ ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP);
+ }
+
+ MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH);
+
+ if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH];
+ dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH);
+ }
+
+ if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH];
+ dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH);
+ }
+
+ MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP);
+
+ if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP];
+ dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP);
+ }
+
+ if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) {
+ rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP];
+ dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP];
+ MLX5_SET(fte_match_param, mv, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG, &dest);
+ if (IS_ERR_OR_NULL(*rule_p))
+ goto err_del_ai;
+ ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP);
+ }
+
+ return 0;
+
+err_del_ai:
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+
+ return err;
+}
+
+static int
+mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai, int type)
+{
+ u32 *match_criteria;
+ u32 *match_value;
+ int err = 0;
+
+ match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ if (!match_value || !match_criteria) {
+ if_printf(priv->ifp, "%s: alloc failed\n", __func__);
+ err = -ENOMEM;
+ goto add_eth_addr_rule_out;
+ }
+ err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, match_criteria,
+ match_value);
+
+add_eth_addr_rule_out:
+ kvfree(match_criteria);
+ kvfree(match_value);
+
+ return (err);
+}
+
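+/*
+ * Program the list of currently active VLAN IDs into the NIC vport
+ * context. If the list exceeds the device limit (log_max_vlan_list)
+ * it is truncated and a warning is printed.
+ */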
+static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+{
+ struct ifnet *ifp = priv->ifp;
+ int max_list_size;
+ int list_size;
+ u16 *vlans;
+ int vlan;
+ int err;
+ int i;
+
+ list_size = 0;
+ for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID)
+ list_size++;
+
+ max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+
+ if (list_size > max_list_size) {
+ if_printf(ifp,
+ "ifnet vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
+ list_size = max_list_size;
+ }
+
+ vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ if (!vlans)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) {
+ if (i >= list_size)
+ break;
+ vlans[i++] = vlan;
+ }
+
+ err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ if (err)
+ if_printf(ifp, "Failed to modify vport vlans list err(%d)\n",
+ err);
+
+ kfree(vlans);
+ return err;
+}
+
+enum mlx5e_vlan_rule_type {
+ MLX5E_VLAN_RULE_TYPE_UNTAGGED,
+ MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_VID,
+};
+
+static int
+mlx5e_add_vlan_rule_sub(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid,
+ u32 *mc, u32 *mv)
+{
+ struct mlx5_flow_table *ft = priv->fts.vlan.t;
+ struct mlx5_flow_destination dest;
+ u8 mc_enable = 0;
+ struct mlx5_flow_rule **rule_p;
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fts.main.t;
+
+ mc_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ rule_p = &priv->vlan.untagged_ft_rule;
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ rule_p = &priv->vlan.any_cvlan_ft_rule;
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ rule_p = &priv->vlan.any_svlan_ft_rule;
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, mv, outer_headers.svlan_tag, 1);
+ break;
+ default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
+ rule_p = &priv->vlan.active_vlans_ft_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid);
+ mlx5e_vport_context_update_vlans(priv);
+ break;
+ }
+
+ *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ MLX5_FS_ETH_FLOW_TAG,
+ &dest);
+
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ if_printf(priv->ifp, "%s: add rule failed\n", __func__);
+ }
+
+ return (err);
+}
+
+static int
+mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ u32 *match_criteria;
+ u32 *match_value;
+ int err = 0;
+
+ match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ if (!match_value || !match_criteria) {
+ if_printf(priv->ifp, "%s: alloc failed\n", __func__);
+ err = -ENOMEM;
+ goto add_vlan_rule_out;
+ }
+
+ err = mlx5e_add_vlan_rule_sub(priv, rule_type, vid, match_criteria,
+ match_value);
+
+add_vlan_rule_out:
+ kvfree(match_criteria);
+ kvfree(match_value);
+
+ return (err);
+}
+
+static void
+mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ if (priv->vlan.untagged_ft_rule) {
+ mlx5_del_flow_rule(priv->vlan.untagged_ft_rule);
+ priv->vlan.untagged_ft_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ if (priv->vlan.any_cvlan_ft_rule) {
+ mlx5_del_flow_rule(priv->vlan.any_cvlan_ft_rule);
+ priv->vlan.any_cvlan_ft_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ if (priv->vlan.any_svlan_ft_rule) {
+ mlx5_del_flow_rule(priv->vlan.any_svlan_ft_rule);
+ priv->vlan.any_svlan_ft_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
+ if (priv->vlan.active_vlans_ft_rule[vid]) {
+ mlx5_del_flow_rule(priv->vlan.active_vlans_ft_rule[vid]);
+ priv->vlan.active_vlans_ft_rule[vid] = NULL;
+ }
+ mlx5e_vport_context_update_vlans(priv);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+mlx5e_del_any_vid_rules(struct mlx5e_priv *priv)
+{
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static int
+mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ if (err)
+ return (err);
+
+ return (mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0));
+}
+
+void
+mlx5e_enable_vlan_filter(struct mlx5e_priv *priv)
+{
+ if (priv->vlan.filter_disabled) {
+ priv->vlan.filter_disabled = false;
+ if (priv->ifp->if_flags & IFF_PROMISC)
+ return;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_del_any_vid_rules(priv);
+ }
+}
+
+void
+mlx5e_disable_vlan_filter(struct mlx5e_priv *priv)
+{
+ if (!priv->vlan.filter_disabled) {
+ priv->vlan.filter_disabled = true;
+ if (priv->ifp->if_flags & IFF_PROMISC)
+ return;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_add_any_vid_rules(priv);
+ }
+}
+
+void
+mlx5e_vlan_rx_add_vid(void *arg, struct ifnet *ifp, u16 vid)
+{
+ struct mlx5e_priv *priv = arg;
+
+ if (ifp != priv->ifp)
+ return;
+
+ PRIV_LOCK(priv);
+ if (!test_and_set_bit(vid, priv->vlan.active_vlans) &&
+ test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+ PRIV_UNLOCK(priv);
+}
+
+void
+mlx5e_vlan_rx_kill_vid(void *arg, struct ifnet *ifp, u16 vid)
+{
+ struct mlx5e_priv *priv = arg;
+
+ if (ifp != priv->ifp)
+ return;
+
+ PRIV_LOCK(priv);
+ clear_bit(vid, priv->vlan.active_vlans);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+ PRIV_UNLOCK(priv);
+}
+
+int
+mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv)
+{
+ int err;
+ int i;
+
+ set_bit(0, priv->vlan.active_vlans);
+ for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID) {
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID,
+ i);
+ if (err)
+ return (err);
+ }
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ if (err)
+ return (err);
+
+ if (priv->vlan.filter_disabled) {
+ err = mlx5e_add_any_vid_rules(priv);
+ if (err)
+ return (err);
+ }
+ return (0);
+}
+
+void
+mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv)
+{
+ int i;
+
+ if (priv->vlan.filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID)
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+ clear_bit(0, priv->vlan.active_vlans);
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \
+ LIST_FOREACH_SAFE(hn, &(hash)[i], hlist, tmp)
+
+static void
+mlx5e_execute_action(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_hash_node *hn)
+{
+ switch (hn->action) {
+ case MLX5E_ACTION_ADD:
+ mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ hn->action = MLX5E_ACTION_NONE;
+ break;
+
+ case MLX5E_ACTION_DEL:
+ mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai);
+ mlx5e_del_eth_addr_from_hash(hn);
+ break;
+
+ default:
+ break;
+ }
+}
+
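+/*
+ * Collect the interface link-level unicast and multicast addresses
+ * into the driver's address hash tables, marking new entries for
+ * addition and keeping the ones that are still present.
+ */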
+static void
+mlx5e_sync_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct ifnet *ifp = priv->ifp;
+ struct ifaddr *ifa;
+ struct ifmultiaddr *ifma;
+
+ /* XXX adding this entry might not be needed */
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
+ LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr)));
+
+ if_addr_rlock(ifp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_LINK)
+ continue;
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
+ LLADDR((struct sockaddr_dl *)ifa->ifa_addr));
+ }
+ if_addr_runlock(ifp);
+
+ if_maddr_rlock(ifp);
+ CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc,
+ LLADDR((struct sockaddr_dl *)ifma->ifma_addr));
+ }
+ if_maddr_runlock(ifp);
+}
+
+static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+ u8 addr_array[][ETH_ALEN], int size)
+{
+ bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC);
+ struct ifnet *ifp = priv->ifp;
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct mlx5e_eth_addr_hash_head *addr_list;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int i = 0;
+ int hi;
+
+ addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc;
+
+ if (is_uc) /* Make sure our own address is pushed first */
+ ether_addr_copy(addr_array[i++], IF_LLADDR(ifp));
+ else if (priv->eth_addr.broadcast_enabled)
+ ether_addr_copy(addr_array[i++], ifp->if_broadcastaddr);
+
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+ if (ether_addr_equal(IF_LLADDR(ifp), hn->ai.addr))
+ continue;
+ if (i >= size)
+ break;
+ ether_addr_copy(addr_array[i++], hn->ai.addr);
+ }
+}
+
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+ int list_type)
+{
+ bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC);
+ struct mlx5e_eth_addr_hash_node *hn;
+ u8 (*addr_array)[ETH_ALEN] = NULL;
+ struct mlx5e_eth_addr_hash_head *addr_list;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int max_size;
+ int size;
+ int err;
+ int hi;
+
+ size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0);
+ max_size = is_uc ?
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+
+ addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc;
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+ size++;
+
+ if (size > max_size) {
+ if_printf(priv->ifp,
+ "ifp %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
+ size = max_size;
+ }
+
+ if (size) {
+ addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!addr_array) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ }
+
+ err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+out:
+ if (err)
+ if_printf(priv->ifp,
+ "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
+ kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ ea->allmulti_enabled,
+ ea->promisc_enabled);
+}
+
+static void
+mlx5e_apply_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i)
+ mlx5e_execute_action(priv, hn);
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i)
+ mlx5e_execute_action(priv, hn);
+}
+
+static void
+mlx5e_handle_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i)
+ hn->action = MLX5E_ACTION_DEL;
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i)
+ hn->action = MLX5E_ACTION_DEL;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_sync_ifp_addr(priv);
+
+ mlx5e_apply_ifp_addr(priv);
+}
+
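+/*
+ * Synchronize receive filtering with the current interface state:
+ * figure out which of the promiscuous, all-multicast and broadcast
+ * settings changed since the last call, add or remove only the
+ * affected flow rules, resynchronize the unicast and multicast
+ * address filters and finally update the NIC vport context.
+ */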
+void
+mlx5e_set_rx_mode_core(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+ struct ifnet *ndev = priv->ifp;
+
+ bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ bool promisc_enabled = rx_mode_enable && (ndev->if_flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (ndev->if_flags & IFF_ALLMULTI);
+ bool broadcast_enabled = rx_mode_enable;
+
+ bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
+ bool disable_promisc = ea->promisc_enabled && !promisc_enabled;
+ bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled;
+ bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
+ bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
+ bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+
+ /* update broadcast address */
+ ether_addr_copy(priv->eth_addr.broadcast.addr,
+ priv->ifp->if_broadcastaddr);
+
+ if (enable_promisc) {
+ mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC);
+ if (!priv->vlan.filter_disabled)
+ mlx5e_add_any_vid_rules(priv);
+ }
+ if (enable_allmulti)
+ mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ if (enable_broadcast)
+ mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+
+ mlx5e_handle_ifp_addr(priv);
+
+ if (disable_broadcast)
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast);
+ if (disable_allmulti)
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti);
+ if (disable_promisc) {
+ if (!priv->vlan.filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc);
+ }
+
+ ea->promisc_enabled = promisc_enabled;
+ ea->allmulti_enabled = allmulti_enabled;
+ ea->broadcast_enabled = broadcast_enabled;
+
+ mlx5e_vport_context_update(priv);
+}
+
+void
+mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv =
+ container_of(work, struct mlx5e_priv, set_rx_mode_work);
+
+ PRIV_LOCK(priv);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_set_rx_mode_core(priv);
+ PRIV_UNLOCK(priv);
+}
+
+static void
+mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
+{
+ int i;
+
+ for (i = ft->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ft->g[i]))
+ mlx5_destroy_flow_group(ft->g[i]);
+ ft->g[i] = NULL;
+ }
+ ft->num_groups = 0;
+}
+
+static void
+mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
+{
+ mlx5e_destroy_groups(ft);
+ kfree(ft->g);
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+}
+
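+/*
+ * The main flow table is split into flow groups, one per distinct
+ * match-criteria set used by the rules installed into it: tunnel
+ * matching first, followed by the ethertype/IP-protocol and DMAC
+ * based groups. Each group size below is the capacity reserved for
+ * rules using that criteria set.
+ */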
+#define MLX5E_NUM_MAIN_GROUPS 10
+#define MLX5E_MAIN_GROUP0_SIZE BIT(4)
+#define MLX5E_MAIN_GROUP1_SIZE BIT(3)
+#define MLX5E_MAIN_GROUP2_SIZE BIT(1)
+#define MLX5E_MAIN_GROUP3_SIZE BIT(0)
+#define MLX5E_MAIN_GROUP4_SIZE BIT(14)
+#define MLX5E_MAIN_GROUP5_SIZE BIT(13)
+#define MLX5E_MAIN_GROUP6_SIZE BIT(11)
+#define MLX5E_MAIN_GROUP7_SIZE BIT(2)
+#define MLX5E_MAIN_GROUP8_SIZE BIT(1)
+#define MLX5E_MAIN_GROUP9_SIZE BIT(0)
+#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\
+ MLX5E_MAIN_GROUP1_SIZE +\
+ MLX5E_MAIN_GROUP2_SIZE +\
+ MLX5E_MAIN_GROUP3_SIZE +\
+ MLX5E_MAIN_GROUP4_SIZE +\
+ MLX5E_MAIN_GROUP5_SIZE +\
+ MLX5E_MAIN_GROUP6_SIZE +\
+ MLX5E_MAIN_GROUP7_SIZE +\
+ MLX5E_MAIN_GROUP8_SIZE +\
+ MLX5E_MAIN_GROUP9_SIZE +\
+ 0)
+
+static int
+mlx5e_create_main_groups_sub(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in,
+ match_criteria.outer_headers.dmac_47_16);
+ int err;
+ int ix = 0;
+
+ /* Tunnel rules need to be first in this list of groups */
+
+ /* Start tunnel rules */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.udp_dport);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+ /* End Tunnel Rules */
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ memset(dmac, 0xff, ETH_ALEN);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP4_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ memset(dmac, 0xff, ETH_ALEN);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP5_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ memset(dmac, 0xff, ETH_ALEN);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP6_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP7_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP8_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_MAIN_GROUP9_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ return (0);
+
+err_destory_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return (err);
+}
+
+static int
+mlx5e_create_main_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return (-ENOMEM);
+
+ err = mlx5e_create_main_groups_sub(ft, in, inlen);
+
+ kvfree(in);
+ return (err);
+}
+
+static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_flow_table *ft = &priv->fts.main;
+ int err;
+
+ ft->num_groups = 0;
+ ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "main",
+ MLX5E_MAIN_TABLE_SIZE);
+
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return (err);
+ }
+ ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_main_flow_table;
+ }
+
+ err = mlx5e_create_main_groups(ft);
+ if (err)
+ goto err_free_g;
+ return (0);
+
+err_free_g:
+ kfree(ft->g);
+
+err_destroy_main_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return (err);
+}
+
+static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_flow_table(&priv->fts.main);
+}
+
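+/*
+ * VLAN table layout: group 0 matches C-TAG plus VLAN ID (up to 4096
+ * rules), group 1 matches any C-TAG and group 2 matches any S-TAG.
+ */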
+#define MLX5E_NUM_VLAN_GROUPS 3
+#define MLX5E_VLAN_GROUP0_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP1_SIZE BIT(1)
+#define MLX5E_VLAN_GROUP2_SIZE BIT(0)
+#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\
+ MLX5E_VLAN_GROUP1_SIZE +\
+ MLX5E_VLAN_GROUP2_SIZE +\
+ 0)
+
+static int
+mlx5e_create_vlan_groups_sub(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ int err;
+ int ix = 0;
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destory_groups;
+ ft->num_groups++;
+
+ return (0);
+
+err_destory_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return (err);
+}
+
+static int
+mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return (-ENOMEM);
+
+ err = mlx5e_create_vlan_groups_sub(ft, in, inlen);
+
+ kvfree(in);
+ return (err);
+}
+
+static int
+mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_flow_table *ft = &priv->fts.vlan;
+ int err;
+
+ ft->num_groups = 0;
+ ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "vlan",
+ MLX5E_VLAN_TABLE_SIZE);
+
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return (err);
+ }
+ ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_vlan_flow_table;
+ }
+
+ err = mlx5e_create_vlan_groups(ft);
+ if (err)
+ goto err_free_g;
+
+ return (0);
+
+err_free_g:
+ kfree(ft->g);
+
+err_destroy_vlan_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return (err);
+}
+
+static void
+mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_flow_table(&priv->fts.vlan);
+}
+
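+/*
+ * Inner RSS table layout: group 0 matches the inner ethertype and IP
+ * protocol, group 1 matches the inner ethertype only and group 2 is
+ * a catch-all group without match criteria.
+ */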
+#define MLX5E_NUM_INNER_RSS_GROUPS 3
+#define MLX5E_INNER_RSS_GROUP0_SIZE BIT(3)
+#define MLX5E_INNER_RSS_GROUP1_SIZE BIT(1)
+#define MLX5E_INNER_RSS_GROUP2_SIZE BIT(0)
+#define MLX5E_INNER_RSS_TABLE_SIZE (MLX5E_INNER_RSS_GROUP0_SIZE +\
+ MLX5E_INNER_RSS_GROUP1_SIZE +\
+ MLX5E_INNER_RSS_GROUP2_SIZE +\
+ 0)
+
+static int
+mlx5e_create_inner_rss_groups_sub(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ int err;
+ int ix = 0;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_RSS_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_RSS_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_RSS_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ return (0);
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return (err);
+}
+
+static int
+mlx5e_create_inner_rss_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return (-ENOMEM);
+
+ err = mlx5e_create_inner_rss_groups_sub(ft, in, inlen);
+
+ kvfree(in);
+ return (err);
+}
+
+static int
+mlx5e_create_inner_rss_flow_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_flow_table *ft = &priv->fts.inner_rss;
+ int err;
+
+ ft->num_groups = 0;
+ ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "inner_rss",
+ MLX5E_INNER_RSS_TABLE_SIZE);
+
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return (err);
+ }
+ ft->g = kcalloc(MLX5E_NUM_INNER_RSS_GROUPS, sizeof(*ft->g),
+ GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_inner_rss_flow_table;
+ }
+
+ err = mlx5e_create_inner_rss_groups(ft);
+ if (err)
+ goto err_free_g;
+
+ return (0);
+
+err_free_g:
+ kfree(ft->g);
+
+err_destroy_inner_rss_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return (err);
+}
+
+static void mlx5e_destroy_inner_rss_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_flow_table(&priv->fts.inner_rss);
+}
+
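+/*
+ * Create the flow tables used by the driver inside the kernel flow
+ * namespace: the VLAN table, the main table and the inner RSS table,
+ * in that order.  They are destroyed in reverse order by
+ * mlx5e_close_flow_table().
+ */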
+int
+mlx5e_open_flow_table(struct mlx5e_priv *priv)
+{
+ int err;
+
+ priv->fts.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ err = mlx5e_create_vlan_flow_table(priv);
+ if (err)
+ return (err);
+
+ err = mlx5e_create_main_flow_table(priv);
+ if (err)
+ goto err_destroy_vlan_flow_table;
+
+ err = mlx5e_create_inner_rss_flow_table(priv);
+ if (err)
+ goto err_destroy_main_flow_table;
+
+ return (0);
+
+err_destroy_main_flow_table:
+ mlx5e_destroy_main_flow_table(priv);
+err_destroy_vlan_flow_table:
+ mlx5e_destroy_vlan_flow_table(priv);
+
+ return (err);
+}
+
+void
+mlx5e_close_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_inner_rss_flow_table(priv);
+ mlx5e_destroy_main_flow_table(priv);
+ mlx5e_destroy_vlan_flow_table(priv);
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
new file mode 100644
index 000000000000..916ebe72c46c
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -0,0 +1,3901 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#include <sys/sockio.h>
+#include <machine/atomic.h>
+
+#ifndef ETH_DRIVER_VERSION
+#define ETH_DRIVER_VERSION "3.4.2"
+#endif
+
+char mlx5e_version[] = "Mellanox Ethernet driver"
+ " (" ETH_DRIVER_VERSION ")";
+
+static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+};
+
+static const struct {
+ u32 subtype;
+ u64 baudrate;
+} mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
+
+ [MLX5E_1000BASE_CX_SGMII] = {
+ .subtype = IFM_1000_CX_SGMII,
+ .baudrate = IF_Mbps(1000ULL),
+ },
+ [MLX5E_1000BASE_KX] = {
+ .subtype = IFM_1000_KX,
+ .baudrate = IF_Mbps(1000ULL),
+ },
+ [MLX5E_10GBASE_CX4] = {
+ .subtype = IFM_10G_CX4,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_KX4] = {
+ .subtype = IFM_10G_KX4,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_KR] = {
+ .subtype = IFM_10G_KR,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_20GBASE_KR2] = {
+ .subtype = IFM_20G_KR2,
+ .baudrate = IF_Gbps(20ULL),
+ },
+ [MLX5E_40GBASE_CR4] = {
+ .subtype = IFM_40G_CR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_40GBASE_KR4] = {
+ .subtype = IFM_40G_KR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_56GBASE_R4] = {
+ .subtype = IFM_56G_R4,
+ .baudrate = IF_Gbps(56ULL),
+ },
+ [MLX5E_10GBASE_CR] = {
+ .subtype = IFM_10G_CR1,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_SR] = {
+ .subtype = IFM_10G_SR,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_ER] = {
+ .subtype = IFM_10G_ER,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_40GBASE_SR4] = {
+ .subtype = IFM_40G_SR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_40GBASE_LR4] = {
+ .subtype = IFM_40G_LR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_100GBASE_CR4] = {
+ .subtype = IFM_100G_CR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_SR4] = {
+ .subtype = IFM_100G_SR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_KR4] = {
+ .subtype = IFM_100G_KR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_LR4] = {
+ .subtype = IFM_100G_LR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100BASE_TX] = {
+ .subtype = IFM_100_TX,
+ .baudrate = IF_Mbps(100ULL),
+ },
+ [MLX5E_1000BASE_T] = {
+ .subtype = IFM_1000_T,
+ .baudrate = IF_Mbps(1000ULL),
+ },
+ [MLX5E_10GBASE_T] = {
+ .subtype = IFM_10G_T,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_25GBASE_CR] = {
+ .subtype = IFM_25G_CR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_25GBASE_KR] = {
+ .subtype = IFM_25G_KR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_25GBASE_SR] = {
+ .subtype = IFM_25G_SR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_50GBASE_CR2] = {
+ .subtype = IFM_50G_CR2,
+ .baudrate = IF_Gbps(50ULL),
+ },
+ [MLX5E_50GBASE_KR2] = {
+ .subtype = IFM_50G_KR2,
+ .baudrate = IF_Gbps(50ULL),
+ },
+};
+
+MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
+
+static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");
+
+static void
+mlx5e_update_carrier(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 eth_proto_oper;
+ int error;
+ u8 port_state;
+ u8 i;
+
+ port_state = mlx5_query_vport_state(mdev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
+
+ if (port_state == VPORT_STATE_UP) {
+ priv->media_status_last |= IFM_ACTIVE;
+ } else {
+ priv->media_status_last &= ~IFM_ACTIVE;
+ priv->media_active_last = IFM_ETHER;
+ if_link_state_change(priv->ifp, LINK_STATE_DOWN);
+ return;
+ }
+
+ error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (error) {
+ priv->media_active_last = IFM_ETHER;
+ priv->ifp->if_baudrate = 1;
+ if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
+ __func__, error);
+ return;
+ }
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+
+ for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
+ priv->ifp->if_baudrate =
+ mlx5e_mode_table[i].baudrate;
+ priv->media_active_last =
+ mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
+ }
+ }
+ if_link_state_change(priv->ifp, LINK_STATE_UP);
+}
+
+static void
+mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
+{
+ struct mlx5e_priv *priv = dev->if_softc;
+
+ ifmr->ifm_status = priv->media_status_last;
+ ifmr->ifm_active = priv->media_active_last |
+ (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
+ (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
+}
+
+static u32
+mlx5e_find_link_mode(u32 subtype)
+{
+ u32 i;
+ u32 link_mode = 0;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (mlx5e_mode_table[i].subtype == subtype)
+ link_mode |= MLX5E_PROT_MASK(i);
+ }
+
+ return (link_mode);
+}
+
+static int
+mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
+{
+ return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
+ priv->params.rx_pauseframe_control,
+ priv->params.tx_pauseframe_control,
+ priv->params.rx_priority_flow_control,
+ priv->params.tx_priority_flow_control));
+}
+
+static int
+mlx5e_set_port_pfc(struct mlx5e_priv *priv)
+{
+ int error;
+
+ if (priv->params.rx_pauseframe_control ||
+ priv->params.tx_pauseframe_control) {
+ if_printf(priv->ifp,
+ "Global pauseframes must be disabled before enabling PFC.\n");
+ error = -EINVAL;
+ } else {
+ error = mlx5e_set_port_pause_and_pfc(priv);
+ }
+ return (error);
+}
+
+static int
+mlx5e_media_change(struct ifnet *dev)
+{
+ struct mlx5e_priv *priv = dev->if_softc;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 eth_proto_cap;
+ u32 link_mode;
+ int was_opened;
+ int locked;
+ int error;
+
+ locked = PRIV_LOCKED(priv);
+ if (!locked)
+ PRIV_LOCK(priv);
+
+ if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
+ error = EINVAL;
+ goto done;
+ }
+ link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
+
+ /* query supported capabilities */
+ error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (error != 0) {
+ if_printf(dev, "Query port media capability failed\n");
+ goto done;
+ }
+ /* check for autoselect */
+ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
+ link_mode = eth_proto_cap;
+ if (link_mode == 0) {
+ if_printf(dev, "Port media capability is zero\n");
+ error = EINVAL;
+ goto done;
+ }
+ } else {
+ link_mode = link_mode & eth_proto_cap;
+ if (link_mode == 0) {
+ if_printf(dev, "Not supported link mode requested\n");
+ error = EINVAL;
+ goto done;
+ }
+ }
+ if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
+ /* check if PFC is enabled */
+ if (priv->params.rx_priority_flow_control ||
+ priv->params.tx_priority_flow_control) {
+ if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
+ error = EINVAL;
+ goto done;
+ }
+ }
+ /* update pauseframe control bits */
+ priv->params.rx_pauseframe_control =
+ (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
+ priv->params.tx_pauseframe_control =
+ (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
+
+ /* check if device is opened */
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ /* reconfigure the hardware */
+ mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
+ mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
+ error = -mlx5e_set_port_pause_and_pfc(priv);
+ if (was_opened)
+ mlx5_set_port_status(mdev, MLX5_PORT_UP);
+
+done:
+ if (!locked)
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static void
+mlx5e_update_carrier_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_carrier_work);
+
+ PRIV_LOCK(priv);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_update_carrier(priv);
+ PRIV_UNLOCK(priv);
+}
+
+/*
+ * This function reads the physical port counters from the firmware
+ * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
+ * macros. The output is converted from big-endian 64-bit values into
+ * host endian ones and stored in the "priv->stats.pport" structure.
+ */
+static void
+mlx5e_update_pport_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_pport_stats *s = &priv->stats.pport;
+ struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
+ u32 *in;
+ u32 *out;
+ const u64 *ptr;
+ unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ unsigned x;
+ unsigned y;
+ unsigned z;
+
+ /* allocate firmware request structures */
+ in = mlx5_vzalloc(sz);
+ out = mlx5_vzalloc(sz);
+ if (in == NULL || out == NULL)
+ goto free_out;
+
+ /*
+ * Get pointer to the 64-bit counter set which is located at a
+ * fixed offset in the output firmware request structure:
+ */
+ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+
+ /* read IEEE802_3 counter group using predefined counter layout */
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
+ x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
+ s->arg[y] = be64toh(ptr[x]);
+
+ /* read RFC2819 counter group using predefined counter layout */
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
+ s->arg[y] = be64toh(ptr[x]);
+ for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
+ MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+
+ /* read RFC2863 counter group using predefined counter layout */
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+
+ /* read physical layer stats counter group using predefined counter layout */
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+
+ /* read per-priority counters */
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
+
+ /* iterate all the priorities */
+ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
+ MLX5_SET(ppcnt_reg, in, prio_tc, z);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+
+ /* read per priority stats counter group using predefined counter layout */
+ for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
+ MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
+ s->arg[y] = be64toh(ptr[x]);
+ }
+free_out:
+ /* free firmware request structures */
+ kvfree(in);
+ kvfree(out);
+}
+
+/*
+ * This function is called regularly to collect all statistics
+ * counters from the firmware. The values can be viewed through the
+ * sysctl interface. Execution is serialized using the priv's global
+ * configuration lock.
+ */
+static void
+mlx5e_update_stats_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_stats_work);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_vport_stats *s = &priv->stats.vport;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_sq_stats *sq_stats;
+ struct buf_ring *sq_br;
+#if (__FreeBSD_version < 1100000)
+ struct ifnet *ifp = priv->ifp;
+#endif
+
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u64 tso_packets = 0;
+ u64 tso_bytes = 0;
+ u64 tx_queue_dropped = 0;
+ u64 tx_defragged = 0;
+ u64 tx_offload_none = 0;
+ u64 lro_packets = 0;
+ u64 lro_bytes = 0;
+ u64 sw_lro_queued = 0;
+ u64 sw_lro_flushed = 0;
+ u64 rx_csum_none = 0;
+ u64 rx_wqe_err = 0;
+ u32 rx_out_of_buffer = 0;
+ int i;
+ int j;
+
+ PRIV_LOCK(priv);
+ out = mlx5_vzalloc(outlen);
+ if (out == NULL)
+ goto free_out;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ goto free_out;
+
+ /* Collect the SW counters first and then the HW counters, for consistency */
+ for (i = 0; i < priv->params.num_channels; i++) {
+ struct mlx5e_rq *rq = &priv->channel[i]->rq;
+
+ rq_stats = &priv->channel[i]->rq.stats;
+
+ /* collect stats from LRO */
+ rq_stats->sw_lro_queued = rq->lro.lro_queued;
+ rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
+ sw_lro_queued += rq_stats->sw_lro_queued;
+ sw_lro_flushed += rq_stats->sw_lro_flushed;
+ lro_packets += rq_stats->lro_packets;
+ lro_bytes += rq_stats->lro_bytes;
+ rx_csum_none += rq_stats->csum_none;
+ rx_wqe_err += rq_stats->wqe_err;
+
+ for (j = 0; j < priv->num_tc; j++) {
+ sq_stats = &priv->channel[i]->sq[j].stats;
+ sq_br = priv->channel[i]->sq[j].br;
+
+ tso_packets += sq_stats->tso_packets;
+ tso_bytes += sq_stats->tso_bytes;
+ tx_queue_dropped += sq_stats->dropped;
+ if (sq_br != NULL)
+ tx_queue_dropped += sq_br->br_drops;
+ tx_defragged += sq_stats->defragged;
+ tx_offload_none += sq_stats->csum_offload_none;
+ }
+ }
+
+ /* update counters */
+ s->tso_packets = tso_packets;
+ s->tso_bytes = tso_bytes;
+ s->tx_queue_dropped = tx_queue_dropped;
+ s->tx_defragged = tx_defragged;
+ s->lro_packets = lro_packets;
+ s->lro_bytes = lro_bytes;
+ s->sw_lro_queued = sw_lro_queued;
+ s->sw_lro_flushed = sw_lro_flushed;
+ s->rx_csum_none = rx_csum_none;
+ s->rx_wqe_err = rx_wqe_err;
+
+ /* HW counters */
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 0);
+
+ memset(out, 0, outlen);
+
+ /* get number of out-of-buffer drops first */
+ if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
+ &rx_out_of_buffer))
+ goto free_out;
+
+ /* accumulate difference into a 64-bit counter */
+ s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
+ s->rx_out_of_buffer_prev = rx_out_of_buffer;
+
+ /* get port statistics */
+ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
+ goto free_out;
+
+#define MLX5_GET_CTR(out, x) \
+ MLX5_GET64(query_vport_counter_out, out, x)
+
+ s->rx_error_packets =
+ MLX5_GET_CTR(out, received_errors.packets);
+ s->rx_error_bytes =
+ MLX5_GET_CTR(out, received_errors.octets);
+ s->tx_error_packets =
+ MLX5_GET_CTR(out, transmit_errors.packets);
+ s->tx_error_bytes =
+ MLX5_GET_CTR(out, transmit_errors.octets);
+
+ s->rx_unicast_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets);
+ s->rx_unicast_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets);
+ s->tx_unicast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
+ s->tx_unicast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
+
+ s->rx_multicast_packets =
+ MLX5_GET_CTR(out, received_eth_multicast.packets);
+ s->rx_multicast_bytes =
+ MLX5_GET_CTR(out, received_eth_multicast.octets);
+ s->tx_multicast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
+ s->tx_multicast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
+
+ s->rx_broadcast_packets =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+ s->rx_broadcast_bytes =
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+ s->tx_broadcast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+ s->tx_broadcast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ s->rx_packets =
+ s->rx_unicast_packets +
+ s->rx_multicast_packets +
+ s->rx_broadcast_packets -
+ s->rx_out_of_buffer;
+ s->rx_bytes =
+ s->rx_unicast_bytes +
+ s->rx_multicast_bytes +
+ s->rx_broadcast_bytes;
+ s->tx_packets =
+ s->tx_unicast_packets +
+ s->tx_multicast_packets +
+ s->tx_broadcast_packets;
+ s->tx_bytes =
+ s->tx_unicast_bytes +
+ s->tx_multicast_bytes +
+ s->tx_broadcast_bytes;
+
+ /* Update calculated offload counters */
+ s->tx_csum_offload = s->tx_packets - tx_offload_none;
+ s->rx_csum_good = s->rx_packets - s->rx_csum_none;
+
+ /* Get physical port counters */
+ mlx5e_update_pport_counters(priv);
+
+#if (__FreeBSD_version < 1100000)
+ /* no get_counters interface in fbsd 10 */
+ ifp->if_ipackets = s->rx_packets;
+ ifp->if_ierrors = s->rx_error_packets +
+ priv->stats.pport.alignment_err +
+ priv->stats.pport.check_seq_err +
+ priv->stats.pport.crc_align_errors +
+ priv->stats.pport.in_range_len_errors +
+ priv->stats.pport.jabbers +
+ priv->stats.pport.out_of_range_len +
+ priv->stats.pport.oversize_pkts +
+ priv->stats.pport.symbol_err +
+ priv->stats.pport.too_long_errors +
+ priv->stats.pport.undersize_pkts +
+ priv->stats.pport.unsupported_op_rx;
+ ifp->if_iqdrops = s->rx_out_of_buffer +
+ priv->stats.pport.drop_events;
+ ifp->if_opackets = s->tx_packets;
+ ifp->if_oerrors = s->tx_error_packets;
+ ifp->if_snd.ifq_drops = s->tx_queue_dropped;
+ ifp->if_ibytes = s->rx_bytes;
+ ifp->if_obytes = s->tx_bytes;
+ ifp->if_collisions =
+ priv->stats.pport.collisions;
+#endif
+
+free_out:
+ kvfree(out);
+
+ /* Update diagnostics, if any */
+ if (priv->params_ethtool.diag_pci_enable ||
+ priv->params_ethtool.diag_general_enable) {
+ int error = mlx5_core_get_diagnostics_full(mdev,
+ priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
+ priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
+ if (error != 0)
+ if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
+ }
+ PRIV_UNLOCK(priv);
+}
+
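+/*
+ * Watchdog callout handler.  It defers the actual statistics refresh
+ * to the driver's workqueue and re-arms itself to fire again after
+ * one second (hz ticks).
+ */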
+static void
+mlx5e_update_stats(void *arg)
+{
+ struct mlx5e_priv *priv = arg;
+
+ queue_work(priv->wq, &priv->update_stats_work);
+
+ callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
+}
+
+static void
+mlx5e_async_event_sub(struct mlx5e_priv *priv,
+ enum mlx5_dev_event event)
+{
+ switch (event) {
+ case MLX5_DEV_EVENT_PORT_UP:
+ case MLX5_DEV_EVENT_PORT_DOWN:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+ enum mlx5_dev_event event, unsigned long param)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ mtx_lock(&priv->async_events_mtx);
+ if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+ mlx5e_async_event_sub(priv, event);
+ mtx_unlock(&priv->async_events_mtx);
+}
+
+static void
+mlx5e_enable_async_events(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+}
+
+static void
+mlx5e_disable_async_events(struct mlx5e_priv *priv)
+{
+ mtx_lock(&priv->async_events_mtx);
+ clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+ mtx_unlock(&priv->async_events_mtx);
+}
+
+static void mlx5e_calibration_callout(void *arg);
+static int mlx5e_calibration_duration = 20;
+static int mlx5e_fast_calibration = 1;
+static int mlx5e_normal_calibration = 30;
+
+static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
+ "MLX5 timestamp calibration parameteres");
+
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
+ &mlx5e_calibration_duration, 0,
+ "Duration of initial calibration");
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
+ &mlx5e_fast_calibration, 0,
+ "Recalibration interval during initial calibration");
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
+ &mlx5e_normal_calibration, 0,
+ "Recalibration interval during normal operations");
+
+/*
+ * Ignites the calibration process.
+ */
+static void
+mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
+{
+
+ if (priv->clbr_done == 0)
+ mlx5e_calibration_callout(priv);
+ else
+ callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
+ mlx5e_calibration_duration ? mlx5e_fast_calibration :
+ mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
+ priv);
+}
+
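+/*
+ * Convert a timespec into a 64-bit counter.  Note that the value is
+ * computed in nanoseconds, despite the "usec" in the function name.
+ */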
+static uint64_t
+mlx5e_timespec2usec(const struct timespec *ts)
+{
+
+ return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
+}
+
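+/*
+ * Read the 64-bit free-running internal timer from the device's
+ * initialization segment.  The high and low halves are read
+ * separately, so the high half is re-read until it is stable in
+ * order to get a consistent snapshot.
+ */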
+static uint64_t
+mlx5e_hw_clock(struct mlx5e_priv *priv)
+{
+ struct mlx5_init_seg *iseg;
+ uint32_t hw_h, hw_h1, hw_l;
+
+ iseg = priv->mdev->iseg;
+ do {
+ hw_h = ioread32be(&iseg->internal_timer_h);
+ hw_l = ioread32be(&iseg->internal_timer_l);
+ hw_h1 = ioread32be(&iseg->internal_timer_h);
+ } while (hw_h1 != hw_h);
+ return (((uint64_t)hw_h << 32) | hw_l);
+}
+
+/*
+ * The calibration callout.  It runs either in the context of the
+ * thread which enables calibration, or as a callout.  It takes a
+ * snapshot of the system and adapter clocks, then advances the
+ * pointer to the calibration point so that the RX path can read
+ * consistent data locklessly.
+ */
+static void
+mlx5e_calibration_callout(void *arg)
+{
+ struct mlx5e_priv *priv;
+ struct mlx5e_clbr_point *next, *curr;
+ struct timespec ts;
+ int clbr_curr_next;
+
+ priv = arg;
+ curr = &priv->clbr_points[priv->clbr_curr];
+ clbr_curr_next = priv->clbr_curr + 1;
+ if (clbr_curr_next >= nitems(priv->clbr_points))
+ clbr_curr_next = 0;
+ next = &priv->clbr_points[clbr_curr_next];
+
+ next->base_prev = curr->base_curr;
+ next->clbr_hw_prev = curr->clbr_hw_curr;
+
+ next->clbr_hw_curr = mlx5e_hw_clock(priv);
+ if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
+ 0) {
+ if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx, "
+ "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
+ priv->clbr_done = 0;
+ return;
+ }
+
+ nanouptime(&ts);
+ next->base_curr = mlx5e_timespec2usec(&ts);
+
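+ /*
+ * Publish the new calibration point seqlock-style: invalidate the
+ * old point by zeroing its generation, issue a release fence,
+ * switch the current index, and finally store the new non-zero
+ * generation with release semantics so the RX path can detect and
+ * retry torn reads.
+ */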
+ curr->clbr_gen = 0;
+ atomic_thread_fence_rel();
+ priv->clbr_curr = clbr_curr_next;
+ atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
+
+ if (priv->clbr_done < mlx5e_calibration_duration)
+ priv->clbr_done++;
+ mlx5e_reset_calibration_callout(priv);
+}
+
+static const char *mlx5e_rq_stats_desc[] = {
+ MLX5E_RQ_STATS(MLX5E_STATS_DESC)
+};
+
+static int
+mlx5e_create_rq(struct mlx5e_channel *c,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ char buffer[16];
+ void *rqc = param->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int wq_sz;
+ int err;
+ int i;
+ u32 nsegs, wqe_sz;
+
+ err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
+ if (err != 0)
+ goto done;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
+ nsegs, /* nsegments */
+ nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &rq->dma_tag)))
+ goto done;
+
+ err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_free_dma_tag;
+
+ rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
+
+ err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
+ if (err != 0)
+ goto err_rq_wq_destroy;
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+
+ err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
+ for (i = 0; i != wq_sz; i++) {
+ struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
+#if (MLX5E_MAX_RX_SEGS == 1)
+ uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
+#else
+ int j;
+#endif
+
+ err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
+ if (err != 0) {
+ while (i--)
+ bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
+ goto err_rq_mbuf_free;
+ }
+
+ /* set value for constant fields */
+#if (MLX5E_MAX_RX_SEGS == 1)
+ wqe->data[0].lkey = c->mkey_be;
+ wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
+#else
+ for (j = 0; j < rq->nsegs; j++)
+ wqe->data[j].lkey = c->mkey_be;
+#endif
+ }
+
+ rq->ifp = c->ifp;
+ rq->channel = c;
+ rq->ix = c->ix;
+
+ snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
+ mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
+ rq->stats.arg);
+ return (0);
+
+err_rq_mbuf_free:
+ free(rq->mbuf, M_MLX5EN);
+ tcp_lro_free(&rq->lro);
+err_rq_wq_destroy:
+ mlx5_wq_destroy(&rq->wq_ctrl);
+err_free_dma_tag:
+ bus_dma_tag_destroy(rq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_destroy_rq(struct mlx5e_rq *rq)
+{
+ int wq_sz;
+ int i;
+
+ /* destroy all sysctl nodes */
+ sysctl_ctx_free(&rq->stats.ctx);
+
+ /* free leftover LRO packets, if any */
+ tcp_lro_free(&rq->lro);
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+ for (i = 0; i != wq_sz; i++) {
+ if (rq->mbuf[i].mbuf != NULL) {
+ bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
+ m_freem(rq->mbuf[i].mbuf);
+ }
+ bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
+ }
+ free(rq->mbuf, M_MLX5EN);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int
+mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * rq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ memcpy(rqc, param->rqc, sizeof(param->rqc));
+
+ MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ if (priv->counter_set_id >= 0)
+ MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_array(&rq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static int
+mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ err = mlx5_core_modify_rq(mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_disable_rq(struct mlx5e_rq *rq)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mlx5_core_destroy_rq(mdev, rq->rqn);
+}
+
+static int
+mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_wq_ll *wq = &rq->wq;
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (wq->cur_sz >= priv->params.min_rx_wqes)
+ return (0);
+
+ msleep(4);
+ }
+ return (-ETIMEDOUT);
+}
+
+static int
+mlx5e_open_rq(struct mlx5e_channel *c,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ int err;
+
+ err = mlx5e_create_rq(c, param, rq);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_rq(rq, param);
+ if (err)
+ goto err_destroy_rq;
+
+ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_disable_rq;
+
+ c->rq.enabled = 1;
+
+ return (0);
+
+err_disable_rq:
+ mlx5e_disable_rq(rq);
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+ mtx_lock(&rq->mtx);
+ rq->enabled = 0;
+ callout_stop(&rq->watchdog);
+ mtx_unlock(&rq->mtx);
+
+ callout_drain(&rq->watchdog);
+
+ mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+}
+
+static void
+mlx5e_close_rq_wait(struct mlx5e_rq *rq)
+{
+ struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
+
+ /* wait till RQ is empty */
+ while (!mlx5_wq_ll_is_empty(&rq->wq) &&
+ (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
+ msleep(4);
+ rq->cq.mcq.comp(&rq->cq.mcq);
+ }
+
+ mlx5e_disable_rq(rq);
+ mlx5e_destroy_rq(rq);
+}
+
+void
+mlx5e_free_sq_db(struct mlx5e_sq *sq)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int x;
+
+ for (x = 0; x != wq_sz; x++)
+ bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
+ free(sq->mbuf, M_MLX5EN);
+}
+
+int
+mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int err;
+ int x;
+
+ sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* Create DMA descriptor MAPs */
+ for (x = 0; x != wq_sz; x++) {
+ err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
+ if (err != 0) {
+ while (x--)
+ bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
+ free(sq->mbuf, M_MLX5EN);
+ return (err);
+ }
+ }
+ return (0);
+}
+
+static const char *mlx5e_sq_stats_desc[] = {
+ MLX5E_SQ_STATS(MLX5E_STATS_DESC)
+};
+
+static int
+mlx5e_create_sq(struct mlx5e_channel *c,
+ int tc,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_sq *sq)
+{
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ char buffer[16];
+
+ void *sqc = param->sqc;
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+#ifdef RSS
+ cpuset_t cpu_mask;
+ int cpu_id;
+#endif
+ int err;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */
+ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */
+ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &sq->dma_tag)))
+ goto done;
+
+ err = mlx5_alloc_map_uar(mdev, &sq->uar);
+ if (err)
+ goto err_free_dma_tag;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
+ &sq->wq_ctrl);
+ if (err)
+ goto err_unmap_free_uar;
+
+ sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
+ sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
+
+ err = mlx5e_alloc_sq_db(sq);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ sq->mkey_be = c->mkey_be;
+ sq->ifp = priv->ifp;
+ sq->priv = priv;
+ sq->tc = tc;
+ sq->max_inline = priv->params.tx_max_inline;
+ sq->min_inline_mode = priv->params.tx_min_inline_mode;
+ sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
+
+ /* check if we should allocate a second packet buffer */
+ if (priv->params_ethtool.tx_bufring_disable == 0) {
+ sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
+ M_WAITOK, &sq->lock);
+ if (sq->br == NULL) {
+ if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
+ __func__);
+ err = -ENOMEM;
+ goto err_free_sq_db;
+ }
+
+ sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
+ taskqueue_thread_enqueue, &sq->sq_tq);
+ if (sq->sq_tq == NULL) {
+ if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
+ __func__);
+ err = -ENOMEM;
+ goto err_free_drbr;
+ }
+
+ TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
+#ifdef RSS
+ cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
+ CPU_SETOF(cpu_id, &cpu_mask);
+ taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
+ "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
+#else
+ taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
+ "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
+#endif
+ }
+ snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
+ mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
+ sq->stats.arg);
+
+ return (0);
+
+err_free_drbr:
+ buf_ring_free(sq->br, M_MLX5EN);
+err_free_sq_db:
+ mlx5e_free_sq_db(sq);
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+err_unmap_free_uar:
+ mlx5_unmap_free_uar(mdev, &sq->uar);
+
+err_free_dma_tag:
+ bus_dma_tag_destroy(sq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_destroy_sq(struct mlx5e_sq *sq)
+{
+ /* destroy all sysctl nodes */
+ sysctl_ctx_free(&sq->stats.ctx);
+
+ mlx5e_free_sq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+ mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
+ if (sq->sq_tq != NULL) {
+ taskqueue_drain(sq->sq_tq, &sq->sq_task);
+ taskqueue_free(sq->sq_tq);
+ }
+ if (sq->br != NULL)
+ buf_ring_free(sq->br, M_MLX5EN);
+}
+
+int
+mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
+ int tis_num)
+{
+ void *in;
+ void *sqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, param->sqc, sizeof(param->sqc));
+
+ MLX5_SET(sqc, sqc, tis_num_0, tis_num);
+ MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, sq->uar.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_array(&sq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
+
+ kvfree(in);
+
+ return (err);
+}
+
+int
+mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
+ MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+ MLX5_SET(sqc, sqc, state, next_state);
+
+ err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+void
+mlx5e_disable_sq(struct mlx5e_sq *sq)
+{
+
+ mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
+}
+
+static int
+mlx5e_open_sq(struct mlx5e_channel *c,
+ int tc,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_sq *sq)
+{
+ int err;
+
+ err = mlx5e_create_sq(c, tc, param, sq);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
+ if (err)
+ goto err_destroy_sq;
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
+ if (err)
+ goto err_disable_sq;
+
+ WRITE_ONCE(sq->queue_state, MLX5E_SQ_READY);
+
+ return (0);
+
+err_disable_sq:
+ mlx5e_disable_sq(sq);
+err_destroy_sq:
+ mlx5e_destroy_sq(sq);
+
+ return (err);
+}
+
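+/*
+ * Send NOP WQEs until the completion event counter reaches zero.
+ * When "can_sleep" is set the routine waits for ring space to become
+ * available, otherwise it gives up as soon as the ring is full.  Any
+ * pending doorbell write is flushed before returning.
+ */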
+static void
+mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
+{
+ /* fill up remainder with NOPs */
+ while (sq->cev_counter != 0) {
+ while (!mlx5e_sq_has_room_for(sq, 1)) {
+ if (can_sleep != 0) {
+ mtx_unlock(&sq->lock);
+ msleep(4);
+ mtx_lock(&sq->lock);
+ } else {
+ goto done;
+ }
+ }
+ /* send a single NOP */
+ mlx5e_send_nop(sq, 1);
+ atomic_thread_fence_rel();
+ }
+done:
+ /* Check if we need to write the doorbell */
+ if (likely(sq->doorbell.d64 != 0)) {
+ mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+ sq->doorbell.d64 = 0;
+ }
+}
+
+void
+mlx5e_sq_cev_timeout(void *arg)
+{
+ struct mlx5e_sq *sq = arg;
+
+ mtx_assert(&sq->lock, MA_OWNED);
+
+ /* check next state */
+ switch (sq->cev_next_state) {
+ case MLX5E_CEV_STATE_SEND_NOPS:
+ /* fill TX ring with NOPs, if any */
+ mlx5e_sq_send_nops_locked(sq, 0);
+
+ /* check if completed */
+ if (sq->cev_counter == 0) {
+ sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
+ return;
+ }
+ break;
+ default:
+ /* send NOPs on next timeout */
+ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
+ break;
+ }
+
+ /* restart timer */
+ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
+}
+
+void
+mlx5e_drain_sq(struct mlx5e_sq *sq)
+{
+ int error;
+ struct mlx5_core_dev *mdev = sq->priv->mdev;
+
+ /*
+ * Check if already stopped.
+ *
+ * NOTE: The "stopped" variable is only written when both the
+ * priv's configuration lock and the SQ's lock are held. It
+ * can therefore safely be read when only one of the two locks
+ * is held. This function is always called with the priv's
+ * configuration lock held.
+ */
+ if (sq->stopped != 0)
+ return;
+
+ mtx_lock(&sq->lock);
+
+ /* don't put more packets into the SQ */
+ sq->stopped = 1;
+
+ /* teardown event factor timer, if any */
+ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
+ callout_stop(&sq->cev_callout);
+
+ /* send dummy NOPs in order to flush the transmit ring */
+ mlx5e_sq_send_nops_locked(sq, 1);
+ mtx_unlock(&sq->lock);
+
+ /* make sure it is safe to free the callout */
+ callout_drain(&sq->cev_callout);
+
+ /* wait till SQ is empty or link is down */
+ mtx_lock(&sq->lock);
+ while (sq->cc != sq->pc &&
+ (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
+ mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mtx_unlock(&sq->lock);
+ msleep(1);
+ sq->cq.mcq.comp(&sq->cq.mcq);
+ mtx_lock(&sq->lock);
+ }
+ mtx_unlock(&sq->lock);
+
+ /* error out remaining requests */
+ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+ if (error != 0) {
+ if_printf(sq->ifp,
+ "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
+ }
+
+ /* wait till SQ is empty */
+ mtx_lock(&sq->lock);
+ while (sq->cc != sq->pc &&
+ mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mtx_unlock(&sq->lock);
+ msleep(1);
+ sq->cq.mcq.comp(&sq->cq.mcq);
+ mtx_lock(&sq->lock);
+ }
+ mtx_unlock(&sq->lock);
+}
+
+static void
+mlx5e_close_sq_wait(struct mlx5e_sq *sq)
+{
+
+ mlx5e_drain_sq(sq);
+ mlx5e_disable_sq(sq);
+ mlx5e_destroy_sq(sq);
+}
+
+static int
+mlx5e_create_cq(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq,
+ mlx5e_cq_comp_t *comp,
+ int eq_ix)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int eqn_not_used;
+ int irqn;
+ int err;
+ u32 i;
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+
+ err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ return (err);
+
+ mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+ *mcq->set_ci_db = 0;
+ *mcq->arm_db = 0;
+ mcq->vector = eq_ix;
+ mcq->comp = comp;
+ mcq->event = mlx5e_cq_error_event;
+ mcq->irqn = irqn;
+ mcq->uar = &priv->cq_uar;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->priv = priv;
+
+ return (0);
+}
+
+static void
+mlx5e_destroy_cq(struct mlx5e_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int
+mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ void *in;
+ void *cqc;
+ int inlen;
+ int irqn_not_used;
+ int eqn;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, param->cqc, sizeof(param->cqc));
+
+ mlx5_fill_page_array(&cq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
+
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
+
+ kvfree(in);
+
+ if (err)
+ return (err);
+
+ mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
+
+ return (0);
+}
+
+static void
+mlx5e_disable_cq(struct mlx5e_cq *cq)
+{
+
+ mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
+}
+
+int
+mlx5e_open_cq(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq,
+ mlx5e_cq_comp_t *comp,
+ int eq_ix)
+{
+ int err;
+
+ err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_cq(cq, param, eq_ix);
+ if (err)
+ goto err_destroy_cq;
+
+ return (0);
+
+err_destroy_cq:
+ mlx5e_destroy_cq(cq);
+
+ return (err);
+}
+
+void
+mlx5e_close_cq(struct mlx5e_cq *cq)
+{
+ mlx5e_disable_cq(cq);
+ mlx5e_destroy_cq(cq);
+}
+
+static int
+mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ /* open completion queue */
+ err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
+ &mlx5e_tx_cq_comp, c->ix);
+ if (err)
+ goto err_close_tx_cqs;
+ }
+ return (0);
+
+err_close_tx_cqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_cq(&c->sq[tc].cq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tx_cqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->sq[tc].cq);
+}
+
+static int
+mlx5e_open_sqs(struct mlx5e_channel *c,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
+ if (err)
+ goto err_close_sqs;
+ }
+
+ return (0);
+
+err_close_sqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_sq_wait(&c->sq[tc]);
+
+ return (err);
+}
+
+static void
+mlx5e_close_sqs_wait(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_sq_wait(&c->sq[tc]);
+}
+
+static void
+mlx5e_chan_mtx_init(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
+
+ callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ struct mlx5e_sq *sq = c->sq + tc;
+
+ mtx_init(&sq->lock, "mlx5tx",
+ MTX_NETWORK_LOCK " TX", MTX_DEF);
+ mtx_init(&sq->comp_lock, "mlx5comp",
+ MTX_NETWORK_LOCK " TX", MTX_DEF);
+
+ callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
+
+ sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
+
+ /* ensure the TX completion event factor is not zero */
+ if (sq->cev_factor == 0)
+ sq->cev_factor = 1;
+ }
+}
+
+static void
+mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mtx_destroy(&c->rq.mtx);
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mtx_destroy(&c->sq[tc].lock);
+ mtx_destroy(&c->sq[tc].comp_lock);
+ }
+}
+
+static int
+mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_channel_param *cparam,
+ struct mlx5e_channel *volatile *cp)
+{
+ struct mlx5e_channel *c;
+ int err;
+
+ c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
+ c->priv = priv;
+ c->ix = ix;
+ c->cpu = 0;
+ c->ifp = priv->ifp;
+ c->mkey_be = cpu_to_be32(priv->mr.key);
+ c->num_tc = priv->num_tc;
+
+ /* init mutexes */
+ mlx5e_chan_mtx_init(c);
+
+ /* open transmit completion queue */
+ err = mlx5e_open_tx_cqs(c, cparam);
+ if (err)
+ goto err_free;
+
+ /* open receive completion queue */
+ err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
+ &mlx5e_rx_cq_comp, c->ix);
+ if (err)
+ goto err_close_tx_cqs;
+
+ err = mlx5e_open_sqs(c, cparam);
+ if (err)
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
+ if (err)
+ goto err_close_sqs;
+
+ /* store channel pointer */
+ *cp = c;
+
+ /* poll receive queue initially */
+ c->rq.cq.mcq.comp(&c->rq.cq.mcq);
+
+ return (0);
+
+err_close_sqs:
+ mlx5e_close_sqs_wait(c);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->rq.cq);
+
+err_close_tx_cqs:
+ mlx5e_close_tx_cqs(c);
+
+err_free:
+ /* destroy mutexes */
+ mlx5e_chan_mtx_destroy(c);
+ free(c, M_MLX5EN);
+ return (err);
+}
+
+static void
+mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
+{
+ struct mlx5e_channel *c = *pp;
+
+ /* check if channel is already closed */
+ if (c == NULL)
+ return;
+ mlx5e_close_rq(&c->rq);
+}
+
+static void
+mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
+{
+ struct mlx5e_channel *c = *pp;
+
+ /* check if channel is already closed */
+ if (c == NULL)
+ return;
+ /* ensure channel pointer is no longer used */
+ *pp = NULL;
+
+ mlx5e_close_rq_wait(&c->rq);
+ mlx5e_close_sqs_wait(c);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_tx_cqs(c);
+ /* destroy mutexes */
+ mlx5e_chan_mtx_destroy(c);
+ free(c, M_MLX5EN);
+}
+
+static int
+mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
+{
+ u32 r, n;
+
+ r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
+ MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
+ if (r > MJUM16BYTES)
+ return (-ENOMEM);
+
+ if (r > MJUM9BYTES)
+ r = MJUM16BYTES;
+ else if (r > MJUMPAGESIZE)
+ r = MJUM9BYTES;
+ else if (r > MCLBYTES)
+ r = MJUMPAGESIZE;
+ else
+ r = MCLBYTES;
+
+ /*
+ * n + 1 must be a power of two, because stride size must be.
+ * Stride size is 16 * (n + 1), as the first segment is
+ * control.
+ */
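+ /*
+ * For example (illustrative values only): if "r" is 16K bytes and
+ * MLX5E_MAX_RX_BYTES is 4K bytes, howmany() yields n = 4, which the
+ * loop below bumps to n = 7 so that n + 1 = 8 is a power of two.
+ */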
+ for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
+ ;
+
+ *wqe_sz = r;
+ *nsegs = n;
+ return (0);
+}
+
+static void
+mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 wqe_sz, nsegs;
+
+ mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
+ nsegs * sizeof(struct mlx5_wqe_data_seg)));
+ MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
+ MLX5_SET(wq, wq, pd, priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_build_sq_param(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
+}
+
+static void
+mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ /*
+ * TODO: The sysctl to control on/off is a bool value for now, which
+ * means we only support CSUM; once HASH is implemented we'll need to
+ * address that.
+ */
+ if (priv->params.cqe_zipping_en) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_compression_en, 1);
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
+ MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+ MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
+
+ switch (priv->params.rx_cq_moderation_mode) {
+ case 0:
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ default:
+ if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+ else
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ }
+
+ mlx5e_build_common_cq_param(priv, param);
+}
+
+static void
+mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
+ MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
+ MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
+
+ switch (priv->params.tx_cq_moderation_mode) {
+ case 0:
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ default:
+ if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+ else
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ }
+
+ mlx5e_build_common_cq_param(priv, param);
+}
+
+static void
+mlx5e_build_channel_param(struct mlx5e_priv *priv,
+ struct mlx5e_channel_param *cparam)
+{
+ memset(cparam, 0, sizeof(*cparam));
+
+ mlx5e_build_rq_param(priv, &cparam->rq);
+ mlx5e_build_sq_param(priv, &cparam->sq);
+ mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
+}
+
+static int
+mlx5e_open_channels(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channel_param cparam;
+ void *ptr;
+ int err;
+ int i;
+ int j;
+
+ priv->channel = malloc(priv->params.num_channels *
+ sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
+
+ mlx5e_build_channel_param(priv, &cparam);
+ for (i = 0; i < priv->params.num_channels; i++) {
+ err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
+ if (err)
+ goto err_close_channels;
+ }
+
+ for (j = 0; j < priv->params.num_channels; j++) {
+ err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
+ if (err)
+ goto err_close_channels;
+ }
+
+ return (0);
+
+err_close_channels:
+ for (i--; i >= 0; i--) {
+ mlx5e_close_channel(&priv->channel[i]);
+ mlx5e_close_channel_wait(&priv->channel[i]);
+ }
+
+ /* remove "volatile" attribute from "channel" pointer */
+ ptr = __DECONST(void *, priv->channel);
+ priv->channel = NULL;
+
+ free(ptr, M_MLX5EN);
+
+ return (err);
+}
+
+static void
+mlx5e_close_channels(struct mlx5e_priv *priv)
+{
+ void *ptr;
+ int i;
+
+ if (priv->channel == NULL)
+ return;
+
+ for (i = 0; i < priv->params.num_channels; i++)
+ mlx5e_close_channel(&priv->channel[i]);
+ for (i = 0; i < priv->params.num_channels; i++)
+ mlx5e_close_channel_wait(&priv->channel[i]);
+
+ /* remove "volatile" attribute from "channel" pointer */
+ ptr = __DECONST(void *, priv->channel);
+ priv->channel = NULL;
+
+ free(ptr, M_MLX5EN);
+}
+
+static int
+mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
+{
+
+ if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
+ uint8_t cq_mode;
+
+ switch (priv->params.tx_cq_moderation_mode) {
+ case 0:
+ cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ break;
+ default:
+ cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
+ break;
+ }
+
+ return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
+ priv->params.tx_cq_moderation_usec,
+ priv->params.tx_cq_moderation_pkts,
+ cq_mode));
+ }
+
+ return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
+ priv->params.tx_cq_moderation_usec,
+ priv->params.tx_cq_moderation_pkts));
+}
+
+static int
+mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
+{
+
+ if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
+ uint8_t cq_mode;
+ int retval;
+
+ switch (priv->params.rx_cq_moderation_mode) {
+ case 0:
+ cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ break;
+ default:
+ cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
+ break;
+ }
+
+ retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
+ priv->params.rx_cq_moderation_usec,
+ priv->params.rx_cq_moderation_pkts,
+ cq_mode);
+
+ return (retval);
+ }
+
+ return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
+ priv->params.rx_cq_moderation_usec,
+ priv->params.rx_cq_moderation_pkts));
+}
+
+static int
+mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
+{
+ int err;
+ int i;
+
+ if (c == NULL)
+ return (EINVAL);
+
+ err = mlx5e_refresh_rq_params(priv, &c->rq);
+ if (err)
+ goto done;
+
+ for (i = 0; i != c->num_tc; i++) {
+ err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
+ if (err)
+ goto done;
+ }
+done:
+ return (err);
+}
+
+int
+mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
+{
+ int i;
+
+ if (priv->channel == NULL)
+ return (EINVAL);
+
+ for (i = 0; i < priv->params.num_channels; i++) {
+ int err;
+
+ err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
+ if (err)
+ return (err);
+ }
+ return (0);
+}
+
+static int
+mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, prio, tc);
+ MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
+
+ return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
+}
+
+static void
+mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
+{
+ mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
+}
+
+static int
+mlx5e_open_tises(struct mlx5e_priv *priv)
+{
+ int num_tc = priv->num_tc;
+ int err;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ err = mlx5e_open_tis(priv, tc);
+ if (err)
+ goto err_close_tises;
+ }
+
+ return (0);
+
+err_close_tises:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_tis(priv, tc);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tises(struct mlx5e_priv *priv)
+{
+ int num_tc = priv->num_tc;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++)
+ mlx5e_close_tis(priv, tc);
+}
+
+static int
+mlx5e_open_rqt(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 *in;
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
+ void *rqtc;
+ int inlen;
+ int err;
+ int sz;
+ int i;
+
+ sz = 1 << priv->params.rx_hash_log_tbl_sz;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ for (i = 0; i < sz; i++) {
+ int ix = i;
+#ifdef RSS
+ ix = rss_get_indirection_to_bucket(ix);
+#endif
+ /* ensure we don't overflow */
+ ix %= priv->params.num_channels;
+
+ /* apply receive side scaling stride, if any */
+ ix -= ix % (int)priv->params.channels_rsss;
+
+ MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
+ }
+
+ MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (!err)
+ priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
+
+ kvfree(in);
+
+ return (err);
+}
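+
+/*
+ * Illustration only (not used by the driver): how the RQT indirection
+ * entries built above map to channels. The channel count and RSS
+ * stride below are made-up example values. With num_channels = 6 and
+ * channels_rsss = 2, an indirection index of 7 first wraps to
+ * 7 % 6 = 1 and is then aligned down to the stride, 1 - (1 % 2) = 0,
+ * so the entry points at channel 0's RQ. Index 5 maps to
+ * 5 - (5 % 2) = 4, so only every second channel (0, 2, 4) receives
+ * RSS traffic in this configuration.
+ */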
+
+static void
+mlx5e_close_rqt(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
+
+ mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static void
+mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
+{
+ void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ __be32 *hkey;
+
+ MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
+
+#define ROUGH_MAX_L2_L3_HDR_SZ 256
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+
+#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
+ if (priv->params.hw_lro_en) {
+ MLX5_SET(tirc, tirc, lro_enable_mask,
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_msg_sz,
+ (priv->params.lro_wqe_sz -
+ ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ /* TODO: add the option to choose timer value dynamically */
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
+ MLX5_CAP_ETH(priv->mdev,
+ lro_timer_supported_periods[2]));
+ }
+
+ /* setup parameters for hashing TIR type, if any */
+ switch (tt) {
+ case MLX5E_TT_ANY:
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn,
+ priv->channel[0]->rq.rqn);
+ break;
+ default:
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table,
+ priv->rqtn);
+ MLX5_SET(tirc, tirc, rx_hash_fn,
+ MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
+ hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+#ifdef RSS
+ /*
+			 * The FreeBSD RSS implementation does not currently
+ * support symmetric Toeplitz hashes:
+ */
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
+ rss_getkey((uint8_t *)hkey);
+#else
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ hkey[0] = cpu_to_be32(0xD181C62C);
+ hkey[1] = cpu_to_be32(0xF7F4DB5B);
+ hkey[2] = cpu_to_be32(0x1983A2FC);
+ hkey[3] = cpu_to_be32(0x943E1ADB);
+ hkey[4] = cpu_to_be32(0xD9389E6B);
+ hkey[5] = cpu_to_be32(0xD1039C2C);
+ hkey[6] = cpu_to_be32(0xA74499AD);
+ hkey[7] = cpu_to_be32(0x593D56D9);
+ hkey[8] = cpu_to_be32(0xF3253C06);
+ hkey[9] = cpu_to_be32(0x2ADC1FFC);
+#endif
+ break;
+ }
+
+ switch (tt) {
+ case MLX5E_TT_IPV4_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+#ifdef RSS
+ if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ } else
+#endif
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV6_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+#ifdef RSS
+ if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ } else
+#endif
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV4_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+#ifdef RSS
+ if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ } else
+#endif
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV6_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+#ifdef RSS
+ if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ } else
+#endif
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ case MLX5E_TT_IPV6:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int
+mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+ tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
+
+ mlx5e_build_tir_ctx(priv, tirc, tt);
+
+ err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
+{
+ mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
+}
+
+static int
+mlx5e_open_tirs(struct mlx5e_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ err = mlx5e_open_tir(priv, i);
+ if (err)
+ goto err_close_tirs;
+ }
+
+ return (0);
+
+err_close_tirs:
+ for (i--; i >= 0; i--)
+ mlx5e_close_tir(priv, i);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tirs(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++)
+ mlx5e_close_tir(priv, i);
+}
+
+/*
+ * SW MTU does not include headers,
+ * HW MTU includes all headers and checksums.
+ */
+static int
+mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int hw_mtu;
+ int err;
+
+ hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
+
+ err = mlx5_set_port_mtu(mdev, hw_mtu);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
+ __func__, sw_mtu, err);
+ return (err);
+ }
+
+ /* Update vport context MTU */
+ err = mlx5_set_vport_mtu(mdev, hw_mtu);
+ if (err) {
+ if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
+ __func__, err);
+ }
+
+ ifp->if_mtu = sw_mtu;
+
+ err = mlx5_query_vport_mtu(mdev, &hw_mtu);
+ if (err || !hw_mtu) {
+ /* fallback to port oper mtu */
+ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
+ }
+ if (err) {
+ if_printf(ifp, "Query port MTU, after setting new "
+ "MTU value, failed\n");
+ return (err);
+ } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
+		err = -E2BIG;
+ if_printf(ifp, "Port MTU %d is smaller than "
+ "ifp mtu %d\n", hw_mtu, sw_mtu);
+ } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
+ err = -EINVAL;
+ if_printf(ifp, "Port MTU %d is bigger than "
+ "ifp mtu %d\n", hw_mtu, sw_mtu);
+ }
+ priv->params_ethtool.hw_mtu = hw_mtu;
+
+ return (err);
+}
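+
+/*
+ * Example only, to illustrate the SW/HW MTU relation used above; the
+ * exact expansion of MLX5E_SW2HW_MTU() is not spelled out here and the
+ * numbers below are an assumption. If the macro adds the Ethernet
+ * header (14 bytes), a VLAN tag (4 bytes) and the FCS (4 bytes), then
+ * a software MTU of 1500 corresponds to a hardware MTU of 1522, and
+ * MLX5E_HW2SW_MTU() performs the inverse conversion.
+ */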
+
+int
+mlx5e_open_locked(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ int err;
+ u16 set_id;
+
+ /* check if already opened */
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
+ return (0);
+
+#ifdef RSS
+ if (rss_getnumbuckets() > priv->params.num_channels) {
+ if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
+ "channels(%u) available\n", rss_getnumbuckets(),
+ priv->params.num_channels);
+ }
+#endif
+ err = mlx5e_open_tises(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
+ __func__, err);
+ return (err);
+ }
+ err = mlx5_vport_alloc_q_counter(priv->mdev,
+ MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
+ if (err) {
+ if_printf(priv->ifp,
+ "%s: mlx5_vport_alloc_q_counter failed: %d\n",
+ __func__, err);
+ goto err_close_tises;
+ }
+ /* store counter set ID */
+ priv->counter_set_id = set_id;
+
+ err = mlx5e_open_channels(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
+ __func__, err);
+ goto err_dalloc_q_counter;
+ }
+ err = mlx5e_open_rqt(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
+ __func__, err);
+ goto err_close_channels;
+ }
+ err = mlx5e_open_tirs(priv);
+ if (err) {
+		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
+ __func__, err);
+ goto err_close_rqls;
+ }
+ err = mlx5e_open_flow_table(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
+ __func__, err);
+ goto err_close_tirs;
+ }
+ err = mlx5e_add_all_vlan_rules(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
+ __func__, err);
+ goto err_close_flow_table;
+ }
+ set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ mlx5e_update_carrier(priv);
+ mlx5e_set_rx_mode_core(priv);
+
+ return (0);
+
+err_close_flow_table:
+ mlx5e_close_flow_table(priv);
+
+err_close_tirs:
+ mlx5e_close_tirs(priv);
+
+err_close_rqls:
+ mlx5e_close_rqt(priv);
+
+err_close_channels:
+ mlx5e_close_channels(priv);
+
+err_dalloc_q_counter:
+ mlx5_vport_dealloc_q_counter(priv->mdev,
+ MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
+
+err_close_tises:
+ mlx5e_close_tises(priv);
+
+ return (err);
+}
+
+static void
+mlx5e_open(void *arg)
+{
+ struct mlx5e_priv *priv = arg;
+
+ PRIV_LOCK(priv);
+ if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
+ if_printf(priv->ifp,
+ "%s: Setting port status to up failed\n",
+ __func__);
+
+ mlx5e_open_locked(priv->ifp);
+ priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ PRIV_UNLOCK(priv);
+}
+
+int
+mlx5e_close_locked(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+
+ /* check if already closed */
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ return (0);
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ mlx5e_set_rx_mode_core(priv);
+ mlx5e_del_all_vlan_rules(priv);
+ if_link_state_change(priv->ifp, LINK_STATE_DOWN);
+ mlx5e_close_flow_table(priv);
+ mlx5e_close_tirs(priv);
+ mlx5e_close_rqt(priv);
+ mlx5e_close_channels(priv);
+ mlx5_vport_dealloc_q_counter(priv->mdev,
+ MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
+ mlx5e_close_tises(priv);
+
+ return (0);
+}
+
+#if (__FreeBSD_version >= 1100000)
+static uint64_t
+mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ u64 retval;
+
+ /* PRIV_LOCK(priv); XXX not allowed */
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ retval = priv->stats.vport.rx_packets;
+ break;
+ case IFCOUNTER_IERRORS:
+ retval = priv->stats.vport.rx_error_packets +
+ priv->stats.pport.alignment_err +
+ priv->stats.pport.check_seq_err +
+ priv->stats.pport.crc_align_errors +
+ priv->stats.pport.in_range_len_errors +
+ priv->stats.pport.jabbers +
+ priv->stats.pport.out_of_range_len +
+ priv->stats.pport.oversize_pkts +
+ priv->stats.pport.symbol_err +
+ priv->stats.pport.too_long_errors +
+ priv->stats.pport.undersize_pkts +
+ priv->stats.pport.unsupported_op_rx;
+ break;
+ case IFCOUNTER_IQDROPS:
+ retval = priv->stats.vport.rx_out_of_buffer +
+ priv->stats.pport.drop_events;
+ break;
+ case IFCOUNTER_OPACKETS:
+ retval = priv->stats.vport.tx_packets;
+ break;
+ case IFCOUNTER_OERRORS:
+ retval = priv->stats.vport.tx_error_packets;
+ break;
+ case IFCOUNTER_IBYTES:
+ retval = priv->stats.vport.rx_bytes;
+ break;
+ case IFCOUNTER_OBYTES:
+ retval = priv->stats.vport.tx_bytes;
+ break;
+ case IFCOUNTER_IMCASTS:
+ retval = priv->stats.vport.rx_multicast_packets;
+ break;
+ case IFCOUNTER_OMCASTS:
+ retval = priv->stats.vport.tx_multicast_packets;
+ break;
+ case IFCOUNTER_OQDROPS:
+ retval = priv->stats.vport.tx_queue_dropped;
+ break;
+ case IFCOUNTER_COLLISIONS:
+ retval = priv->stats.pport.collisions;
+ break;
+ default:
+ retval = if_get_counter_default(ifp, cnt);
+ break;
+ }
+ /* PRIV_UNLOCK(priv); XXX not allowed */
+ return (retval);
+}
+#endif
+
+static void
+mlx5e_set_rx_mode(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
+static int
+mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct mlx5e_priv *priv;
+ struct ifreq *ifr;
+ struct ifi2creq i2c;
+ int error = 0;
+ int mask = 0;
+ int size_read = 0;
+ int module_status;
+ int module_num;
+ int max_mtu;
+ uint8_t read_addr;
+
+ priv = ifp->if_softc;
+
+ /* check if detaching */
+ if (priv == NULL || priv->gone != 0)
+ return (ENXIO);
+
+ switch (command) {
+ case SIOCSIFMTU:
+ ifr = (struct ifreq *)data;
+
+ PRIV_LOCK(priv);
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
+
+ if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
+ ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
+ int was_opened;
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (was_opened)
+ mlx5e_close_locked(ifp);
+
+ /* set new MTU */
+ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
+
+ if (was_opened)
+ mlx5e_open_locked(ifp);
+ } else {
+ error = EINVAL;
+ if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
+ MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
+ }
+ PRIV_UNLOCK(priv);
+ break;
+ case SIOCSIFFLAGS:
+ if ((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ mlx5e_set_rx_mode(ifp);
+ break;
+ }
+ PRIV_LOCK(priv);
+ if (ifp->if_flags & IFF_UP) {
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ mlx5e_open_locked(ifp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
+ }
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ mlx5_set_port_status(priv->mdev,
+ MLX5_PORT_DOWN);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
+ mlx5e_close_locked(ifp);
+ mlx5e_update_carrier(priv);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ }
+ }
+ PRIV_UNLOCK(priv);
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ mlx5e_set_rx_mode(ifp);
+ break;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
+ ifr = (struct ifreq *)data;
+ error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
+ break;
+ case SIOCSIFCAP:
+ ifr = (struct ifreq *)data;
+ PRIV_LOCK(priv);
+ mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+
+ if (mask & IFCAP_TXCSUM) {
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+
+ if (IFCAP_TSO4 & ifp->if_capenable &&
+ !(IFCAP_TXCSUM & ifp->if_capenable)) {
+ ifp->if_capenable &= ~IFCAP_TSO4;
+ ifp->if_hwassist &= ~CSUM_IP_TSO;
+ if_printf(ifp,
+ "tso4 disabled due to -txcsum.\n");
+ }
+ }
+ if (mask & IFCAP_TXCSUM_IPV6) {
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+ if (IFCAP_TSO6 & ifp->if_capenable &&
+ !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+ ifp->if_capenable &= ~IFCAP_TSO6;
+ ifp->if_hwassist &= ~CSUM_IP6_TSO;
+ if_printf(ifp,
+ "tso6 disabled due to -txcsum6.\n");
+ }
+ }
+ if (mask & IFCAP_RXCSUM)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ if (mask & IFCAP_RXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+ if (mask & IFCAP_TSO4) {
+ if (!(IFCAP_TSO4 & ifp->if_capenable) &&
+ !(IFCAP_TXCSUM & ifp->if_capenable)) {
+ if_printf(ifp, "enable txcsum first.\n");
+ error = EAGAIN;
+ goto out;
+ }
+ ifp->if_capenable ^= IFCAP_TSO4;
+ ifp->if_hwassist ^= CSUM_IP_TSO;
+ }
+ if (mask & IFCAP_TSO6) {
+ if (!(IFCAP_TSO6 & ifp->if_capenable) &&
+ !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+ if_printf(ifp, "enable txcsum6 first.\n");
+ error = EAGAIN;
+ goto out;
+ }
+ ifp->if_capenable ^= IFCAP_TSO6;
+ ifp->if_hwassist ^= CSUM_IP6_TSO;
+ }
+ if (mask & IFCAP_VLAN_HWFILTER) {
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ mlx5e_disable_vlan_filter(priv);
+ else
+ mlx5e_enable_vlan_filter(priv);
+
+ ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ }
+ if (mask & IFCAP_VLAN_HWTAGGING)
+ ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+ if (mask & IFCAP_WOL_MAGIC)
+ ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+
+ VLAN_CAPABILITIES(ifp);
+		/* Turning off LRO also means turning off HW LRO, if it is enabled */
+ if (mask & IFCAP_LRO) {
+ int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ bool need_restart = false;
+
+ ifp->if_capenable ^= IFCAP_LRO;
+ if (!(ifp->if_capenable & IFCAP_LRO)) {
+ if (priv->params.hw_lro_en) {
+ priv->params.hw_lro_en = false;
+ need_restart = true;
+ /* Not sure this is the correct way */
+ priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
+ }
+ }
+ if (was_opened && need_restart) {
+ mlx5e_close_locked(ifp);
+ mlx5e_open_locked(ifp);
+ }
+ }
+ if (mask & IFCAP_HWRXTSTMP) {
+ ifp->if_capenable ^= IFCAP_HWRXTSTMP;
+ if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
+ if (priv->clbr_done == 0)
+ mlx5e_reset_calibration_callout(priv);
+ } else {
+ callout_drain(&priv->tstmp_clbr);
+ priv->clbr_done = 0;
+ }
+ }
+out:
+ PRIV_UNLOCK(priv);
+ break;
+
+ case SIOCGI2C:
+ ifr = (struct ifreq *)data;
+
+ /*
+ * Copy from the user-space address ifr_data to the
+ * kernel-space address i2c
+ */
+ error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
+ if (error)
+ break;
+
+ if (i2c.len > sizeof(i2c.data)) {
+ error = EINVAL;
+ break;
+ }
+
+ PRIV_LOCK(priv);
+ /* Get module_num which is required for the query_eeprom */
+ error = mlx5_query_module_num(priv->mdev, &module_num);
+ if (error) {
+ if_printf(ifp, "Query module num failed, eeprom "
+ "reading is not supported\n");
+ error = EINVAL;
+ goto err_i2c;
+ }
+ /* Check if module is present before doing an access */
+ module_status = mlx5_query_module_status(priv->mdev, module_num);
+ if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
+ module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
+ error = EINVAL;
+ goto err_i2c;
+ }
+ /*
+		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
+ * The internal conversion is as follows:
+ */
+ if (i2c.dev_addr == 0xA0)
+ read_addr = MLX5E_I2C_ADDR_LOW;
+ else if (i2c.dev_addr == 0xA2)
+ read_addr = MLX5E_I2C_ADDR_HIGH;
+ else {
+ if_printf(ifp, "Query eeprom failed, "
+ "Invalid Address: %X\n", i2c.dev_addr);
+ error = EINVAL;
+ goto err_i2c;
+ }
+ error = mlx5_query_eeprom(priv->mdev,
+ read_addr, MLX5E_EEPROM_LOW_PAGE,
+ (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
+ (uint32_t *)i2c.data, &size_read);
+ if (error) {
+ if_printf(ifp, "Query eeprom failed, eeprom "
+ "reading is not supported\n");
+ error = EINVAL;
+ goto err_i2c;
+ }
+
+ if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
+ error = mlx5_query_eeprom(priv->mdev,
+ read_addr, MLX5E_EEPROM_LOW_PAGE,
+ (uint32_t)(i2c.offset + size_read),
+ (uint32_t)(i2c.len - size_read), module_num,
+ (uint32_t *)(i2c.data + size_read), &size_read);
+ }
+ if (error) {
+ if_printf(ifp, "Query eeprom failed, eeprom "
+ "reading is not supported\n");
+ error = EINVAL;
+ goto err_i2c;
+ }
+
+ error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
+err_i2c:
+ PRIV_UNLOCK(priv);
+ break;
+
+ default:
+ error = ether_ioctl(ifp, command, data);
+ break;
+ }
+ return (error);
+}
+
+static int
+mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ /*
+	 * TODO: uncomment once FW really sets all these bits if
+ * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
+ * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
+ * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
+ * -ENOTSUPP;
+ */
+
+	/* TODO: add more must-have features */
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return (-ENODEV);
+
+ return (0);
+}
+
+static u16
+mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
+{
+ int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
+
+	return (bf_buf_size -
+	    sizeof(struct mlx5e_tx_wqe) +
+	    2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/);
+}
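+
+/*
+ * Worked example for the computation above (assumed values, for
+ * illustration only): with log_bf_reg_size = 9 the blue flame register
+ * is 512 bytes and bf_buf_size = 512 / 2 = 256 bytes. Assuming
+ * sizeof(struct mlx5e_tx_wqe) is 64 bytes, the maximum inline size
+ * becomes 256 - 64 + 2 = 194 bytes.
+ */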
+
+static void
+mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
+ struct mlx5e_priv *priv,
+ int num_comp_vectors)
+{
+ /*
+ * TODO: Consider link speed for setting "log_sq_size",
+ * "log_rq_size" and "cq_moderation_xxx":
+ */
+ priv->params.log_sq_size =
+ MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ priv->params.log_rq_size =
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+ priv->params.rx_cq_moderation_usec =
+ MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ priv->params.rx_cq_moderation_mode =
+ MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
+ priv->params.rx_cq_moderation_pkts =
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ priv->params.tx_cq_moderation_usec =
+ MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ priv->params.tx_cq_moderation_pkts =
+ MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ priv->params.min_rx_wqes =
+ MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
+ priv->params.rx_hash_log_tbl_sz =
+ (order_base_2(num_comp_vectors) >
+ MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
+ order_base_2(num_comp_vectors) :
+ MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
+ priv->params.num_tc = 1;
+ priv->params.default_vlan_prio = 0;
+ priv->counter_set_id = -1;
+ priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
+ mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
+
+ /*
+	 * HW LRO is currently defaulted to off. When that changes, the HW
+	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be considered.
+ */
+ priv->params.hw_lro_en = false;
+ priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+ priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
+
+ priv->mdev = mdev;
+ priv->params.num_channels = num_comp_vectors;
+ priv->params.channels_rsss = 1;
+ priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
+ priv->queue_mapping_channel_mask =
+ roundup_pow_of_two(num_comp_vectors) - 1;
+ priv->num_tc = priv->params.num_tc;
+ priv->default_vlan_prio = priv->params.default_vlan_prio;
+
+ INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+}
+
+static int
+mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
+ struct mlx5_core_mr *mkey)
+{
+ struct ifnet *ifp = priv->ifp;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL) {
+ if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
+ return (-ENOMEM);
+ }
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+ if (err)
+ if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
+ __func__, err);
+
+ kvfree(in);
+ return (err);
+}
+
+static const char *mlx5e_vport_stats_desc[] = {
+ MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_pport_stats_desc[] = {
+ MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
+};
+
+static void
+mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
+{
+ mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
+ sx_init(&priv->state_lock, "mlx5state");
+ callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
+ MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
+}
+
+static void
+mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
+{
+ mtx_destroy(&priv->async_events_mtx);
+ sx_destroy(&priv->state_lock);
+}
+
+static int
+sysctl_firmware(SYSCTL_HANDLER_ARGS)
+{
+ /*
+	 * The string format is "%d.%d.%d".
+	 * fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536 needs at
+	 * most 5 characters to store. Adding the two "." separators and
+	 * the terminating NUL, 18 (5*3 + 3) characters are enough.
+ */
+ char fw[18];
+ struct mlx5e_priv *priv = arg1;
+ int error;
+
+ snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
+ fw_rev_sub(priv->mdev));
+ error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
+ return (error);
+}
+
+static void
+mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
+{
+ int i;
+
+ for (i = 0; i < ch->num_tc; i++)
+ mlx5e_drain_sq(&ch->sq[i]);
+}
+
+static void
+mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
+{
+
+ sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
+ sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
+ mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+ sq->doorbell.d64 = 0;
+}
+
+void
+mlx5e_resume_sq(struct mlx5e_sq *sq)
+{
+ int err;
+
+ /* check if already enabled */
+ if (sq->stopped == 0)
+ return;
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
+ MLX5_SQC_STATE_RST);
+ if (err != 0) {
+ if_printf(sq->ifp,
+ "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
+ }
+
+ sq->cc = 0;
+ sq->pc = 0;
+
+ /* reset doorbell prior to moving from RST to RDY */
+ mlx5e_reset_sq_doorbell_record(sq);
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
+ MLX5_SQC_STATE_RDY);
+ if (err != 0) {
+ if_printf(sq->ifp,
+ "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
+ }
+
+ mtx_lock(&sq->lock);
+ sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
+ sq->stopped = 0;
+ mtx_unlock(&sq->lock);
+}
+
+static void
+mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
+{
+ int i;
+
+ for (i = 0; i < ch->num_tc; i++)
+ mlx5e_resume_sq(&ch->sq[i]);
+}
+
+static void
+mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
+{
+ struct mlx5e_rq *rq = &ch->rq;
+ int err;
+
+ mtx_lock(&rq->mtx);
+ rq->enabled = 0;
+ callout_stop(&rq->watchdog);
+ mtx_unlock(&rq->mtx);
+
+ callout_drain(&rq->watchdog);
+
+ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+ if (err != 0) {
+ if_printf(rq->ifp,
+		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
+ }
+
+ while (!mlx5_wq_ll_is_empty(&rq->wq)) {
+ msleep(1);
+ rq->cq.mcq.comp(&rq->cq.mcq);
+ }
+
+ /*
+	 * Transitioning into the RST state allows the FW to track fewer
+	 * ERR state queues, thus reducing the receive queue flushing time.
+ */
+ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
+ if (err != 0) {
+ if_printf(rq->ifp,
+ "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
+ }
+}
+
+static void
+mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
+{
+ struct mlx5e_rq *rq = &ch->rq;
+ int err;
+
+ rq->wq.wqe_ctr = 0;
+ mlx5_wq_ll_update_db_record(&rq->wq);
+ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err != 0) {
+ if_printf(rq->ifp,
+ "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
+ }
+
+ rq->enabled = 1;
+
+ rq->cq.mcq.comp(&rq->cq.mcq);
+}
+
+void
+mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
+{
+ int i;
+
+ if (priv->channel == NULL)
+ return;
+
+ for (i = 0; i < priv->params.num_channels; i++) {
+
+ if (!priv->channel[i])
+ continue;
+
+ if (value)
+ mlx5e_disable_tx_dma(priv->channel[i]);
+ else
+ mlx5e_enable_tx_dma(priv->channel[i]);
+ }
+}
+
+void
+mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
+{
+ int i;
+
+ if (priv->channel == NULL)
+ return;
+
+ for (i = 0; i < priv->params.num_channels; i++) {
+
+ if (!priv->channel[i])
+ continue;
+
+ if (value)
+ mlx5e_disable_rx_dma(priv->channel[i]);
+ else
+ mlx5e_enable_rx_dma(priv->channel[i]);
+ }
+}
+
+u8
+mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
+{
+ u8 min_inline_mode;
+
+ min_inline_mode = MLX5_INLINE_MODE_L2;
+ mlx5_query_min_inline(mdev, &min_inline_mode);
+ if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
+ !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ min_inline_mode = MLX5_INLINE_MODE_L2;
+
+ return (min_inline_mode);
+}
+
+static void
+mlx5e_add_hw_stats(struct mlx5e_priv *priv)
+{
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
+ OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
+ sysctl_firmware, "A", "HCA firmware version");
+
+ SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
+ OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
+ "Board ID");
+}
+
+static int
+mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ uint32_t tx_pfc;
+ uint32_t value;
+ int error;
+
+ PRIV_LOCK(priv);
+
+ tx_pfc = priv->params.tx_priority_flow_control;
+
+ /* get current value */
+ value = (tx_pfc >> arg2) & 1;
+
+ error = sysctl_handle_32(oidp, &value, 0, req);
+
+ /* range check value */
+ if (value != 0)
+ priv->params.tx_priority_flow_control |= (1 << arg2);
+ else
+ priv->params.tx_priority_flow_control &= ~(1 << arg2);
+
+ /* check if update is required */
+ if (error == 0 && priv->gone == 0 &&
+ tx_pfc != priv->params.tx_priority_flow_control) {
+ error = -mlx5e_set_port_pfc(priv);
+ /* restore previous value */
+ if (error != 0)
+			priv->params.tx_priority_flow_control = tx_pfc;
+ }
+ PRIV_UNLOCK(priv);
+
+ return (error);
+}
+
+static int
+mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ uint32_t rx_pfc;
+ uint32_t value;
+ int error;
+
+ PRIV_LOCK(priv);
+
+ rx_pfc = priv->params.rx_priority_flow_control;
+
+ /* get current value */
+ value = (rx_pfc >> arg2) & 1;
+
+ error = sysctl_handle_32(oidp, &value, 0, req);
+
+ /* range check value */
+ if (value != 0)
+ priv->params.rx_priority_flow_control |= (1 << arg2);
+ else
+ priv->params.rx_priority_flow_control &= ~(1 << arg2);
+
+ /* check if update is required */
+ if (error == 0 && priv->gone == 0 &&
+ rx_pfc != priv->params.rx_priority_flow_control) {
+ error = -mlx5e_set_port_pfc(priv);
+ /* restore previous value */
+ if (error != 0)
+			priv->params.rx_priority_flow_control = rx_pfc;
+ }
+ PRIV_UNLOCK(priv);
+
+ return (error);
+}
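+
+/*
+ * Example of the per-priority bit handling above: each of the eight
+ * sysctl nodes passes its priority as arg2, so writing 1 to the node
+ * for priority 3 sets bit (1 << 3) = 0x08 in the corresponding
+ * priority flow control mask, and writing 0 clears it again.
+ */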
+
+static void
+mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
+{
+	unsigned int x;
+	char path[96];
+#if (__FreeBSD_version < 1100000)
+	int value;
+#endif
+	int error;
+
+ /* enable pauseframes by default */
+ priv->params.tx_pauseframe_control = 1;
+ priv->params.rx_pauseframe_control = 1;
+
+	/* disable priority flow control (PFC) by default */
+ priv->params.tx_priority_flow_control = 0;
+ priv->params.rx_priority_flow_control = 0;
+
+#if (__FreeBSD_version < 1100000)
+ /* compute path for sysctl */
+ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
+ device_get_unit(priv->mdev->pdev->dev.bsddev));
+
+ /* try to fetch tunable, if any */
+ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
+
+ /* compute path for sysctl */
+ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
+ device_get_unit(priv->mdev->pdev->dev.bsddev));
+
+ /* try to fetch tunable, if any */
+ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
+
+ for (x = 0; x != 8; x++) {
+
+ /* compute path for sysctl */
+ snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
+ device_get_unit(priv->mdev->pdev->dev.bsddev), x);
+
+ /* try to fetch tunable, if any */
+ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
+ priv->params.tx_priority_flow_control |= 1 << x;
+
+ /* compute path for sysctl */
+ snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
+ device_get_unit(priv->mdev->pdev->dev.bsddev), x);
+
+ /* try to fetch tunable, if any */
+ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
+ priv->params.rx_priority_flow_control |= 1 << x;
+ }
+#endif
+
+ /* register pauseframe SYSCTLs */
+ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
+ &priv->params.tx_pauseframe_control, 0,
+ "Set to enable TX pause frames. Clear to disable.");
+
+ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
+ &priv->params.rx_pauseframe_control, 0,
+ "Set to enable RX pause frames. Clear to disable.");
+
+	/* register priority flow control (PFC) SYSCTLs */
+ for (x = 0; x != 8; x++) {
+ snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
+
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
+ "Set to enable TX ports flow control frames for given priority. Clear to disable.");
+
+ snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
+
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
+ "Set to enable RX ports flow control frames for given priority. Clear to disable.");
+ }
+
+ PRIV_LOCK(priv);
+
+ /* range check */
+ priv->params.tx_pauseframe_control =
+ priv->params.tx_pauseframe_control ? 1 : 0;
+ priv->params.rx_pauseframe_control =
+ priv->params.rx_pauseframe_control ? 1 : 0;
+
+ /* update firmware */
+ error = mlx5e_set_port_pause_and_pfc(priv);
+ if (error == -EINVAL) {
+ if_printf(priv->ifp,
+ "Global pauseframes must be disabled before enabling PFC.\n");
+ priv->params.rx_priority_flow_control = 0;
+ priv->params.tx_priority_flow_control = 0;
+
+ /* update firmware */
+ (void) mlx5e_set_port_pause_and_pfc(priv);
+ }
+ PRIV_UNLOCK(priv);
+}
+
+static void *
+mlx5e_create_ifp(struct mlx5_core_dev *mdev)
+{
+ struct ifnet *ifp;
+ struct mlx5e_priv *priv;
+ u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
+ struct sysctl_oid_list *child;
+ int ncv = mdev->priv.eq_table.num_comp_vectors;
+ char unit[16];
+ int err;
+ int i;
+ u32 eth_proto_cap;
+
+ if (mlx5e_check_required_hca_cap(mdev)) {
+ mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
+ return (NULL);
+ }
+ priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
+ mlx5e_priv_mtx_init(priv);
+
+ ifp = priv->ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ mlx5_core_err(mdev, "if_alloc() failed\n");
+ goto err_free_priv;
+ }
+ ifp->if_softc = priv;
+ if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_init = mlx5e_open;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = mlx5e_ioctl;
+ ifp->if_transmit = mlx5e_xmit;
+ ifp->if_qflush = if_qflush;
+#if (__FreeBSD_version >= 1100000)
+ ifp->if_get_counter = mlx5e_get_counter;
+#endif
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ /*
+ * Set driver features
+ */
+ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
+ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
+ ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
+ ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
+ ifp->if_capabilities |= IFCAP_LRO;
+ ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
+ ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
+#ifdef RATELIMIT
+ ifp->if_capabilities |= IFCAP_TXRTLMT;
+ ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
+ ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
+ ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
+ ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
+#endif
+
+ /* set TSO limits so that we don't have to drop TX packets */
+ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+ ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
+ ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
+
+ ifp->if_capenable = ifp->if_capabilities;
+ ifp->if_hwassist = 0;
+ if (ifp->if_capenable & IFCAP_TSO)
+ ifp->if_hwassist |= CSUM_TSO;
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+ sysctl_ctx_init(&priv->sysctl_ctx_channel_debug);
+
+ /* ifnet sysctl tree */
+ sysctl_ctx_init(&priv->sysctl_ctx);
+ priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
+ OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
+ if (priv->sysctl_ifnet == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+ snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
+ priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
+ if (priv->sysctl_ifnet == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+
+ /* HW sysctl tree */
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
+ priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
+ OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
+ if (priv->sysctl_hw == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+ mlx5e_build_ifp_priv(mdev, priv, ncv);
+
+ snprintf(unit, sizeof(unit), "mce%u_wq",
+ device_get_unit(mdev->pdev->dev.bsddev));
+ priv->wq = alloc_workqueue(unit, 0, 1);
+ if (priv->wq == NULL) {
+ if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
+ goto err_free_sysctl;
+ }
+
+ err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
+ __func__, err);
+ goto err_free_wq;
+ }
+ err = mlx5_core_alloc_pd(mdev, &priv->pdn);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
+ __func__, err);
+ goto err_unmap_free_uar;
+ }
+ err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
+ __func__, err);
+ goto err_dealloc_pd;
+ }
+ err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
+ __func__, err);
+ goto err_dealloc_transport_domain;
+ }
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
+
+ /* check if we should generate a random MAC address */
+ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
+ is_zero_ether_addr(dev_addr)) {
+ random_ether_addr(dev_addr);
+ if_printf(ifp, "Assigned random MAC address\n");
+ }
+#ifdef RATELIMIT
+ err = mlx5e_rl_init(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
+ __func__, err);
+ goto err_create_mkey;
+ }
+#endif
+
+ /* set default MTU */
+ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
+
+ /* Set desc */
+ device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
+
+ /* Set default media status */
+ priv->media_status_last = IFM_AVALID;
+ priv->media_active_last = IFM_ETHER | IFM_AUTO |
+ IFM_ETH_RXPAUSE | IFM_FDX;
+
+ /* setup default pauseframes configuration */
+ mlx5e_setup_pauseframes(priv);
+
+ err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (err) {
+ eth_proto_cap = 0;
+ if_printf(ifp, "%s: Query port media capability failed, %d\n",
+ __func__, err);
+ }
+
+ /* Setup supported medias */
+ ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
+ mlx5e_media_change, mlx5e_media_status);
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
+ ifmedia_add(&priv->media,
+ mlx5e_mode_table[i].subtype |
+ IFM_ETHER, 0, NULL);
+ ifmedia_add(&priv->media,
+ mlx5e_mode_table[i].subtype |
+ IFM_ETHER | IFM_FDX |
+ IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
+ }
+ }
+
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
+ IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
+
+ /* Set autoselect by default */
+ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
+ IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
+ ether_ifattach(ifp, dev_addr);
+
+ /* Register for VLAN events */
+ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
+ priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
+
+ /* Link is down by default */
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+
+ mlx5e_enable_async_events(priv);
+
+ mlx5e_add_hw_stats(priv);
+
+ mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
+ priv->stats.vport.arg);
+
+ mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
+ priv->stats.pport.arg);
+
+ mlx5e_create_ethtool(priv);
+
+ mtx_lock(&priv->async_events_mtx);
+ mlx5e_update_stats(priv);
+ mtx_unlock(&priv->async_events_mtx);
+
+ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
+ &priv->clbr_done, 0,
+ "RX timestamps calibration state");
+ callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
+ mlx5e_reset_calibration_callout(priv);
+
+ return (priv);
+
+#ifdef RATELIMIT
+err_create_mkey:
+ mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
+#endif
+err_dealloc_transport_domain:
+ mlx5_dealloc_transport_domain(mdev, priv->tdn);
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, priv->pdn);
+
+err_unmap_free_uar:
+ mlx5_unmap_free_uar(mdev, &priv->cq_uar);
+
+err_free_wq:
+ destroy_workqueue(priv->wq);
+
+err_free_sysctl:
+ sysctl_ctx_free(&priv->sysctl_ctx);
+ sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
+
+ if_free(ifp);
+
+err_free_priv:
+ mlx5e_priv_mtx_destroy(priv);
+ free(priv, M_MLX5EN);
+ return (NULL);
+}
+
+static void
+mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct ifnet *ifp = priv->ifp;
+
+ /* don't allow more IOCTLs */
+ priv->gone = 1;
+
+ /*
+ * Clear the device description to avoid use after free,
+ * because the bsddev is not destroyed when this module is
+ * unloaded:
+ */
+ device_set_desc(mdev->pdev->dev.bsddev, NULL);
+
+ /* XXX wait a bit to allow IOCTL handlers to complete */
+ pause("W", hz);
+
+#ifdef RATELIMIT
+ /*
+	 * The kernel can have references via the m_snd_tags into
+ * the ratelimit channels, and these must go away before
+ * detaching:
+ */
+ while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
+ if_printf(priv->ifp, "Waiting for all ratelimit connections "
+ "to terminate\n");
+ pause("W", hz);
+ }
+#endif
+ /* stop watchdog timer */
+ callout_drain(&priv->watchdog);
+
+ callout_drain(&priv->tstmp_clbr);
+
+ if (priv->vlan_attach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
+ if (priv->vlan_detach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
+
+ /* make sure device gets closed */
+ PRIV_LOCK(priv);
+ mlx5e_close_locked(ifp);
+ PRIV_UNLOCK(priv);
+
+ /* unregister device */
+ ifmedia_removeall(&priv->media);
+ ether_ifdetach(ifp);
+ if_free(ifp);
+
+#ifdef RATELIMIT
+ mlx5e_rl_cleanup(priv);
+#endif
+ /* destroy all remaining sysctl nodes */
+ if (priv->sysctl_debug) {
+ sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
+ sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
+ }
+ sysctl_ctx_free(&priv->stats.vport.ctx);
+ sysctl_ctx_free(&priv->stats.pport.ctx);
+ sysctl_ctx_free(&priv->sysctl_ctx);
+
+ mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
+ mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+ mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
+ mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
+ mlx5e_disable_async_events(priv);
+ destroy_workqueue(priv->wq);
+ mlx5e_priv_mtx_destroy(priv);
+ free(priv, M_MLX5EN);
+}
+
+static void *
+mlx5e_get_ifp(void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ return (priv->ifp);
+}
+
+static struct mlx5_interface mlx5e_interface = {
+ .add = mlx5e_create_ifp,
+ .remove = mlx5e_destroy_ifp,
+ .event = mlx5e_async_event,
+ .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
+ .get_dev = mlx5e_get_ifp,
+};
+
+void
+mlx5e_init(void)
+{
+ mlx5_register_interface(&mlx5e_interface);
+}
+
+void
+mlx5e_cleanup(void)
+{
+ mlx5_unregister_interface(&mlx5e_interface);
+}
+
+module_init_order(mlx5e_init, SI_ORDER_THIRD);
+module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
+
+#if (__FreeBSD_version >= 1100000)
+MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
+#endif
+MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
+MODULE_VERSION(mlx5en, 1);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
new file mode 100644
index 000000000000..4dac7377cef1
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -0,0 +1,1542 @@
+/*-
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#ifdef RATELIMIT
+
+static int mlx5e_rl_open_workers(struct mlx5e_priv *);
+static void mlx5e_rl_close_workers(struct mlx5e_priv *);
+static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
+static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
+ struct sysctl_oid *, const char *name, const char *desc);
+static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc);
+static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
+static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);
+
+static void
+mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, rl->priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+ uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
+ MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
+ MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);
+
+ switch (rl->param.tx_coalesce_mode) {
+ case 0:
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ default:
+ if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+ else
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ break;
+ }
+}
+
+static void
+mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
+ struct mlx5e_rl_channel_param *cparam)
+{
+ memset(cparam, 0, sizeof(*cparam));
+
+ mlx5e_rl_build_sq_param(rl, &cparam->sq);
+ mlx5e_rl_build_cq_param(rl, &cparam->cq);
+}
+
+static int
+mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
+ struct mlx5e_sq_param *param, int ix)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *sqc = param->sqc;
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ int err;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */
+ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */
+ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &sq->dma_tag)))
+ goto done;
+
+ /* use shared UAR */
+ sq->uar = priv->rl.sq_uar;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
+ &sq->wq_ctrl);
+ if (err)
+ goto err_free_dma_tag;
+
+ sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
+ /*
+ * The sq->bf_buf_size variable is intentionally left zero so
+ * that the doorbell writes will occur at the same memory
+ * location.
+ */
+
+ err = mlx5e_alloc_sq_db(sq);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ sq->mkey_be = cpu_to_be32(priv->mr.key);
+ sq->ifp = priv->ifp;
+ sq->priv = priv;
+ sq->max_inline = priv->params.tx_max_inline;
+ sq->min_inline_mode = priv->params.tx_min_inline_mode;
+ sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
+
+ return (0);
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+err_free_dma_tag:
+ bus_dma_tag_destroy(sq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
+{
+
+ mlx5e_free_sq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static int
+mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
+ struct mlx5e_sq_param *param, int ix)
+{
+ int err;
+
+ err = mlx5e_rl_create_sq(priv, sq, param, ix);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_sq(sq, param, priv->rl.tisn);
+ if (err)
+ goto err_destroy_sq;
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
+ if (err)
+ goto err_disable_sq;
+
+ return (0);
+
+err_disable_sq:
+ mlx5e_disable_sq(sq);
+err_destroy_sq:
+ mlx5e_rl_destroy_sq(sq);
+
+ return (err);
+}
+
+static void
+mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
+{
+ mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
+ mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);
+
+ callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
+
+ sq->cev_factor = priv->rl.param.tx_completion_fact;
+
+ /* ensure the TX completion event factor is not zero */
+ if (sq->cev_factor == 0)
+ sq->cev_factor = 1;
+}
+
+static int
+mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
+ struct mlx5e_rl_channel_param *cparam,
+ struct mlx5e_sq *volatile *ppsq)
+{
+ struct mlx5e_priv *priv = rlw->priv;
+ struct mlx5e_sq *sq;
+ int err;
+
+ sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* init mutexes */
+ mlx5e_rl_chan_mtx_init(priv, sq);
+
+ /* open TX completion queue */
+ err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq,
+ &mlx5e_tx_cq_comp, eq_ix);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix);
+ if (err)
+ goto err_close_tx_cq;
+
+ /* store TX channel pointer */
+ *ppsq = sq;
+
+ /* poll TX queue initially */
+ sq->cq.mcq.comp(&sq->cq.mcq);
+
+ return (0);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&sq->cq);
+
+err_free:
+ /* destroy mutexes */
+ mtx_destroy(&sq->lock);
+ mtx_destroy(&sq->comp_lock);
+ free(sq, M_MLX5EN);
+ atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
+ return (err);
+}
+
+static void
+mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
+{
+ struct mlx5e_sq *sq = *ppsq;
+
+ /* check if channel is already closed */
+ if (sq == NULL)
+ return;
+ /* ensure channel pointer is no longer used */
+ *ppsq = NULL;
+
+ /* teardown and destroy SQ */
+ mlx5e_drain_sq(sq);
+ mlx5e_disable_sq(sq);
+ mlx5e_rl_destroy_sq(sq);
+
+ /* close CQ */
+ mlx5e_close_cq(&sq->cq);
+
+ /* destroy mutexes */
+ mtx_destroy(&sq->lock);
+ mtx_destroy(&sq->comp_lock);
+
+ free(sq, M_MLX5EN);
+}
+
+static void
+mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
+{
+ /*
+ * Limit the maximum distance between completion events to
+ * half of the currently set TX queue size.
+ *
+ * The maximum number of queue entries a single IP packet can
+ * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
+ *
+ * The worst case max value is then given as below:
+ */
+ uint64_t max = rl->param.tx_queue_size /
+ (2 * MLX5_SEND_WQE_MAX_WQEBBS);
+
+ /*
+ * Update the maximum completion factor value in case the
+ * tx_queue_size field changed. Ensure we don't overflow
+ * 16-bits.
+ */
+ if (max < 1)
+ max = 1;
+ else if (max > 65535)
+ max = 65535;
+ rl->param.tx_completion_fact_max = max;
+
+ /*
+ * Verify that the current TX completion factor is within the
+ * given limits:
+ */
+ if (rl->param.tx_completion_fact < 1)
+ rl->param.tx_completion_fact = 1;
+ else if (rl->param.tx_completion_fact > max)
+ rl->param.tx_completion_fact = max;
+}
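+
+/*
+ * Numeric example for the clamping above (assumed values): with
+ * tx_queue_size = 1024 entries and MLX5_SEND_WQE_MAX_WQEBBS = 16, the
+ * maximum completion factor is 1024 / (2 * 16) = 32, i.e. at most 32
+ * send operations may pass between two requested completion events.
+ */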
+
+static int
+mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
+{
+ struct mlx5e_priv *priv = sq->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY);
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
+
+ err = mlx5_core_modify_sq(mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+/*
+ * This function searches the configured rate limit table for the best
+ * match, so that a single socket based application cannot allocate all
+ * of the available hardware rates. If the user-selected rate deviates
+ * too much from the closest rate available in the rate limit table,
+ * the unlimited rate is selected.
+ */
+static uint64_t
+mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
+{
+ uint64_t distance = -1ULL;
+ uint64_t diff;
+ uint64_t retval = 0; /* unlimited */
+ uint64_t x;
+
+ /* search for closest rate */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ uint64_t rate = rl->rate_limit_table[x];
+ if (rate == 0)
+ continue;
+
+ if (rate > user_rate)
+ diff = rate - user_rate;
+ else
+ diff = user_rate - rate;
+
+ /* check if distance is smaller than previous rate */
+ if (diff < distance) {
+ distance = diff;
+ retval = rate;
+ }
+ }
+
+ /* range check for multiplication below */
+ if (user_rate > rl->param.tx_limit_max)
+ user_rate = rl->param.tx_limit_max;
+
+ /* fallback to unlimited, if rate deviates too much */
+ if (distance > howmany(user_rate *
+ rl->param.tx_allowed_deviation, 1000ULL))
+ retval = 0;
+
+ return (retval);
+}
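+
+/*
+ * Example of the selection above (hypothetical rate table and
+ * deviation setting): with configured rates of 100 Mbit/s and
+ * 1000 Mbit/s and a requested user_rate of 900 Mbit/s, the closest
+ * rate is 1000 Mbit/s at a distance of 100 Mbit/s. With
+ * tx_allowed_deviation = 50 (per mille) the allowed distance is
+ * 900 * 50 / 1000 = 45 Mbit/s, so the function falls back to the
+ * unlimited rate (0). With tx_allowed_deviation = 200 the allowed
+ * distance is 180 Mbit/s and 1000 Mbit/s is returned.
+ */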
+
+/*
+ * This function sets the requested rate for a rate limit channel, in
+ * bits per second. The requested rate is filtered through
+ * mlx5e_rl_find_best_rate_locked() above.
+ */
+static int
+mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel *channel, uint64_t rate)
+{
+ struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
+ struct mlx5e_sq *sq;
+ uint64_t temp;
+ uint16_t index;
+ uint16_t burst;
+ int error;
+
+ if (rate != 0) {
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ MLX5E_RL_RLOCK(rl);
+
+ /* get current burst size in bytes */
+ temp = rl->param.tx_burst_size *
+ MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);
+
+ /* limit burst size to 64K currently */
+ if (temp > 65535)
+ temp = 65535;
+ burst = temp;
+
+ /* find best rate */
+ rate = mlx5e_rl_find_best_rate_locked(rl, rate);
+
+ MLX5E_RL_RUNLOCK(rl);
+
+ if (rate == 0) {
+ /* rate doesn't exist, fall back to unlimited */
+ error = EINVAL;
+ index = 0;
+ rate = 0;
+ atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+ } else {
+ /* get a reference on the new rate */
+ error = -mlx5_rl_add_rate(rlw->priv->mdev,
+ howmany(rate, 1000), burst, &index);
+
+ if (error != 0) {
+ /* adding rate failed, fall back to unlimited */
+ index = 0;
+ rate = 0;
+ atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
+ }
+ }
+ MLX5E_RL_WORKER_LOCK(rlw);
+ } else {
+ index = 0;
+ burst = 0; /* default */
+ }
+
+ /* atomically swap rates */
+ temp = channel->last_rate;
+ channel->last_rate = rate;
+ rate = temp;
+
+ /* atomically swap burst size */
+ temp = channel->last_burst;
+ channel->last_burst = burst;
+ burst = temp;
+
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ /* put reference on the old rate, if any */
+ if (rate != 0) {
+ mlx5_rl_remove_rate(rlw->priv->mdev,
+ howmany(rate, 1000), burst);
+ }
+
+ /* set new rate */
+ sq = channel->sq;
+ if (sq != NULL) {
+ error = mlx5e_rl_modify_sq(sq, index);
+ if (error != 0)
+ atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
+ } else
+ error = 0;
+ MLX5E_RL_WORKER_LOCK(rlw);
+
+ return (-error);
+}
+
+static void
+mlx5e_rl_worker(void *arg)
+{
+ struct thread *td;
+ struct mlx5e_rl_worker *rlw = arg;
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_priv *priv;
+ unsigned ix;
+ uint64_t x;
+ int error;
+
+ /* set thread priority */
+ td = curthread;
+
+ thread_lock(td);
+ sched_prio(td, PI_SWI(SWI_NET));
+ thread_unlock(td);
+
+ priv = rlw->priv;
+
+ /* compute completion vector */
+ ix = (rlw - priv->rl.workers) %
+ priv->mdev->priv.eq_table.num_comp_vectors;
+
+ /* TODO bind to CPU */
+
+ /* open all the SQs */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + x;
+
+#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
+ if (channel->state == MLX5E_RL_ST_FREE)
+ continue;
+#endif
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ MLX5E_RL_RLOCK(&priv->rl);
+ error = mlx5e_rl_open_channel(rlw, ix,
+ &priv->rl.chan_param, &channel->sq);
+ MLX5E_RL_RUNLOCK(&priv->rl);
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_channel failed: %d\n", error);
+ break;
+ }
+ mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
+ }
+ while (1) {
+ if (STAILQ_FIRST(&rlw->process_head) == NULL) {
+ /* check if we are tearing down */
+ if (rlw->worker_done != 0)
+ break;
+ cv_wait(&rlw->cv, &rlw->mtx);
+ }
+ /* check if we are tearing down */
+ if (rlw->worker_done != 0)
+ break;
+ channel = STAILQ_FIRST(&rlw->process_head);
+ if (channel != NULL) {
+ STAILQ_REMOVE_HEAD(&rlw->process_head, entry);
+
+ switch (channel->state) {
+ case MLX5E_RL_ST_MODIFY:
+ channel->state = MLX5E_RL_ST_USED;
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ /* create channel on demand */
+ if (channel->sq == NULL) {
+ MLX5E_RL_RLOCK(&priv->rl);
+ error = mlx5e_rl_open_channel(rlw, ix,
+ &priv->rl.chan_param, &channel->sq);
+ MLX5E_RL_RUNLOCK(&priv->rl);
+
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_channel failed: %d\n", error);
+ } else {
+ atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
+ }
+ } else {
+ mlx5e_resume_sq(channel->sq);
+ }
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ /* convert from bytes/s to bits/s and set new rate */
+ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
+ channel->new_rate * 8ULL);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
+ error);
+ }
+ break;
+
+ case MLX5E_RL_ST_DESTROY:
+ error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
+ error);
+ }
+ if (channel->sq != NULL) {
+ /*
+ * Make sure all packets are
+ * transmitted before SQ is
+ * returned to free list:
+ */
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ mlx5e_drain_sq(channel->sq);
+ MLX5E_RL_WORKER_LOCK(rlw);
+ }
+ /* put the channel back into the free list */
+ STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
+ channel->state = MLX5E_RL_ST_FREE;
+ atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
+ break;
+ default:
+ /* NOP */
+ break;
+ }
+ }
+ }
+
+ /* close all the SQs */
+ for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + x;
+
+ /* update the initial rate */
+ channel->init_rate = channel->last_rate;
+
+ /* make sure we free up the rate resource */
+ mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
+
+ if (channel->sq != NULL) {
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ mlx5e_rl_close_channel(&channel->sq);
+ atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
+ MLX5E_RL_WORKER_LOCK(rlw);
+ }
+ }
+
+ rlw->worker_done = 0;
+ cv_broadcast(&rlw->cv);
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ kthread_exit();
+}
+
+static int
+mlx5e_rl_open_tis(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, prio, 0);
+ MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
+
+ return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
+}
+
+static void
+mlx5e_rl_close_tis(struct mlx5e_priv *priv)
+{
+ mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
+}
+
+static void
+mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
+ struct mlx5_core_dev *mdev)
+{
+ /* ratelimit workers */
+ param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
+ param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;
+
+ /* range check */
+ if (param->tx_worker_threads_def == 0 ||
+ param->tx_worker_threads_def > param->tx_worker_threads_max)
+ param->tx_worker_threads_def = param->tx_worker_threads_max;
+
+ /* ratelimit channels */
+ param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
+ param->tx_worker_threads_def;
+ param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;
+
+ /* range check */
+ if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
+ param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;
+
+ /* set default burst size */
+ param->tx_burst_size = 4; /* MTUs */
+
+ /*
+ * Set maximum burst size
+ *
+ * The burst size is multiplied by the MTU and clamped to the
+ * range 0 ... 65535 bytes, inclusively, before being fed into
+ * the firmware.
+ *
+ * NOTE: If the burst size or MTU is changed, only ratelimit
+ * connections made after the change will use the new burst
+ * size.
+ */
+ param->tx_burst_size_max = 255;
+
+ /* get firmware rate limits in 1000bit/s and convert them to bit/s */
+ param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
+ param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;
+
+ /* ratelimit table size */
+ param->tx_rates_max = mdev->priv.rl_table.max_size;
+
+ /* range check */
+ if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
+ param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;
+
+ /* set default number of rates */
+ param->tx_rates_def = param->tx_rates_max;
+
+ /* set maximum allowed rate deviation */
+ if (param->tx_limit_max != 0) {
+ /*
+ * Make sure the deviation multiplication doesn't
+ * overflow unsigned 64-bit:
+ */
+ param->tx_allowed_deviation_max = -1ULL /
+ param->tx_limit_max;
+ }
+ /* set default rate deviation */
+ param->tx_allowed_deviation = 50; /* 5.0% */
+
+ /* channel parameters */
+ param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
+ param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
+ param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
+ param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
+}
+
+static const char *mlx5e_rl_params_desc[] = {
+ MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_rl_table_params_desc[] = {
+ MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_rl_stats_desc[] = {
+ MLX5E_RL_STATS(MLX5E_STATS_DESC)
+};
+
+int
+mlx5e_rl_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ struct sysctl_oid *node;
+ struct sysctl_oid *stats;
+ char buf[64];
+ uint64_t i;
+ uint64_t j;
+ int error;
+
+ /* check if there is support for packet pacing */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
+ return (0);
+
+ rl->priv = priv;
+
+ sysctl_ctx_init(&rl->ctx);
+
+ sx_init(&rl->rl_sxlock, "ratelimit-sxlock");
+
+ /* allocate shared UAR for SQs */
+ error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar);
+ if (error)
+ goto done;
+
+ /* open own TIS domain for ratelimit SQs */
+ error = mlx5e_rl_open_tis(priv);
+ if (error)
+ goto err_uar;
+
+ /* setup default value for parameters */
+ mlx5e_rl_set_default_params(&rl->param, priv->mdev);
+
+ /* update the completion factor */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* create root node */
+ node = SYSCTL_ADD_NODE(&rl->ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "rate_limit", CTLFLAG_RW, NULL, "Rate limiting support");
+
+ if (node != NULL) {
+ /* create SYSCTLs */
+ for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
+ mlx5e_rl_sysctl_add_u64_oid(rl,
+ MLX5E_RL_PARAMS_INDEX(arg[i]),
+ node, mlx5e_rl_params_desc[2 * i],
+ mlx5e_rl_params_desc[2 * i + 1]);
+ }
+
+ stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
+ OID_AUTO, "stats", CTLFLAG_RD, NULL,
+ "Rate limiting statistics");
+ if (stats != NULL) {
+ /* create SYSCTLs */
+ for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
+ mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
+ stats, mlx5e_rl_stats_desc[2 * i],
+ mlx5e_rl_stats_desc[2 * i + 1]);
+ }
+ }
+ }
+
+ /* allocate workers array */
+ rl->workers = malloc(sizeof(rl->workers[0]) *
+ rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* allocate rate limit array */
+ rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
+ rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ if (node != NULL) {
+ /* create more SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
+ "A", "Show table of all configured TX rates");
+
+ /* try to fetch rate table from kernel environment */
+ for (i = 0; i != rl->param.tx_rates_def; i++) {
+ /* compute path for tunable */
+ snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
+ device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
+ if (TUNABLE_QUAD_FETCH(buf, &j))
+ mlx5e_rl_tx_limit_add(rl, j);
+ }
+
+ /* setup rate table sysctls */
+ for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
+ mlx5e_rl_sysctl_add_u64_oid(rl,
+ MLX5E_RL_PARAMS_INDEX(table_arg[i]),
+ node, mlx5e_rl_table_params_desc[2 * i],
+ mlx5e_rl_table_params_desc[2 * i + 1]);
+ }
+ }
+
+ for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + j;
+
+ rlw->priv = priv;
+
+ cv_init(&rlw->cv, "mlx5-worker-cv");
+ mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
+ STAILQ_INIT(&rlw->index_list_head);
+ STAILQ_INIT(&rlw->process_head);
+
+ rlw->channels = malloc(sizeof(rlw->channels[0]) *
+ rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
+ struct mlx5e_rl_channel *channel = rlw->channels + i;
+ channel->worker = rlw;
+ channel->m_snd_tag.ifp = priv->ifp;
+ STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ PRIV_LOCK(priv);
+ error = mlx5e_rl_open_workers(priv);
+ PRIV_UNLOCK(priv);
+
+ if (error != 0) {
+ if_printf(priv->ifp,
+ "mlx5e_rl_open_workers failed: %d\n", error);
+ }
+
+ return (0);
+
+err_uar:
+ mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
+done:
+ sysctl_ctx_free(&rl->ctx);
+ sx_destroy(&rl->rl_sxlock);
+ return (error);
+}
+
+static int
+mlx5e_rl_open_workers(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ struct thread *rl_thread = NULL;
+ struct proc *rl_proc = NULL;
+ uint64_t j;
+ int error;
+
+ if (priv->gone || rl->opened)
+ return (-EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+ /* compute channel parameters once */
+ mlx5e_rl_build_channel_param(rl, &rl->chan_param);
+ MLX5E_RL_WUNLOCK(rl);
+
+ for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + j;
+
+ /* start worker thread */
+ error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
+ RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
+ if (error != 0) {
+ if_printf(rl->priv->ifp,
+ "kproc_kthread_add failed: %d\n", error);
+ rlw->worker_done = 1;
+ }
+ }
+
+ rl->opened = 1;
+
+ return (0);
+}
+
+static void
+mlx5e_rl_close_workers(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ uint64_t y;
+
+ if (rl->opened == 0)
+ return;
+
+ /* tear down worker threads simultaneously */
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ /* tear down worker before freeing SQs */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ if (rlw->worker_done == 0) {
+ rlw->worker_done = 1;
+ cv_broadcast(&rlw->cv);
+ } else {
+ /* XXX thread not started */
+ rlw->worker_done = 0;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ /* wait for worker threads to exit */
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ /* wait for the worker thread to signal completion */
+ MLX5E_RL_WORKER_LOCK(rlw);
+ while (rlw->worker_done != 0)
+ cv_wait(&rlw->cv, &rlw->mtx);
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+ }
+
+ rl->opened = 0;
+}
+
+static void
+mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
+{
+ unsigned x;
+
+ MLX5E_RL_WLOCK(rl);
+ for (x = 0; x != rl->param.tx_rates_def; x++)
+ rl->rate_limit_table[x] = 0;
+ MLX5E_RL_WUNLOCK(rl);
+}
+
+void
+mlx5e_rl_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rl_priv_data *rl = &priv->rl;
+ uint64_t y;
+
+ /* check if there is support for packet pacing */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
+ return;
+
+ /* TODO check if there is support for packet pacing */
+
+ sysctl_ctx_free(&rl->ctx);
+
+ PRIV_LOCK(priv);
+ mlx5e_rl_close_workers(priv);
+ PRIV_UNLOCK(priv);
+
+ mlx5e_rl_reset_rates(rl);
+
+ /* free shared UAR for SQs */
+ mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
+
+ /* close TIS domain */
+ mlx5e_rl_close_tis(priv);
+
+ for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ cv_destroy(&rlw->cv);
+ mtx_destroy(&rlw->mtx);
+ free(rlw->channels, M_MLX5EN);
+ }
+ free(rl->rate_limit_table, M_MLX5EN);
+ free(rl->workers, M_MLX5EN);
+ sx_destroy(&rl->rl_sxlock);
+}
+
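+/* queue a channel for processing and wake up the worker thread */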
+static void
+mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel *channel)
+{
+ STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
+ cv_broadcast(&rlw->cv);
+}
+
+static void
+mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
+{
+ if (channel == NULL)
+ return;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ switch (channel->state) {
+ case MLX5E_RL_ST_MODIFY:
+ channel->state = MLX5E_RL_ST_DESTROY;
+ break;
+ case MLX5E_RL_ST_USED:
+ channel->state = MLX5E_RL_ST_DESTROY;
+ mlx5e_rlw_queue_channel_locked(rlw, channel);
+ break;
+ default:
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+}
+
+static int
+mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
+{
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ channel->new_rate = rate;
+ switch (channel->state) {
+ case MLX5E_RL_ST_USED:
+ channel->state = MLX5E_RL_ST_MODIFY;
+ mlx5e_rlw_queue_channel_locked(rlw, channel);
+ break;
+ default:
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ return (0);
+}
+
+static int
+mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t *prate)
+{
+ int retval;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ switch (channel->state) {
+ case MLX5E_RL_ST_USED:
+ *prate = channel->last_rate;
+ retval = 0;
+ break;
+ case MLX5E_RL_ST_MODIFY:
+ retval = EBUSY;
+ break;
+ default:
+ retval = EINVAL;
+ break;
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ return (retval);
+}
+
+static int
+mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
+ struct mlx5e_rl_channel **pchannel)
+{
+ struct mlx5e_rl_channel *channel;
+ int retval = ENOMEM;
+
+ MLX5E_RL_WORKER_LOCK(rlw);
+ /* Check for available channel in free list */
+ if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
+ retval = 0;
+ /* Remove head index from available list */
+ STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
+ channel->state = MLX5E_RL_ST_USED;
+ atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
+ } else {
+ atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
+ }
+ MLX5E_RL_WORKER_UNLOCK(rlw);
+
+ *pchannel = channel;
+#ifdef RATELIMIT_DEBUG
+ if_printf(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel);
+#endif
+ return (retval);
+}
+
+int
+mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_rl_worker *rlw;
+ struct mlx5e_priv *priv;
+ int error;
+
+ priv = ifp->if_softc;
+
+ /* check if there is support for packet pacing or if device is going away */
+ if (!MLX5_CAP_GEN(priv->mdev, qos) ||
+ !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
+ params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
+ return (EOPNOTSUPP);
+
+ /* compute which worker thread this TCP connection belongs to */
+ rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
+ priv->rl.param.tx_worker_threads_def);
+
+ error = mlx5e_find_available_tx_ring_index(rlw, &channel);
+ if (error != 0)
+ goto done;
+
+ error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
+ if (error != 0) {
+ mlx5e_rl_free(rlw, channel);
+ goto done;
+ }
+
+ /* store pointer to mbuf tag */
+ *ppmt = &channel->m_snd_tag;
+done:
+ return (error);
+}
+
+
+int
+mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
+}
+
+int
+mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ return (mlx5e_rl_query(channel->worker, channel, &params->rate_limit.max_rate));
+}
+
+void
+mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
+{
+ struct mlx5e_rl_channel *channel =
+ container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
+
+ mlx5e_rl_free(channel->worker, channel);
+}
+
+static int
+mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_rl_priv_data *rl = arg1;
+ struct mlx5e_priv *priv = rl->priv;
+ struct sbuf sbuf;
+ unsigned x;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
+ PRIV_LOCK(priv);
+
+ sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);
+
+ sbuf_printf(&sbuf,
+ "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
+ "\t" "--------------------------------------------\n");
+
+ MLX5E_RL_RLOCK(rl);
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] == 0)
+ continue;
+
+ sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
+ x, (unsigned)rl->param.tx_burst_size,
+ (long long)rl->rate_limit_table[x]);
+ }
+ MLX5E_RL_RUNLOCK(rl);
+
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+
+ PRIV_UNLOCK(priv);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
+{
+ uint64_t x;
+ uint64_t y;
+
+ MLX5E_RL_WLOCK(rl);
+ /* compute channel parameters once */
+ mlx5e_rl_build_channel_param(rl, &rl->chan_param);
+ MLX5E_RL_WUNLOCK(rl);
+
+ for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
+ struct mlx5e_rl_worker *rlw = rl->workers + y;
+
+ for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
+ struct mlx5e_rl_channel *channel;
+ struct mlx5e_sq *sq;
+
+ channel = rlw->channels + x;
+ sq = channel->sq;
+
+ if (sq == NULL)
+ continue;
+
+ if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
+ mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
+ rl->param.tx_coalesce_usecs,
+ rl->param.tx_coalesce_pkts,
+ rl->param.tx_coalesce_mode);
+ } else {
+ mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
+ rl->param.tx_coalesce_usecs,
+ rl->param.tx_coalesce_pkts);
+ }
+ }
+ }
+ return (0);
+}
+
+static int
+mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
+{
+ unsigned x;
+ int error;
+
+ if (value < 1000 ||
+ mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
+ return (EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+ error = ENOMEM;
+
+ /* check if rate already exists */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != value)
+ continue;
+ error = EEXIST;
+ break;
+ }
+
+ /* check if there is a free rate entry */
+ if (x == rl->param.tx_rates_def) {
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != 0)
+ continue;
+ rl->rate_limit_table[x] = value;
+ error = 0;
+ break;
+ }
+ }
+ MLX5E_RL_WUNLOCK(rl);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
+{
+ unsigned x;
+ int error;
+
+ if (value == 0)
+ return (EINVAL);
+
+ MLX5E_RL_WLOCK(rl);
+
+ /* check if rate already exists */
+ for (x = 0; x != rl->param.tx_rates_def; x++) {
+ if (rl->rate_limit_table[x] != value)
+ continue;
+ /* free up rate */
+ rl->rate_limit_table[x] = 0;
+ break;
+ }
+
+ /* check if the rate entry was found */
+ if (x == rl->param.tx_rates_def)
+ error = ENOENT;
+ else
+ error = 0;
+ MLX5E_RL_WUNLOCK(rl);
+
+ return (error);
+}
+
+static int
+mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_rl_priv_data *rl = arg1;
+ struct mlx5e_priv *priv = rl->priv;
+ unsigned mode_modify;
+ unsigned was_opened;
+ uint64_t value;
+ uint64_t old;
+ int error;
+
+ PRIV_LOCK(priv);
+
+ MLX5E_RL_RLOCK(rl);
+ value = rl->param.arg[arg2];
+ MLX5E_RL_RUNLOCK(rl);
+
+ if (req != NULL) {
+ old = value;
+ error = sysctl_handle_64(oidp, &value, 0, req);
+ if (error || req->newptr == NULL ||
+ value == rl->param.arg[arg2])
+ goto done;
+ } else {
+ old = 0;
+ error = 0;
+ }
+
+ /* check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+ was_opened = rl->opened;
+ mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);
+
+ switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
+ case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
+ if (value > rl->param.tx_worker_threads_max)
+ value = rl->param.tx_worker_threads_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
+ if (value > rl->param.tx_channels_per_worker_max)
+ value = rl->param.tx_channels_per_worker_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
+ if (value > rl->param.tx_rates_max)
+ value = rl->param.tx_rates_max;
+ else if (value < 1)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
+ /* range check */
+ if (value < 1)
+ value = 0;
+ else if (value > MLX5E_FLD_MAX(cqc, cq_period))
+ value = MLX5E_FLD_MAX(cqc, cq_period);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* refresh parameters without bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_rl_refresh_channel_params(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
+ /* import TX coal pkts */
+ if (value < 1)
+ value = 0;
+ else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
+ value = MLX5E_FLD_MAX(cqc, cq_max_count);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* refresh parameters without bringing the network interface down and up */
+ if (was_opened)
+ error = mlx5e_rl_refresh_channel_params(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
+ /* network interface must be down */
+ if (was_opened != 0 && mode_modify == 0)
+ mlx5e_rl_close_workers(priv);
+
+ /* import TX coalesce mode */
+ if (value != 0)
+ value = 1;
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* restart network interface, if any */
+ if (was_opened != 0) {
+ if (mode_modify == 0)
+ mlx5e_rl_open_workers(priv);
+ else
+ error = mlx5e_rl_refresh_channel_params(rl);
+ }
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_rl_close_workers(priv);
+
+ /* import TX queue size */
+ if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
+ value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ else if (value > priv->params_ethtool.tx_queue_size_max)
+ value = priv->params_ethtool.tx_queue_size_max;
+
+ /* store actual TX queue size */
+ value = 1ULL << order_base_2(value);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* verify TX completion factor */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_rl_open_workers(priv);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
+ /* network interface must be down */
+ if (was_opened)
+ mlx5e_rl_close_workers(priv);
+
+ /* store new value */
+ rl->param.arg[arg2] = value;
+
+ /* verify parameter */
+ mlx5e_rl_sync_tx_completion_fact(rl);
+
+ /* restart network interface, if any */
+ if (was_opened)
+ mlx5e_rl_open_workers(priv);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
+ error = mlx5e_rl_tx_limit_add(rl, value);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
+ error = mlx5e_rl_tx_limit_clr(rl, value);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
+ /* range check */
+ if (value > rl->param.tx_allowed_deviation_max)
+ value = rl->param.tx_allowed_deviation_max;
+ else if (value < rl->param.tx_allowed_deviation_min)
+ value = rl->param.tx_allowed_deviation_min;
+
+ MLX5E_RL_WLOCK(rl);
+ rl->param.arg[arg2] = value;
+ MLX5E_RL_WUNLOCK(rl);
+ break;
+
+ case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
+ /* range check */
+ if (value > rl->param.tx_burst_size_max)
+ value = rl->param.tx_burst_size_max;
+ else if (value < rl->param.tx_burst_size_min)
+ value = rl->param.tx_burst_size_min;
+
+ MLX5E_RL_WLOCK(rl);
+ rl->param.arg[arg2] = value;
+ MLX5E_RL_WUNLOCK(rl);
+ break;
+
+ default:
+ break;
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static void
+mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc)
+{
+ /*
+ * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
+ * take care of loading default sysctl value from the kernel
+ * environment, if any:
+ */
+ if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
+ /* read-only SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+ } else {
+ if (strstr(name, "_def") != 0) {
+#ifdef RATELIMIT_DEBUG
+ /* tunable read-only advanced SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RDTUN |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+#endif
+ } else {
+ /* read-write SYSCTLs */
+ SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ name, CTLTYPE_U64 | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
+ }
+ }
+}
+
+static void
+mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
+ struct sysctl_oid *node, const char *name, const char *desc)
+{
+ /* read-only SYSCTLs */
+ SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
+ CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
+}
+
+#endif
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
new file mode 100644
index 000000000000..cbd7e00a35b9
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -0,0 +1,550 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <machine/in_cksum.h>
+
+static inline int
+mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_rx_wqe *wqe, u16 ix)
+{
+ bus_dma_segment_t segs[rq->nsegs];
+ struct mbuf *mb;
+ int nsegs;
+ int err;
+#if (MLX5E_MAX_RX_SEGS != 1)
+ struct mbuf *mb_head;
+ int i;
+#endif
+ if (rq->mbuf[ix].mbuf != NULL)
+ return (0);
+
+#if (MLX5E_MAX_RX_SEGS == 1)
+ mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
+ if (unlikely(!mb))
+ return (-ENOMEM);
+
+ mb->m_pkthdr.len = mb->m_len = rq->wqe_sz;
+#else
+ mb_head = mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
+ MLX5E_MAX_RX_BYTES);
+ if (unlikely(mb == NULL))
+ return (-ENOMEM);
+
+ mb->m_len = MLX5E_MAX_RX_BYTES;
+ mb->m_pkthdr.len = MLX5E_MAX_RX_BYTES;
+
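+ /* chain additional mbufs until the configured receive WQE size is covered */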
+ for (i = 1; i < rq->nsegs; i++) {
+ if (mb_head->m_pkthdr.len >= rq->wqe_sz)
+ break;
+ mb = mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0,
+ MLX5E_MAX_RX_BYTES);
+ if (unlikely(mb == NULL)) {
+ m_freem(mb_head);
+ return (-ENOMEM);
+ }
+ mb->m_len = MLX5E_MAX_RX_BYTES;
+ mb_head->m_pkthdr.len += MLX5E_MAX_RX_BYTES;
+ }
+ /* rewind to first mbuf in chain */
+ mb = mb_head;
+#endif
+ /* get IP header aligned */
+ m_adj(mb, MLX5E_NET_IP_ALIGN);
+
+ err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (err != 0)
+ goto err_free_mbuf;
+ if (unlikely(nsegs == 0)) {
+ bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map);
+ err = -ENOMEM;
+ goto err_free_mbuf;
+ }
+#if (MLX5E_MAX_RX_SEGS == 1)
+ wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr);
+#else
+ wqe->data[0].addr = cpu_to_be64(segs[0].ds_addr);
+ wqe->data[0].byte_count = cpu_to_be32(segs[0].ds_len |
+ MLX5_HW_START_PADDING);
+ for (i = 1; i != nsegs; i++) {
+ wqe->data[i].addr = cpu_to_be64(segs[i].ds_addr);
+ wqe->data[i].byte_count = cpu_to_be32(segs[i].ds_len);
+ }
+ for (; i < rq->nsegs; i++) {
+ wqe->data[i].addr = 0;
+ wqe->data[i].byte_count = 0;
+ }
+#endif
+
+ rq->mbuf[ix].mbuf = mb;
+ rq->mbuf[ix].data = mb->m_data;
+
+ bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map,
+ BUS_DMASYNC_PREREAD);
+ return (0);
+
+err_free_mbuf:
+ m_freem(mb);
+ return (err);
+}
+
+static void
+mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+{
+ if (unlikely(rq->enabled == 0))
+ return;
+
+ while (!mlx5_wq_ll_is_full(&rq->wq)) {
+ struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);
+
+ if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head))) {
+ callout_reset_curcpu(&rq->watchdog, 1, (void *)&mlx5e_post_rx_wqes, rq);
+ break;
+ }
+ mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
+ }
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ atomic_thread_fence_rel();
+
+ mlx5_wq_ll_update_db_record(&rq->wq);
+}
+
+static void
+mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe)
+{
+ /* TODO: consider vlans, ip options, ... */
+ struct ether_header *eh;
+ uint16_t eh_type;
+ uint16_t tot_len;
+ struct ip6_hdr *ip6 = NULL;
+ struct ip *ip4 = NULL;
+ struct tcphdr *th;
+ uint32_t *ts_ptr;
+ uint8_t l4_hdr_type;
+ int tcp_ack;
+
+ eh = mtod(mb, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+
+ l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
+ (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
+
+ /* TODO: consider vlan */
+ tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN;
+
+ switch (eh_type) {
+ case ETHERTYPE_IP:
+ ip4 = (struct ip *)(eh + 1);
+ th = (struct tcphdr *)(ip4 + 1);
+ break;
+ case ETHERTYPE_IPV6:
+ ip6 = (struct ip6_hdr *)(eh + 1);
+ th = (struct tcphdr *)(ip6 + 1);
+ break;
+ default:
+ return;
+ }
+
+ ts_ptr = (uint32_t *)(th + 1);
+
+ if (get_cqe_lro_tcppsh(cqe))
+ th->th_flags |= TH_PUSH;
+
+ if (tcp_ack) {
+ th->th_flags |= TH_ACK;
+ th->th_ack = cqe->lro_ack_seq_num;
+ th->th_win = cqe->lro_tcp_win;
+
+ /*
+ * FreeBSD handles only 32bit aligned timestamp right after
+ * the TCP hdr
+ * +--------+--------+--------+--------+
+ * | NOP | NOP | TSopt | 10 |
+ * +--------+--------+--------+--------+
+ * | TSval timestamp |
+ * +--------+--------+--------+--------+
+ * | TSecr timestamp |
+ * +--------+--------+--------+--------+
+ */
+ if (get_cqe_lro_timestamp_valid(cqe) &&
+ (__predict_true(*ts_ptr) == ntohl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP))) {
+ /*
+ * cqe->timestamp is 64 bits long.
+ * [0-31] - timestamp.
+ * [32-63] - timestamp echo reply.
+ */
+ ts_ptr[1] = *(uint32_t *)&cqe->timestamp;
+ ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1);
+ }
+ }
+ if (ip4) {
+ ip4->ip_ttl = cqe->lro_min_ttl;
+ ip4->ip_len = cpu_to_be16(tot_len);
+ ip4->ip_sum = 0;
+ ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2);
+ } else {
+ ip6->ip6_hlim = cqe->lro_min_ttl;
+ ip6->ip6_plen = cpu_to_be16(tot_len -
+ sizeof(struct ip6_hdr));
+ }
+ /* TODO: handle tcp checksum */
+}
+
+static uint64_t
+mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp)
+{
+ struct mlx5e_clbr_point *cp;
+ uint64_t a1, a2, res;
+ u_int gen;
+
+ do {
+ cp = &priv->clbr_points[priv->clbr_curr];
+ gen = atomic_load_acq_int(&cp->clbr_gen);
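+ /*
+ * Linearly interpolate the host time from the two most
+ * recent calibration points:
+ * res = base_prev + (hw - hw_prev) *
+ * (base_curr - base_prev) / (hw_curr - hw_prev)
+ * The MLX5E_TSTMP_PREC shifts drop low-order bits so that
+ * the 64-bit intermediate product does not overflow.
+ */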
+ a1 = (hw_tstmp - cp->clbr_hw_prev) >> MLX5E_TSTMP_PREC;
+ a2 = (cp->base_curr - cp->base_prev) >> MLX5E_TSTMP_PREC;
+ res = (a1 * a2) << MLX5E_TSTMP_PREC;
+
+ /*
+ * Divisor cannot be zero because calibration callback
+ * checks for the condition and disables timestamping
+ * if clock halted.
+ */
+ res /= (cp->clbr_hw_curr - cp->clbr_hw_prev) >>
+ MLX5E_TSTMP_PREC;
+
+ res += cp->base_prev;
+ atomic_thread_fence_acq();
+ } while (gen == 0 || gen != cp->clbr_gen);
+ return (res);
+}
+
+static inline void
+mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
+ struct mlx5e_rq *rq, struct mbuf *mb,
+ u32 cqe_bcnt)
+{
+ struct ifnet *ifp = rq->ifp;
+ struct mlx5e_channel *c;
+#if (MLX5E_MAX_RX_SEGS != 1)
+ struct mbuf *mb_head;
+#endif
+ int lro_num_seg; /* HW LRO session aggregated packets counter */
+ uint64_t tstmp;
+
+ lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(mb, cqe);
+ rq->stats.lro_packets++;
+ rq->stats.lro_bytes += cqe_bcnt;
+ }
+
+#if (MLX5E_MAX_RX_SEGS == 1)
+ mb->m_pkthdr.len = mb->m_len = cqe_bcnt;
+#else
+ mb->m_pkthdr.len = cqe_bcnt;
+ for (mb_head = mb; mb != NULL; mb = mb->m_next) {
+ if (mb->m_len > cqe_bcnt)
+ mb->m_len = cqe_bcnt;
+ cqe_bcnt -= mb->m_len;
+ if (likely(cqe_bcnt == 0)) {
+ if (likely(mb->m_next != NULL)) {
+ /* trim off empty mbufs */
+ m_freem(mb->m_next);
+ mb->m_next = NULL;
+ }
+ break;
+ }
+ }
+ /* rewind to first mbuf in chain */
+ mb = mb_head;
+#endif
+ /* check if a Toeplitz hash was computed */
+ if (cqe->rss_hash_type != 0) {
+ mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result);
+#ifdef RSS
+ /* decode the RSS hash type */
+ switch (cqe->rss_hash_type &
+ (CQE_RSS_DST_HTYPE_L4 | CQE_RSS_DST_HTYPE_IP)) {
+ /* IPv4 */
+ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV4):
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV4);
+ break;
+ case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV4):
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV4);
+ break;
+ case CQE_RSS_DST_HTYPE_IPV4:
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV4);
+ break;
+ /* IPv6 */
+ case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV6):
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV6);
+ break;
+ case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV6):
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV6);
+ break;
+ case CQE_RSS_DST_HTYPE_IPV6:
+ M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6);
+ break;
+ default: /* Other */
+ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH);
+ break;
+ }
+#else
+ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH);
+#endif
+ } else {
+ mb->m_pkthdr.flowid = rq->ix;
+ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+ }
+ mb->m_pkthdr.rcvif = ifp;
+
+ if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
+ ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) ==
+ (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) {
+ mb->m_pkthdr.csum_flags =
+ CSUM_IP_CHECKED | CSUM_IP_VALID |
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ mb->m_pkthdr.csum_data = htons(0xffff);
+ } else {
+ rq->stats.csum_none++;
+ }
+
+ if (cqe_has_vlan(cqe)) {
+ mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
+ mb->m_flags |= M_VLANTAG;
+ }
+
+ c = container_of(rq, struct mlx5e_channel, rq);
+ if (c->priv->clbr_done >= 2) {
+ tstmp = mlx5e_mbuf_tstmp(c->priv, be64_to_cpu(cqe->timestamp));
+ if ((tstmp & MLX5_CQE_TSTMP_PTP) != 0) {
+ /*
+ * The timestamp was taken when the packet
+ * entered the NIC, instead of when the CQE
+ * was generated.
+ */
+ tstmp &= ~MLX5_CQE_TSTMP_PTP;
+ mb->m_flags |= M_TSTMP_HPREC;
+ }
+ mb->m_pkthdr.rcv_tstmp = tstmp;
+ mb->m_flags |= M_TSTMP;
+ }
+}
+
+static inline void
+mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data)
+{
+ memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, (cc & cq->wq.sz_m1)),
+ sizeof(struct mlx5_cqe64));
+}
+
+static inline void
+mlx5e_write_cqe_slot(struct mlx5e_cq *cq, u32 cc, void *data)
+{
+ memcpy(mlx5_cqwq_get_wqe(&cq->wq, cc & cq->wq.sz_m1),
+ data, sizeof(struct mlx5_cqe64));
+}
+
+static inline void
+mlx5e_decompress_cqe(struct mlx5e_cq *cq, struct mlx5_cqe64 *title,
+ struct mlx5_mini_cqe8 *mini,
+ u16 wqe_counter, int i)
+{
+ /*
+ * NOTE: The fields which are not set here are copied from the
+ * initial and common title. See memcpy() in
+ * mlx5e_write_cqe_slot().
+ */
+ title->byte_cnt = mini->byte_cnt;
+ title->wqe_counter = cpu_to_be16((wqe_counter + i) & cq->wq.sz_m1);
+ title->check_sum = mini->checksum;
+ title->op_own = (title->op_own & 0xf0) |
+ (((cq->wq.cc + i) >> cq->wq.log_sz) & 1);
+}
+
+#define MLX5E_MINI_ARRAY_SZ 8
+/* Make sure structs are not packed differently */
+CTASSERT(sizeof(struct mlx5_cqe64) ==
+ sizeof(struct mlx5_mini_cqe8) * MLX5E_MINI_ARRAY_SZ);
+static void
+mlx5e_decompress_cqes(struct mlx5e_cq *cq)
+{
+ struct mlx5_mini_cqe8 mini_array[MLX5E_MINI_ARRAY_SZ];
+ struct mlx5_cqe64 title;
+ u32 cqe_count;
+ u32 i = 0;
+ u16 title_wqe_counter;
+
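+ /*
+ * For compressed CQEs the byte_cnt field of the title CQE holds
+ * the number of mini CQEs in the compression session:
+ */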
+ mlx5e_read_cqe_slot(cq, cq->wq.cc, &title);
+ title_wqe_counter = be16_to_cpu(title.wqe_counter);
+ cqe_count = be32_to_cpu(title.byte_cnt);
+
+ /* Make sure we won't overflow */
+ KASSERT(cqe_count <= cq->wq.sz_m1,
+ ("%s: cqe_count %u > cq->wq.sz_m1 %u", __func__,
+ cqe_count, cq->wq.sz_m1));
+
+ mlx5e_read_cqe_slot(cq, cq->wq.cc + 1, mini_array);
+ while (true) {
+ mlx5e_decompress_cqe(cq, &title,
+ &mini_array[i % MLX5E_MINI_ARRAY_SZ],
+ title_wqe_counter, i);
+ mlx5e_write_cqe_slot(cq, cq->wq.cc + i, &title);
+ i++;
+
+ if (i == cqe_count)
+ break;
+ if (i % MLX5E_MINI_ARRAY_SZ == 0)
+ mlx5e_read_cqe_slot(cq, cq->wq.cc + i, mini_array);
+ }
+}
+
+static int
+mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
+{
+ int i;
+
+ for (i = 0; i < budget; i++) {
+ struct mlx5e_rx_wqe *wqe;
+ struct mlx5_cqe64 *cqe;
+ struct mbuf *mb;
+ __be16 wqe_counter_be;
+ u16 wqe_counter;
+ u32 byte_cnt;
+
+ cqe = mlx5e_get_cqe(&rq->cq);
+ if (!cqe)
+ break;
+
+ if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED)
+ mlx5e_decompress_cqes(&rq->cq);
+
+ mlx5_cqwq_pop(&rq->cq.wq);
+
+ wqe_counter_be = cqe->wqe_counter;
+ wqe_counter = be16_to_cpu(wqe_counter_be);
+ wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+ byte_cnt = be32_to_cpu(cqe->byte_cnt);
+
+ bus_dmamap_sync(rq->dma_tag,
+ rq->mbuf[wqe_counter].dma_map,
+ BUS_DMASYNC_POSTREAD);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats.wqe_err++;
+ goto wq_ll_pop;
+ }
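+ /*
+ * Copy small frames into a separate packet header mbuf, so
+ * that the existing receive buffer stays attached to the
+ * queue entry and can be reused:
+ */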
+ if ((MHLEN - MLX5E_NET_IP_ALIGN) >= byte_cnt &&
+ (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
+#if (MLX5E_MAX_RX_SEGS != 1)
+ /* set maximum mbuf length */
+ mb->m_len = MHLEN - MLX5E_NET_IP_ALIGN;
+#endif
+ /* get IP header aligned */
+ mb->m_data += MLX5E_NET_IP_ALIGN;
+
+ bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t),
+ byte_cnt);
+ } else {
+ mb = rq->mbuf[wqe_counter].mbuf;
+ rq->mbuf[wqe_counter].mbuf = NULL; /* safety clear */
+
+ bus_dmamap_unload(rq->dma_tag,
+ rq->mbuf[wqe_counter].dma_map);
+ }
+
+ mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
+ rq->stats.packets++;
+
+#if !defined(HAVE_TCP_LRO_RX)
+ tcp_lro_queue_mbuf(&rq->lro, mb);
+#else
+ if (mb->m_pkthdr.csum_flags == 0 ||
+ (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
+ rq->lro.lro_cnt == 0 ||
+ tcp_lro_rx(&rq->lro, mb, 0) != 0) {
+ rq->ifp->if_input(rq->ifp, mb);
+ }
+#endif
+wq_ll_pop:
+ mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+ &wqe->next.next_wqe_index);
+ }
+
+ mlx5_cqwq_update_db_record(&rq->cq.wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ atomic_thread_fence_rel();
+ return (i);
+}
+
+void
+mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq);
+ int i = 0;
+
+#ifdef HAVE_PER_CQ_EVENT_PACKET
+#if (MHLEN < 15)
+#error "MHLEN is too small"
+#endif
+ struct mbuf *mb = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (mb != NULL) {
+ /* this code is used for debugging purposes only */
+ mb->m_pkthdr.len = mb->m_len = 15;
+ memset(mb->m_data, 255, 14);
+ mb->m_data[14] = rq->ix;
+ mb->m_pkthdr.rcvif = rq->ifp;
+ rq->ifp->if_input(rq->ifp, mb);
+ }
+#endif
+
+ mtx_lock(&rq->mtx);
+
+ /*
+ * Polling the entire CQ without posting new WQEs results in
+ * a lack of receive WQEs during heavy traffic scenarios.
+ */
+ while (1) {
+ if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) !=
+ MLX5E_RX_BUDGET_MAX)
+ break;
+ i += MLX5E_RX_BUDGET_MAX;
+ if (i >= MLX5E_BUDGET_MAX)
+ break;
+ mlx5e_post_rx_wqes(rq);
+ }
+ mlx5e_post_rx_wqes(rq);
+ mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock));
+ tcp_lro_flush_all(&rq->lro);
+ mtx_unlock(&rq->mtx);
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
new file mode 100644
index 000000000000..40d8157c6771
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -0,0 +1,666 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <machine/atomic.h>
+
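+/*
+ * Request a completion queue event only for every cev_factor-th send
+ * WQE, which reduces the TX completion event and processing rate.
+ */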
+static inline bool
+mlx5e_do_send_cqe(struct mlx5e_sq *sq)
+{
+ sq->cev_counter++;
+ /* interleave the CQEs */
+ if (sq->cev_counter >= sq->cev_factor) {
+ sq->cev_counter = 0;
+ return (1);
+ }
+ return (0);
+}
+
+void
+mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
+{
+ u16 pi = sq->pc & sq->wq.sz_m1;
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+
+ memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
+
+ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
+ wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ if (mlx5e_do_send_cqe(sq))
+ wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ else
+ wqe->ctrl.fm_ce_se = 0;
+
+ /* Copy data for doorbell */
+ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
+
+ sq->mbuf[pi].mbuf = NULL;
+ sq->mbuf[pi].num_bytes = 0;
+ sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ sq->pc += sq->mbuf[pi].num_wqebbs;
+}
+
+#if (__FreeBSD_version >= 1100000)
+static uint32_t mlx5e_hash_value;
+
+static void
+mlx5e_hash_init(void *arg)
+{
+ mlx5e_hash_value = m_ether_tcpip_hash_init();
+}
+
+/* Make the kernel call mlx5e_hash_init() after the random stack has finished initializing */
+SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
+#endif
+
+static struct mlx5e_sq *
+mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ struct mlx5e_channel * volatile *ppch;
+ struct mlx5e_channel *pch;
+ u32 ch;
+ u32 tc;
+
+ ppch = priv->channel;
+
+ /* check if channels are successfully opened */
+ if (unlikely(ppch == NULL))
+ return (NULL);
+
+ /* obtain VLAN information if present */
+ if (mb->m_flags & M_VLANTAG) {
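+ /* use the VLAN PCP (priority) bits as traffic class */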
+ tc = (mb->m_pkthdr.ether_vtag >> 13);
+ if (tc >= priv->num_tc)
+ tc = priv->default_vlan_prio;
+ } else {
+ tc = priv->default_vlan_prio;
+ }
+
+ ch = priv->params.num_channels;
+
+#ifdef RATELIMIT
+ if (mb->m_pkthdr.snd_tag != NULL) {
+ struct mlx5e_sq *sq;
+
+ /* check for route change */
+ if (mb->m_pkthdr.snd_tag->ifp != ifp)
+ return (NULL);
+
+ /* get pointer to sendqueue */
+ sq = container_of(mb->m_pkthdr.snd_tag,
+ struct mlx5e_rl_channel, m_snd_tag)->sq;
+
+ /* check if valid */
+ if (sq != NULL && sq->stopped == 0)
+ return (sq);
+
+ /* FALLTHROUGH */
+ }
+#endif
+ /* check if flowid is set */
+ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
+#ifdef RSS
+ u32 temp;
+
+ if (rss_hash2bucket(mb->m_pkthdr.flowid,
+ M_HASHTYPE_GET(mb), &temp) == 0)
+ ch = temp % ch;
+ else
+#endif
+ ch = (mb->m_pkthdr.flowid % 128) % ch;
+ } else {
+#if (__FreeBSD_version >= 1100000)
+ ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
+ MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
+#else
+ /*
+ * m_ether_tcpip_hash not present in stable, so just
+ * throw unhashed mbufs on queue 0
+ */
+ ch = 0;
+#endif
+ }
+
+ /* check if channel is allocated and not stopped */
+ pch = ppch[ch];
+ if (likely(pch != NULL && pch->sq[tc].stopped == 0))
+ return (&pch->sq[tc]);
+ return (NULL);
+}
+
+static inline u16
+mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
+{
+
+ switch(sq->min_inline_mode) {
+ case MLX5_INLINE_MODE_NONE:
+ /*
+ * When inline mode is NONE, we do not need to copy
+ * headers into WQEs, except when vlan tag framing is
+ * requested. Hardware might offload vlan tagging on
+ * transmit. This is a separate capability, which is
+ * known to be disabled on ConnectX-5 due to a hardware
+ * bug RM 931383. If vlan_inline_cap is not present and
+ * the packet has a vlan tag, fall back to inlining.
+ */
+ if ((mb->m_flags & M_VLANTAG) != 0 &&
+ sq->vlan_inline_cap == 0)
+ break;
+ return (0);
+ case MLX5_INLINE_MODE_L2:
+ /*
+ * Due to hardware limitations, when trust mode is
+ * DSCP, the hardware may request MLX5_INLINE_MODE_L2
+ * while it really needs all L2 headers and the first 4
+ * bytes of the IP header (which include the
+ * TOS/traffic-class).
+ *
+ * To avoid doing a firmware command for querying the
+ * trust state and parsing the mbuf for doing
+ * unnecessary checks (VLAN/eth_type) in the fast path,
+ * we are going for the worst case (22 bytes) if
+ * the mb->m_pkthdr.len allows it.
+ */
+ if (mb->m_pkthdr.len > ETHER_HDR_LEN +
+ ETHER_VLAN_ENCAP_LEN + 4)
+ return (MIN(sq->max_inline, ETHER_HDR_LEN +
+ ETHER_VLAN_ENCAP_LEN + 4));
+ break;
+ }
+ return (MIN(sq->max_inline, mb->m_pkthdr.len));
+}
+
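+/*
+ * Return the total length of the Ethernet, IP/IPv6 and TCP headers,
+ * as needed for LSO. Returns zero if the packet is not TCP or if the
+ * headers are not contiguous within the first mbuf.
+ */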
+static int
+mlx5e_get_header_size(struct mbuf *mb)
+{
+ struct ether_vlan_header *eh;
+ struct tcphdr *th;
+ struct ip *ip;
+ int ip_hlen, tcp_hlen;
+ struct ip6_hdr *ip6;
+ uint16_t eth_type;
+ int eth_hdr_len;
+
+ eh = mtod(mb, struct ether_vlan_header *);
+ if (mb->m_len < ETHER_HDR_LEN)
+ return (0);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ eth_type = ntohs(eh->evl_proto);
+ eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ eth_type = ntohs(eh->evl_encap_proto);
+ eth_hdr_len = ETHER_HDR_LEN;
+ }
+ if (mb->m_len < eth_hdr_len)
+ return (0);
+ switch (eth_type) {
+ case ETHERTYPE_IP:
+ ip = (struct ip *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip))
+ return (0);
+ if (ip->ip_p != IPPROTO_TCP)
+ return (0);
+ ip_hlen = ip->ip_hl << 2;
+ eth_hdr_len += ip_hlen;
+ break;
+ case ETHERTYPE_IPV6:
+ ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip6))
+ return (0);
+ if (ip6->ip6_nxt != IPPROTO_TCP)
+ return (0);
+ eth_hdr_len += sizeof(*ip6);
+ break;
+ default:
+ return (0);
+ }
+ if (mb->m_len < eth_hdr_len + sizeof(*th))
+ return (0);
+ th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
+ tcp_hlen = th->th_off << 2;
+ eth_hdr_len += tcp_hlen;
+ if (mb->m_len < eth_hdr_len)
+ return (0);
+ return (eth_hdr_len);
+}
+
+/*
+ * The return value is not passed back to the network stack,
+ * because the mbuf has already been enqueued into the drbr.
+ */
+static int
+mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
+{
+ bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe *wqe;
+ struct ifnet *ifp;
+ int nsegs;
+ int err;
+ int x;
+ struct mbuf *mb = *mbp;
+ u16 ds_cnt;
+ u16 ihs;
+ u16 pi;
+ u8 opcode;
+
+ /*
+ * Return ENOBUFS if the queue is full. This may trigger reinsertion
+ * of the mbuf into the drbr (see mlx5e_xmit_locked()).
+ */
+ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
+ return (ENOBUFS);
+ }
+
+ /* Align SQ edge with NOPs to avoid WQE wrap around */
+ pi = ((~sq->pc) & sq->wq.sz_m1);
+ if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
+ /* Send one multi NOP message instead of many */
+ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
+ pi = ((~sq->pc) & sq->wq.sz_m1);
+ if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))
+ return (ENOMEM);
+ }
+
+ /* Setup local variables */
+ pi = sq->pc & sq->wq.sz_m1;
+ wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ ifp = sq->ifp;
+
+ memset(wqe, 0, sizeof(*wqe));
+
+ /* Send a copy of the frame to the BPF listener, if any */
+ if (ifp != NULL && ifp->if_bpf != NULL)
+ ETHER_BPF_MTAP(ifp, mb);
+
+ if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
+ wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ }
+ if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
+ wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ }
+ if (wqe->eth.cs_flags == 0) {
+ sq->stats.csum_offload_none++;
+ }
+ if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
+ u32 payload_len;
+ u32 mss = mb->m_pkthdr.tso_segsz;
+ u32 num_pkts;
+
+ wqe->eth.mss = cpu_to_be16(mss);
+ opcode = MLX5_OPCODE_LSO;
+ ihs = mlx5e_get_header_size(mb);
+ payload_len = mb->m_pkthdr.len - ihs;
+ if (payload_len == 0)
+ num_pkts = 1;
+ else
+ num_pkts = DIV_ROUND_UP(payload_len, mss);
+ sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);
+
+ sq->stats.tso_packets++;
+ sq->stats.tso_bytes += payload_len;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ ihs = mlx5e_get_inline_hdr_size(sq, mb);
+ sq->mbuf[pi].num_bytes = max_t(unsigned int,
+ mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
+ }
+ if (ihs == 0) {
+ if ((mb->m_flags & M_VLANTAG) != 0) {
+ wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
+ wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
+ } else {
+ wqe->eth.inline_hdr_sz = 0;
+ }
+ } else {
+ if ((mb->m_flags & M_VLANTAG) != 0) {
+ struct ether_vlan_header *eh = (struct ether_vlan_header
+ *)wqe->eth.inline_hdr_start;
+
+ /* Range checks */
+ if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
+ ihs = (MLX5E_MAX_TX_INLINE -
+ ETHER_VLAN_ENCAP_LEN);
+ else if (ihs < ETHER_HDR_LEN) {
+ err = EINVAL;
+ goto tx_drop;
+ }
+ m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
+ m_adj(mb, ETHER_HDR_LEN);
+ /* Insert 4 bytes VLAN tag into data stream */
+ eh->evl_proto = eh->evl_encap_proto;
+ eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
+ eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
+ /* Copy rest of header data, if any */
+ m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh +
+ 1));
+ m_adj(mb, ihs - ETHER_HDR_LEN);
+ /* Extend header by 4 bytes */
+ ihs += ETHER_VLAN_ENCAP_LEN;
+ } else {
+ m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
+ m_adj(mb, ihs);
+ }
+ wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
+ }
+
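+ /*
+ * Compute the number of 16-byte data segments consumed by the
+ * control and ethernet segments, including any inlined headers
+ * that extend beyond the built-in inline_hdr_start area:
+ */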
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (ihs > sizeof(wqe->eth.inline_hdr_start)) {
+ ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
+ MLX5_SEND_WQE_DS);
+ }
+ dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
+
+ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (err == EFBIG) {
+ /* Update statistics */
+ sq->stats.defragged++;
+ /* Too many mbuf fragments */
+ mb = m_defrag(*mbp, M_NOWAIT);
+ if (mb == NULL) {
+ mb = *mbp;
+ goto tx_drop;
+ }
+ /* Try again */
+ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ }
+ /* Catch errors */
+ if (err != 0)
+ goto tx_drop;
+
+ /* Make sure all mbuf data, if any, is written to RAM */
+ if (nsegs != 0) {
+ bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
+ BUS_DMASYNC_PREWRITE);
+ } else {
+ /* All data was inlined, free the mbuf. */
+ bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
+ m_freem(mb);
+ mb = NULL;
+ }
+
+ for (x = 0; x != nsegs; x++) {
+ if (segs[x].ds_len == 0)
+ continue;
+ dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
+ dseg++;
+ }
+
+ ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
+
+ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
+ wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ if (mlx5e_do_send_cqe(sq))
+ wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ else
+ wqe->ctrl.fm_ce_se = 0;
+
+ /* Copy data for doorbell */
+ memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
+
+ /* Store pointer to mbuf */
+ sq->mbuf[pi].mbuf = mb;
+ sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ sq->pc += sq->mbuf[pi].num_wqebbs;
+
+ sq->stats.packets++;
+ *mbp = NULL; /* safety clear */
+ return (0);
+
+tx_drop:
+ sq->stats.dropped++;
+ *mbp = NULL;
+ m_freem(mb);
+ return (err);
+}
+
+static void
+mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
+{
+ u16 sqcc;
+
+ /*
+ * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ while (budget > 0) {
+ struct mlx5_cqe64 *cqe;
+ struct mbuf *mb;
+ u16 x;
+ u16 ci;
+
+ cqe = mlx5e_get_cqe(&sq->cq);
+ if (!cqe)
+ break;
+
+ mlx5_cqwq_pop(&sq->cq.wq);
+
+ /* update budget according to the event factor */
+ budget -= sq->cev_factor;
+
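+ /*
+ * Each completion covers "cev_factor" send WQEs; unmap and
+ * free the corresponding mbufs:
+ */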
+ for (x = 0; x != sq->cev_factor; x++) {
+ ci = sqcc & sq->wq.sz_m1;
+ mb = sq->mbuf[ci].mbuf;
+ sq->mbuf[ci].mbuf = NULL; /* Safety clear */
+
+ if (mb == NULL) {
+ if (sq->mbuf[ci].num_bytes == 0) {
+ /* NOP */
+ sq->stats.nop++;
+ }
+ } else {
+ bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
+
+ /* Free transmitted mbuf */
+ m_freem(mb);
+ }
+ sqcc += sq->mbuf[ci].num_wqebbs;
+ }
+ }
+
+ mlx5_cqwq_update_db_record(&sq->cq.wq);
+
+ /* Ensure cq space is freed before enabling more cqes */
+ atomic_thread_fence_rel();
+
+ sq->cc = sqcc;
+
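+ /*
+ * If the transmit path previously marked the SQ as full, flag
+ * it ready again and kick the transmit task to drain the
+ * buf_ring:
+ */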
+ if (sq->sq_tq != NULL &&
+ atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
+ taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
+}
+
+static int
+mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
+{
+ struct mbuf *next;
+ int err = 0;
+
+ if (likely(mb != NULL)) {
+ /*
+ * If the mbuf cannot be enqueued into the drbr, still try to
+ * transmit what is already queued. Keep the enqueue error so
+ * it can be returned after the transmit attempt.
+ */
+ err = drbr_enqueue(ifp, sq->br, mb);
+ }
+
+ /*
+ * Check if the network interface is closed or if the SQ is
+ * being stopped:
+ */
+ if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sq->stopped != 0))
+ return (err);
+
+ /* Process the queue */
+ while ((next = drbr_peek(ifp, sq->br)) != NULL) {
+ if (mlx5e_sq_xmit(sq, &next) != 0) {
+ if (next != NULL) {
+ drbr_putback(ifp, sq->br, next);
+ atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
+ break;
+ }
+ }
+ drbr_advance(ifp, sq->br);
+ }
+ /* Check if we need to write the doorbell */
+ if (likely(sq->doorbell.d64 != 0)) {
+ mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+ sq->doorbell.d64 = 0;
+ }
+ /*
+ * Check if we need to start the event timer which flushes the
+ * transmit ring on timeout:
+ */
+ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
+ sq->cev_factor != 1)) {
+ /* start the timer */
+ mlx5e_sq_cev_timeout(sq);
+ } else {
+ /* don't send NOPs yet */
+ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
+ }
+ return (err);
+}
+
+static int
+mlx5e_xmit_locked_no_br(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
+{
+ int err = 0;
+
+ if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+ sq->stopped != 0)) {
+ m_freem(mb);
+ return (ENETDOWN);
+ }
+
+ /* Do transmit */
+ if (mlx5e_sq_xmit(sq, &mb) != 0) {
+ /* NOTE: m_freem() is NULL safe */
+ m_freem(mb);
+ err = ENOBUFS;
+ }
+
+ /* Check if we need to write the doorbell */
+ if (likely(sq->doorbell.d64 != 0)) {
+ mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+ sq->doorbell.d64 = 0;
+ }
+
+ /*
+ * Check if we need to start the event timer which flushes the
+ * transmit ring on timeout:
+ */
+ if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
+ sq->cev_factor != 1)) {
+ /* start the timer */
+ mlx5e_sq_cev_timeout(sq);
+ } else {
+ /* don't send NOPs yet */
+ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
+ }
+ return (err);
+}
+
+int
+mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct mlx5e_sq *sq;
+ int ret;
+
+ sq = mlx5e_select_queue(ifp, mb);
+ if (unlikely(sq == NULL)) {
+#ifdef RATELIMIT
+ /* Check for route change */
+ if (mb->m_pkthdr.snd_tag != NULL &&
+ mb->m_pkthdr.snd_tag->ifp != ifp) {
+ /* Free mbuf */
+ m_freem(mb);
+
+ /*
+ * Tell the upper layers about the route change so
+ * that this packet gets re-transmitted:
+ */
+ return (EAGAIN);
+ }
+#endif
+ /* Free mbuf */
+ m_freem(mb);
+
+ /* Invalid send queue */
+ return (ENXIO);
+ }
+
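+ /*
+ * Rate limited SQs have no buf_ring and are always sent under
+ * the SQ lock. Regular SQs try to take the lock and otherwise
+ * enqueue the mbuf on the buf_ring and defer to the transmit
+ * task:
+ */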
+ if (unlikely(sq->br == NULL)) {
+ /* rate limited traffic */
+ mtx_lock(&sq->lock);
+ ret = mlx5e_xmit_locked_no_br(ifp, sq, mb);
+ mtx_unlock(&sq->lock);
+ } else if (mtx_trylock(&sq->lock)) {
+ ret = mlx5e_xmit_locked(ifp, sq, mb);
+ mtx_unlock(&sq->lock);
+ } else {
+ ret = drbr_enqueue(ifp, sq->br, mb);
+ taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
+ }
+
+ return (ret);
+}
+
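+/*
+ * Transmit completion handler; polls the transmit CQ and re-arms it
+ * for further completion events.
+ */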
+void
+mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
+
+ mtx_lock(&sq->comp_lock);
+ mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
+ mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
+ mtx_unlock(&sq->comp_lock);
+}
+
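+/*
+ * Transmit taskqueue handler; drains mbufs left on the buf_ring when
+ * mlx5e_xmit() could not take the SQ lock or when the SQ becomes
+ * ready again after being full.
+ */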
+void
+mlx5e_tx_que(void *context, int pending)
+{
+ struct mlx5e_sq *sq = context;
+ struct ifnet *ifp = sq->ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ mtx_lock(&sq->lock);
+ if (!drbr_empty(ifp, sq->br))
+ mlx5e_xmit_locked(ifp, sq, NULL);
+ mtx_unlock(&sq->lock);
+ }
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
new file mode 100644
index 000000000000..771b4c69ffbc
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+struct mlx5_cqe64 *
+mlx5e_get_cqe(struct mlx5e_cq *cq)
+{
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_wqe(&cq->wq, mlx5_cqwq_get_ci(&cq->wq));
+
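+ /*
+ * A CQE is only valid when its ownership bit matches the value
+ * expected for the current wrap of the completion queue:
+ */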
+ if ((cqe->op_own ^ mlx5_cqwq_get_wrap_cnt(&cq->wq)) & MLX5_CQE_OWNER_MASK)
+ return (NULL);
+
+ /* ensure cqe content is read after cqe ownership bit */
+ atomic_thread_fence_acq();
+
+ return (cqe);
+}
+
+void
+mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+
+ if_printf(cq->priv->ifp, "%s: cqn=0x%.6x event=0x%.2x\n",
+ __func__, mcq->cqn, event);
+}