Diffstat (limited to 'sys/ofed/drivers/infiniband/core/mad.c')
-rw-r--r--  sys/ofed/drivers/infiniband/core/mad.c  755
1 file changed, 693 insertions(+), 62 deletions(-)
diff --git a/sys/ofed/drivers/infiniband/core/mad.c b/sys/ofed/drivers/infiniband/core/mad.c
index 64e660c38e4f..11b3ba372186 100644
--- a/sys/ofed/drivers/infiniband/core/mad.c
+++ b/sys/ofed/drivers/infiniband/core/mad.c
@@ -34,6 +34,9 @@
*
*/
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
@@ -46,8 +49,8 @@ MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
-int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
-int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
@@ -59,9 +62,26 @@ static struct kmem_cache *ib_mad_cache;
static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;
-/* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
+/*
+ * Timeout FIFO (tf) parameters
+ */
+enum {
+	/* minimum time between two consecutive activations of the tf workqueue */
+ MIN_BETWEEN_ACTIVATIONS_MS = 5
+};
+
+/*
+ * SA congestion control params
+ */
+enum {
+ MAX_OUTSTANDING_SA_MADS = 10,
+ MIN_TIME_FOR_SA_MAD_SEND_MS = 20,
+ MAX_SA_MADS = 10000
+};
+
+/* Port list lock */
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
/* Forward declarations */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -80,6 +100,509 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+ u32 timeout_ms, u32 retries_left);
+
+
+/*
+ * Timeout FIFO functions - implement a FIFO with a per-item timeout mechanism
+ */
+
+static void activate_timeout_handler_task(unsigned long data)
+{
+ struct to_fifo *tf;
+
+ tf = (struct to_fifo *)data;
+ del_timer(&tf->timer);
+ queue_work(tf->workq, &tf->work);
+}
+
+static unsigned long adjusted_time(unsigned long last, unsigned long next)
+{
+ unsigned long min_next;
+
+ min_next = last + msecs_to_jiffies(MIN_BETWEEN_ACTIVATIONS_MS);
+ if (time_after(min_next, next))
+ return min_next;
+
+ return next;
+}
+
+static void notify_failure(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_wc_status status)
+{
+ struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+
+ mad_send_wc.status = status;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc);
+}
+
+static inline struct sa_cc_data *
+get_cc_obj(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ return &mad_send_wr->mad_agent_priv->qp_info->port_priv->sa_cc;
+}
+
+static inline struct ib_mad_send_wr_private *tfe_to_mad(struct tf_entry *tfe)
+{
+ return container_of(tfe, struct ib_mad_send_wr_private, tf_list);
+}
+
+static void timeout_handler_task(struct work_struct *work)
+{
+ struct tf_entry *tmp1, *tmp2;
+ struct list_head *list_item, exp_lst;
+ unsigned long flags, curr_time;
+ int lst_empty;
+ struct to_fifo *tf;
+
+ tf = container_of(work, struct to_fifo, work);
+ do {
+ INIT_LIST_HEAD(&exp_lst);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ curr_time = jiffies;
+ list_for_each(list_item, &tf->to_head) {
+ tmp1 = list_entry(list_item, struct tf_entry, to_list);
+ if (time_before(curr_time, tmp1->exp_time))
+ break;
+ list_del(&tmp1->fifo_list);
+ tf->num_items--;
+ }
+
+ /* cut list up to and including list_item->prev */
+ list_cut_position(&exp_lst, &tf->to_head, list_item->prev);
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ lst_empty = list_empty(&exp_lst);
+ list_for_each_entry_safe(tmp1, tmp2, &exp_lst, to_list) {
+ list_del(&tmp1->to_list);
+ if (tmp1->canceled) {
+ tmp1->canceled = 0;
+ notify_failure(tfe_to_mad(tmp1), IB_WC_WR_FLUSH_ERR);
+ } else {
+ notify_failure(tfe_to_mad(tmp1), IB_WC_RESP_TIMEOUT_ERR);
+ }
+ }
+ } while (!lst_empty);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (!list_empty(&tf->to_head)) {
+ tmp1 = list_entry(tf->to_head.next, struct tf_entry, to_list);
+ mod_timer(&tf->timer, adjusted_time(curr_time, tmp1->exp_time));
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_create - create a new timeout-fifo object
+ * @fifo_size: maximum number of items in the fifo
+ *
+ * Allocate and initialize a new timeout-fifo object.
+ */
+static struct to_fifo *tf_create(u32 fifo_size)
+{
+ struct to_fifo *tf;
+
+ tf = kzalloc(sizeof(*tf), GFP_KERNEL);
+ if (tf) {
+ tf->workq = create_singlethread_workqueue("to_fifo");
+ if (!tf->workq) {
+ kfree(tf);
+ return NULL;
+ }
+ spin_lock_init(&tf->lists_lock);
+ INIT_LIST_HEAD(&tf->to_head);
+ INIT_LIST_HEAD(&tf->fifo_head);
+ init_timer(&tf->timer);
+ INIT_WORK(&tf->work, timeout_handler_task);
+ tf->timer.data = (unsigned long) tf;
+ tf->timer.function = activate_timeout_handler_task;
+ tf->timer.expires = jiffies;
+ tf->fifo_size = fifo_size;
+ tf->stop_enqueue = 0;
+ tf->num_items = 0;
+ }
+
+ return tf;
+}
+
+/**
+ * tf_enqueue - enqueue item to timeout-fifo object
+ * @tf: timeout-fifo object
+ * @item: item to enqueue.
+ * @timeout_ms: item expiration time in ms.
+ *
+ * Enqueue item to fifo and modify expiration timer when required.
+ *
+ * Returns 0 on success and negative on failure.
+ */
+static int tf_enqueue(struct to_fifo *tf, struct tf_entry *item, u32 timeout_ms)
+{
+ struct tf_entry *tmp;
+ struct list_head *list_item;
+ unsigned long flags;
+
+ item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (tf->num_items >= tf->fifo_size || tf->stop_enqueue) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return -EBUSY;
+ }
+
+ /* Insert item to timeout list */
+ list_for_each_prev(list_item, &tf->to_head) {
+ tmp = list_entry(list_item, struct tf_entry, to_list);
+ if (time_after(item->exp_time, tmp->exp_time))
+ break;
+ }
+
+ list_add(&item->to_list, list_item);
+
+ /* Insert item to fifo list */
+ list_add_tail(&item->fifo_list, &tf->fifo_head);
+
+ tf->num_items++;
+
+ /* modify expiration timer if required */
+ if (list_item == &tf->to_head)
+ mod_timer(&tf->timer, item->exp_time);
+
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ return 0;
+}
+
+/**
+ * tf_dequeue - dequeue item from timeout-fifo object
+ * @tf: timeout-fifo object
+ * @time_left_ms: returns the time left for expiration in ms.
+ *
+ * Dequeue item from fifo and modify expiration timer when required.
+ *
+ * Returns pointer to tf_entry on success and NULL on failure.
+ */
+static struct tf_entry *tf_dequeue(struct to_fifo *tf, u32 *time_left_ms)
+{
+ unsigned long flags;
+ unsigned long time_left;
+ struct tf_entry *tmp, *tmp1;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (list_empty(&tf->fifo_head)) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return NULL;
+ }
+
+ list_for_each_entry(tmp, &tf->fifo_head, fifo_list) {
+ if (!tmp->canceled)
+ break;
+ }
+
+ if (tmp->canceled) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return NULL;
+ }
+
+	/* modify timer in case the dequeued item is the next to expire */
+ if (tf->to_head.next == &tmp->to_list) {
+ if (list_is_last(&tmp->to_list, &tf->to_head)) {
+ del_timer(&tf->timer);
+ } else {
+ tmp1 = list_entry(tmp->to_list.next, struct tf_entry, to_list);
+ mod_timer(&tf->timer, tmp1->exp_time);
+ }
+ }
+ list_del(&tmp->fifo_list);
+ list_del(&tmp->to_list);
+ tf->num_items--;
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ time_left = tmp->exp_time - jiffies;
+ if ((long) time_left <= 0)
+ time_left = 0;
+ *time_left_ms = jiffies_to_msecs(time_left);
+
+ return tmp;
+}
+
+static void tf_stop_enqueue(struct to_fifo *tf)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ tf->stop_enqueue = 1;
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_free - free empty timeout-fifo object
+ * @tf:timeout-fifo object
+ *
+ */
+static void tf_free(struct to_fifo *tf)
+{
+ del_timer_sync(&tf->timer);
+ flush_workqueue(tf->workq);
+ destroy_workqueue(tf->workq);
+
+ kfree(tf);
+}
+
+/**
+ * tf_free_agent - flush MADs belonging to a specific MAD agent from the timeout-fifo
+ * @tf: timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ *
+ */
+static void tf_free_agent(struct to_fifo *tf, struct ib_mad_agent_private *mad_agent_priv)
+{
+ unsigned long flags;
+ struct tf_entry *tmp, *tmp1;
+ struct list_head tmp_head;
+
+ INIT_LIST_HEAD(&tmp_head);
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ list_for_each_entry_safe(tmp, tmp1, &tf->fifo_head, fifo_list) {
+ if (tfe_to_mad(tmp)->mad_agent_priv == mad_agent_priv) {
+ list_del(&tmp->to_list);
+ list_move(&tmp->fifo_list, &tmp_head);
+ tf->num_items--;
+ }
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ list_for_each_entry_safe(tmp, tmp1, &tmp_head, fifo_list) {
+ list_del(&tmp->fifo_list);
+ notify_failure(tfe_to_mad(tmp), IB_WC_WR_FLUSH_ERR);
+ }
+}
+
+/**
+ * tf_modify_item - modify the expiration time of a specific item
+ * @tf: timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ * @send_buf: the MAD to modify in queue
+ * @timeout_ms: new timeout to set.
+ *
+ * Returns 0 if the item was found on the list and -ENXIO if not.
+ *
+ * Note: send_buf may point to a MAD that has already been released.
+ * Therefore it must not be dereferenced before it is found in the list.
+ */
+static int tf_modify_item(struct to_fifo *tf,
+ struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+ struct tf_entry *tmp, *item;
+ struct list_head *list_item;
+ unsigned long flags;
+ int found = 0;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ list_for_each_entry(item, &tf->fifo_head, fifo_list) {
+ if (tfe_to_mad(item)->mad_agent_priv == mad_agent_priv &&
+ &tfe_to_mad(item)->send_buf == send_buf) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return -ENXIO;
+ }
+
+ item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+ if (timeout_ms) {
+ list_del(&item->to_list);
+ list_for_each_prev(list_item, &tf->to_head) {
+ tmp = list_entry(list_item, struct tf_entry, to_list);
+ if (time_after(item->exp_time, tmp->exp_time))
+ break;
+ }
+ list_add(&item->to_list, list_item);
+
+ /* modify expiration timer if required */
+ if (list_item == &tf->to_head)
+ mod_timer(&tf->timer, item->exp_time);
+ } else {
+		/*
+		 * when an item is canceled (timeout_ms == 0), move it to
+		 * the head of the timeout list and the tail of the fifo list
+		 */
+ item->canceled = 1;
+ list_move(&item->to_list, &tf->to_head);
+ list_move_tail(&item->fifo_list, &tf->fifo_head);
+ mod_timer(&tf->timer, item->exp_time);
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ return 0;
+}
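
Taken together, tf_create()/tf_enqueue()/tf_dequeue()/tf_stop_enqueue()/tf_free() form a small self-contained API. Below is a minimal usage sketch, not part of the patch; it assumes the struct to_fifo and struct tf_entry declarations introduced by the companion mad_priv.h change, and a bare tf_entry is used only to show call order (real entries are embedded in struct ib_mad_send_wr_private, as tfe_to_mad() above expects):

/* Illustrative only -- not part of the patch. */
static int tf_usage_sketch(void)
{
	struct to_fifo *tf;
	struct tf_entry *item;
	u32 time_left_ms;

	tf = tf_create(128);			/* at most 128 queued items */
	if (!tf)
		return -ENOMEM;

	item = kzalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		tf_free(tf);
		return -ENOMEM;
	}

	/* expire in 500 ms; -EBUSY when the fifo is full or stopped */
	if (tf_enqueue(tf, item, 500)) {
		kfree(item);
		tf_free(tf);
		return -EBUSY;
	}

	/* oldest non-canceled entry, with its remaining lifetime */
	item = tf_dequeue(tf, &time_left_ms);
	kfree(item);

	tf_stop_enqueue(tf);	/* refuse new entries ... */
	tf_free(tf);		/* ... then tear down timer and workqueue */
	return 0;
}
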
+
+/*
+ * SA congestion control functions
+ */
+
+/*
+ * Determines whether a MAD is subject to SA congestion control.
+ */
+static int is_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_hdr *mad;
+
+ mad = (struct ib_mad_hdr *)mad_send_wr->send_buf.mad;
+
+ return ((mad_send_wr->send_buf.timeout_ms) &&
+ (mad->mgmt_class == IB_MGMT_CLASS_SUBN_ADM) &&
+ ((mad->method == IB_MGMT_METHOD_GET) ||
+ (mad->method == IB_MGMT_METHOD_SET)));
+}
+
+/*
+ * Notify that an SA congestion-controlled MAD has completed,
+ * to allow dequeuing the next SA MAD from the congestion control queue.
+ */
+static void sa_cc_mad_done(struct sa_cc_data *cc_obj)
+{
+ unsigned long flags;
+ struct tf_entry *tfe;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ u32 time_left_ms, timeout_ms, retries;
+ int ret;
+
+ do {
+ spin_lock_irqsave(&cc_obj->lock, flags);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ if (!tfe) {
+ if (cc_obj->outstanding > 0)
+ cc_obj->outstanding--;
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ break;
+ }
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ mad_send_wr = tfe_to_mad(tfe);
+ time_left_ms += MIN_TIME_FOR_SA_MAD_SEND_MS;
+ if (time_left_ms > mad_send_wr->send_buf.timeout_ms) {
+ retries = time_left_ms / mad_send_wr->send_buf.timeout_ms - 1;
+ timeout_ms = mad_send_wr->send_buf.timeout_ms;
+ } else {
+ retries = 0;
+ timeout_ms = time_left_ms;
+ }
+ ret = send_sa_cc_mad(mad_send_wr, timeout_ms, retries);
+ if (ret) {
+ if (ret == -ENOMEM)
+ notify_failure(mad_send_wr, IB_WC_GENERAL_ERR);
+ else
+ notify_failure(mad_send_wr, IB_WC_LOC_QP_OP_ERR);
+ }
+ } while (ret);
+}
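
The arithmetic above converts the time a dequeued MAD has left into a retry schedule: the remaining queue time plus one MIN_TIME_FOR_SA_MAD_SEND_MS grace window is divided into per-send timeouts. For example, with time_left_ms = 100 and a per-send timeout of 30 ms, the budget is 120 ms, giving timeout_ms = 30 and retries = 120/30 - 1 = 3, i.e. four sends of 30 ms each. A standalone restatement of that split (an illustrative helper, not a function in this patch):

/* Illustrative restatement of the budget split in sa_cc_mad_done(). */
static void split_time_budget(u32 time_left_ms, u32 per_send_timeout_ms,
			      u32 *timeout_ms, u32 *retries)
{
	time_left_ms += MIN_TIME_FOR_SA_MAD_SEND_MS;
	if (time_left_ms > per_send_timeout_ms) {
		/* full-length sends fit: keep the timeout, derive retries */
		*retries = time_left_ms / per_send_timeout_ms - 1;
		*timeout_ms = per_send_timeout_ms;
	} else {
		/* budget smaller than one timeout: single, shortened send */
		*retries = 0;
		*timeout_ms = time_left_ms;
	}
}
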
+
+/*
+ * Send SA MAD under congestion control.
+ */
+static int sa_cc_mad_send(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ unsigned long flags;
+ int ret;
+ struct sa_cc_data *cc_obj;
+
+ cc_obj = get_cc_obj(mad_send_wr);
+ spin_lock_irqsave(&cc_obj->lock, flags);
+ if (cc_obj->outstanding < MAX_OUTSTANDING_SA_MADS) {
+ cc_obj->outstanding++;
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ ret = send_sa_cc_mad(mad_send_wr, mad_send_wr->send_buf.timeout_ms,
+ mad_send_wr->retries_left);
+ if (ret)
+ sa_cc_mad_done(cc_obj);
+
+ } else {
+ int qtime = (mad_send_wr->send_buf.timeout_ms *
+ (mad_send_wr->retries_left + 1))
+ - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+ if (qtime < 0)
+ qtime = 0;
+ ret = tf_enqueue(cc_obj->tf, &mad_send_wr->tf_list, (u32)qtime);
+
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ }
+
+ return ret;
+}
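
The else-branch computes qtime, the maximum time the MAD may sit in the fifo, as the inverse of that split. A worked example with illustrative numbers:

/* Illustrative: send_buf.timeout_ms = 30, retries_left = 3
 *   qtime = 30 * (3 + 1) - MIN_TIME_FOR_SA_MAD_SEND_MS
 *         = 120 - 20 = 100 ms
 * so the MAD may wait up to 100 ms queued and still retain the
 * 20 ms send window that sa_cc_mad_done() adds back on dequeue. */
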
+
+/*
+ * Initialize SA congestion control.
+ */
+static int sa_cc_init(struct sa_cc_data *cc_obj)
+{
+ spin_lock_init(&cc_obj->lock);
+ cc_obj->outstanding = 0;
+ cc_obj->tf = tf_create(MAX_SA_MADS);
+ if (!cc_obj->tf)
+ return -ENOMEM;
+ return 0;
+}
+
+/*
+ * Cancel SA MADs from congestion control queue.
+ */
+static void cancel_sa_cc_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ tf_free_agent(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+ mad_agent_priv);
+}
+
+/*
+ * Modify timeout of SA MAD on congestion control queue.
+ */
+static int modify_sa_cc_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+ int ret;
+ int qtime = 0;
+
+ if (timeout_ms > MIN_TIME_FOR_SA_MAD_SEND_MS)
+ qtime = timeout_ms - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+ ret = tf_modify_item(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+ mad_agent_priv, send_buf, (u32)qtime);
+ return ret;
+}
+
+static void sa_cc_destroy(struct sa_cc_data *cc_obj)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct tf_entry *tfe;
+ struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+ u32 time_left_ms;
+
+ mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ tf_stop_enqueue(cc_obj->tf);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ while (tfe) {
+ mad_send_wr = tfe_to_mad(tfe);
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ }
+ tf_free(cc_obj->tf);
+}
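
For orientation, the call order of the congestion-control object over a port's lifetime is summarized below (illustrative only; it assumes the sa_cc member that the companion mad_priv.h change adds to struct ib_mad_port_private, and in the real code sa_cc_mad_done() runs from the completion, timeout, and cancel paths rather than inline):

/* Illustrative call-order summary -- not part of the patch. */
static int sa_cc_lifecycle_sketch(struct ib_mad_port_private *port_priv,
				  struct ib_mad_send_wr_private *wr)
{
	int ret;

	ret = sa_cc_init(&port_priv->sa_cc);	/* ib_mad_port_open() */
	if (ret)
		return ret;

	if (is_sa_cc_mad(wr))			/* ib_post_send_mad() */
		ret = sa_cc_mad_send(wr);	/* send now, or enqueue */

	sa_cc_mad_done(&port_priv->sa_cc);	/* on completion: kick queue */

	sa_cc_destroy(&port_priv->sa_cc);	/* ib_mad_port_close(): flush */
	return ret;
}
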
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
@@ -184,15 +707,6 @@ int ib_response_mad(struct ib_mad *mad)
}
EXPORT_SYMBOL(ib_response_mad);
-static void timeout_callback(unsigned long data)
-{
- struct ib_mad_agent_private *mad_agent_priv =
- (struct ib_mad_agent_private *) data;
-
- queue_work(mad_agent_priv->qp_info->port_priv->wq,
- &mad_agent_priv->timeout_work);
-}
-
/*
* ib_register_mad_agent - Register to send/receive MADs
*/
@@ -285,6 +799,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error1;
}
+ /* Verify the QP requested is supported. For example, Ethernet devices
+ * will not have QP0 */
+ if (!port_priv->qp_info[qpn].qp) {
+ ret = ERR_PTR(-EPROTONOSUPPORT);
+ goto error1;
+ }
+
/* Allocate structures */
mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
if (!mad_agent_priv) {
@@ -300,13 +821,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
}
if (mad_reg_req) {
- reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
+ reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
if (!reg_req) {
ret = ERR_PTR(-ENOMEM);
goto error3;
}
- /* Make a copy of the MAD registration request */
- memcpy(reg_req, mad_reg_req, sizeof *reg_req);
}
/* Now, fill in the various structures */
@@ -324,9 +843,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
- INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends);
- setup_timer(&mad_agent_priv->timeout_timer, timeout_callback,
- (unsigned long) mad_agent_priv);
+ INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
atomic_set(&mad_agent_priv->refcount, 1);
@@ -533,8 +1050,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
*/
cancel_mads(mad_agent_priv);
port_priv = mad_agent_priv->qp_info->port_priv;
- del_timer_sync(&mad_agent_priv->timeout_timer);
- cancel_work_sync(&mad_agent_priv->timeout_work);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
spin_lock_irqsave(&port_priv->reg_lock, flags);
remove_mad_reg_req(mad_agent_priv);
@@ -577,6 +1093,7 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
+ if (!IS_ERR(mad_agent)) {
/* If the TID is zero, the agent can only snoop. */
if (mad_agent->hi_tid) {
mad_agent_priv = container_of(mad_agent,
@@ -589,6 +1106,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
+ }
+
return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -695,7 +1214,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_wc mad_wc;
struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
- if (device->node_type == RDMA_NODE_IB_SWITCH)
+ if (device->node_type == RDMA_NODE_IB_SWITCH &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
port_num = send_wr->wr.ud.port_num;
else
port_num = mad_agent_priv->agent.port_num;
@@ -1028,12 +1548,20 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.mad,
sge[0].length,
DMA_TO_DEVICE);
- mad_send_wr->header_mapping = sge[0].addr;
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+ return -ENOMEM;
sge[1].addr = ib_dma_map_single(mad_agent->device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
+
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+ ret = -ENOMEM;
+ goto dma1_err;
+ }
+
+ mad_send_wr->header_mapping = sge[0].addr;
mad_send_wr->payload_mapping = sge[1].addr;
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -1051,14 +1579,51 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
- if (ret) {
+
+ if (!ret)
+ return 0;
+
ib_dma_unmap_single(mad_agent->device,
-			    mad_send_wr->header_mapping,
-			    sge[0].length, DMA_TO_DEVICE);
+			    mad_send_wr->payload_mapping,
+			    sge[1].length, DMA_TO_DEVICE);
+dma1_err:
ib_dma_unmap_single(mad_agent->device,
-			    mad_send_wr->payload_mapping,
-			    sge[1].length, DMA_TO_DEVICE);
+			    sge[0].addr,
+			    sge[0].length, DMA_TO_DEVICE);
+ return ret;
+}
+
+/*
+ * Send SA MAD that passed congestion control
+ */
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+ u32 timeout_ms, u32 retries_left)
+{
+ int ret;
+ unsigned long flags;
+ struct ib_mad_agent_private *mad_agent_priv;
+
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
+ mad_send_wr->retries_left = retries_left;
+ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+
+ /* Reference MAD agent until send completes */
+ atomic_inc(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ ret = ib_send_mad(mad_send_wr);
+ if (ret < 0) {
+ /* Fail send request */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
}
+
return ret;
}
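
The mapping-error checks added to ib_send_mad() follow the usual kernel DMA discipline: every ib_dma_map_single() is validated with ib_dma_mapping_error() before the address is used, and the failure path unwinds only what was actually mapped, in reverse order. A condensed sketch of that pattern (illustrative helper, not part of the patch; the caller is assumed to have filled in sge[i].length):

/* Condensed map/check/unwind pattern used by ib_send_mad() above. */
static int map_two_sges(struct ib_device *dev, struct ib_sge sge[2],
			void *hdr, void *payload)
{
	sge[0].addr = ib_dma_map_single(dev, hdr, sge[0].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(dev, sge[0].addr)))
		return -ENOMEM;

	sge[1].addr = ib_dma_map_single(dev, payload, sge[1].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(dev, sge[1].addr))) {
		/* unwind: only sge[0] is mapped at this point */
		ib_dma_unmap_single(dev, sge[0].addr, sge[0].length,
				    DMA_TO_DEVICE);
		return -ENOMEM;
	}
	return 0;
}
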
@@ -1125,6 +1690,12 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
+ if (is_sa_cc_mad(mad_send_wr)) {
+ mad_send_wr->is_sa_cc_mad = 1;
+ ret = sa_cc_mad_send(mad_send_wr);
+ if (ret < 0)
+ goto error;
+ } else {
/* Reference MAD agent until send completes */
atomic_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -1147,6 +1718,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
goto error;
}
}
+ }
return 0;
error:
if (bad_send_buf)
@@ -1206,10 +1778,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
{
int i;
- for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
printk(KERN_ERR PFX "Method %d already in use\n", i);
return -EINVAL;
@@ -1343,13 +1912,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto error3;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error3:
@@ -1442,13 +2007,9 @@ check_in_use:
goto error4;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error4:
@@ -1614,6 +2175,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
mad->mad_hdr.class_version].class;
if (!class)
goto out;
+ if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+ IB_MGMT_MAX_METHODS)
+ goto out;
method = class->method_table[convert_mgmt_class(
mad->mad_hdr.mgmt_class)];
if (method)
@@ -1856,6 +2420,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
}
}
+static bool generate_unmatched_resp(struct ib_mad_private *recv,
+ struct ib_mad_private *response)
+{
+ if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
+ recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
+ memcpy(response, recv, sizeof *response);
+ response->header.recv_wc.wc = &response->header.wc;
+ response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+ response->header.recv_wc.recv_buf.grh = &response->grh;
+ response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ response->mad.mad.mad_hdr.status =
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+ if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
+
+ return true;
+ } else {
+ return false;
+ }
+}
static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
{
@@ -1865,6 +2449,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
+ int ret = IB_MAD_RESULT_SUCCESS;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
qp_info = mad_list->mad_queue->qp_info;
@@ -1948,8 +2533,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
local:
/* Give driver "right of first refusal" on incoming MAD */
if (port_priv->device->process_mad) {
- int ret;
-
ret = port_priv->device->process_mad(port_priv->device, 0,
port_priv->port_num,
wc, &recv->grh,
@@ -1977,6 +2560,10 @@ local:
* or via recv_handler in ib_mad_complete_recv()
*/
recv = NULL;
+ } else if ((ret & IB_MAD_RESULT_SUCCESS) &&
+ generate_unmatched_resp(recv, response)) {
+ agent_send_response(&response->mad.mad, &recv->grh, wc,
+ port_priv->device, port_num, qp_info->qp->qp_num);
}
out:
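
generate_unmatched_resp() implements the IBA rule that an unhandled Get or Set request is answered with a GetResp whose status reports an unsupported method/attribute; for directed-route SMPs the direction bit is also set in the status word. The status composition in isolation (a sketch; the constants come from rdma/ib_mad.h and rdma/ib_smi.h):

#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>

/* Illustrative: status word of the generated unmatched response. */
static __be16 unmatched_resp_status(u8 mgmt_class)
{
	__be16 status =
	    cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);

	if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		status |= IB_SMP_DIRECTION;	/* D bit, already __be16 */
	return status;
}
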
@@ -1992,9 +2579,10 @@ out:
static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long delay;
if (list_empty(&mad_agent_priv->wait_list)) {
- del_timer(&mad_agent_priv->timeout_timer);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
} else {
mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
struct ib_mad_send_wr_private,
@@ -2003,8 +2591,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
if (time_after(mad_agent_priv->timeout,
mad_send_wr->timeout)) {
mad_agent_priv->timeout = mad_send_wr->timeout;
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
}
}
@@ -2031,14 +2622,15 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
temp_mad_send_wr->timeout))
break;
}
- } else
+ }
+ else
list_item = &mad_agent_priv->wait_list;
list_add(&mad_send_wr->agent_list, list_item);
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
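
These hunks replace the agent's timer_list/work_struct pair with a single delayed_work, so one mod_delayed_work() call both re-arms the delay and queues the handler, and a non-positive jiffies difference is clamped to one tick. The pattern in isolation (a sketch with made-up names, not the driver's structures; INIT_DELAYED_WORK() is assumed to have run at setup):

#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct sketch_agent {
	struct workqueue_struct *wq;
	struct delayed_work timed_work;
};

static void sketch_timeout_fn(struct work_struct *work)
{
	struct sketch_agent *a =
	    container_of(work, struct sketch_agent, timed_work.work);

	/* scan the wait list, then re-arm via sketch_rearm() */
	(void)a;
}

/* re-arm for an absolute jiffies deadline, clamping to >= 1 tick */
static void sketch_rearm(struct sketch_agent *a, unsigned long deadline)
{
	unsigned long delay = deadline - jiffies;

	if ((long)delay <= 0)
		delay = 1;
	mod_delayed_work(a->wq, &a->timed_work, delay);
}
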
@@ -2090,9 +2682,12 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
mad_send_wc->status = mad_send_wr->status;
if (ret == IB_RMPP_RESULT_INTERNAL)
ib_rmpp_send_handler(mad_send_wc);
- else
+ else {
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
+ }
/* Release reference on agent taken when sending */
deref_mad_agent(mad_agent_priv);
@@ -2272,6 +2867,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
INIT_LIST_HEAD(&cancel_list);
+ cancel_sa_cc_mads(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&mad_agent_priv->send_list, agent_list) {
@@ -2293,6 +2889,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
&cancel_list, agent_list) {
mad_send_wc.send_buf = &mad_send_wr->send_buf;
list_del(&mad_send_wr->agent_list);
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
atomic_dec(&mad_agent_priv->refcount);
@@ -2332,7 +2930,13 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
- if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+ if (!mad_send_wr) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (modify_sa_cc_mad(mad_agent_priv, send_buf, timeout_ms))
+ return -EINVAL;
+ return 0;
+ }
+ if (mad_send_wr->status != IB_WC_SUCCESS) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
}
@@ -2482,10 +3086,10 @@ static void timeout_sends(struct work_struct *work)
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
- unsigned long flags;
+ unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private,
- timeout_work);
+ timed_work.work);
mad_send_wc.vendor_err = 0;
spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2495,8 +3099,12 @@ static void timeout_sends(struct work_struct *work)
agent_list);
if (time_after(mad_send_wr->timeout, jiffies)) {
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ queue_delayed_work(mad_agent_priv->qp_info->
+ port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
break;
}
@@ -2512,6 +3120,8 @@ static void timeout_sends(struct work_struct *work)
else
mad_send_wc.status = mad_send_wr->status;
mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -2572,6 +3182,14 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof *mad_priv -
sizeof mad_priv->header,
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+ sg_list.addr))) {
+ ret = -ENOMEM;
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ printk(KERN_ERR PFX "ib_dma_map_single failed\n");
+ break;
+ }
+
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -2645,6 +3263,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
int ret, i;
struct ib_qp_attr *attr;
struct ib_qp *qp;
+ u16 pkey_index = 0;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
@@ -2652,6 +3271,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
return -ENOMEM;
}
+ ret = ib_find_pkey(port_priv->device, port_priv->port_num,
+ 0xFFFF, &pkey_index);
+ if (ret)
+ pkey_index = 0;
+
for (i = 0; i < IB_MAD_QPS_CORE; i++) {
qp = port_priv->qp_info[i].qp;
if (!qp)
@@ -2662,7 +3286,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
* one is needed for the Reset to Init transition
*/
attr->qp_state = IB_QPS_INIT;
- attr->pkey_index = 0;
+ attr->pkey_index = pkey_index;
attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
@@ -2858,6 +3482,10 @@ static int ib_mad_port_open(struct ib_device *device,
}
INIT_WORK(&port_priv->work, ib_mad_completion_handler);
+ if (sa_cc_init(&port_priv->sa_cc))
+ goto error9;
+
+
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
@@ -2865,17 +3493,19 @@ static int ib_mad_port_open(struct ib_device *device,
ret = ib_mad_port_start(port_priv);
if (ret) {
printk(KERN_ERR PFX "Couldn't start port\n");
- goto error9;
+ goto error10;
}
return 0;
-error9:
+error10:
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_del_init(&port_priv->port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
destroy_workqueue(port_priv->wq);
+error9:
+ sa_cc_destroy(&port_priv->sa_cc);
error8:
destroy_mad_qp(&port_priv->qp_info[1]);
error7:
@@ -2915,6 +3545,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
destroy_workqueue(port_priv->wq);
+ sa_cc_destroy(&port_priv->sa_cc);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dereg_mr(port_priv->mr);
@@ -2983,6 +3614,9 @@ static void ib_mad_remove_device(struct ib_device *device)
{
int i, num_ports, cur_port;
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
if (device->node_type == RDMA_NODE_IB_SWITCH) {
num_ports = 1;
cur_port = 0;
@@ -3017,8 +3651,6 @@ static int __init ib_mad_init_module(void)
mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
- spin_lock_init(&ib_mad_port_list_lock);
-
ib_mad_cache = kmem_cache_create("ib_mad",
sizeof(struct ib_mad_private),
0,
@@ -3054,4 +3686,3 @@ static void __exit ib_mad_cleanup_module(void)
module_init(ib_mad_init_module);
module_exit(ib_mad_cleanup_module);
-