Diffstat (limited to 'sys/ofed/drivers/infiniband/core/mad.c')
-rw-r--r-- | sys/ofed/drivers/infiniband/core/mad.c | 755
1 files changed, 693 insertions, 62 deletions
diff --git a/sys/ofed/drivers/infiniband/core/mad.c b/sys/ofed/drivers/infiniband/core/mad.c
index 64e660c38e4f..11b3ba372186 100644
--- a/sys/ofed/drivers/infiniband/core/mad.c
+++ b/sys/ofed/drivers/infiniband/core/mad.c
@@ -34,6 +34,9 @@
  *
  */
 #include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
 #include <rdma/ib_cache.h>
 
 #include "mad_priv.h"
@@ -46,8 +49,8 @@ MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
-int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
-int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
 
 module_param_named(send_queue_size, mad_sendq_size, int, 0444);
 MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
@@ -59,9 +62,26 @@ static struct kmem_cache *ib_mad_cache;
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
 
-/* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
+/*
+ * Timeout FIFO (tf) param
+ */
+enum {
+	/* min time between 2 consecutive activations of tf workqueue */
+	MIN_BETWEEN_ACTIVATIONS_MS = 5
+};
+
+/*
+ * SA congestion control params
+ */
+enum {
+	MAX_OUTSTANDING_SA_MADS = 10,
+	MIN_TIME_FOR_SA_MAD_SEND_MS = 20,
+	MAX_SA_MADS = 10000
+};
+
+/* Port list lock */
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
 /* Forward declarations */
 static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -80,6 +100,509 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			      u8 mgmt_class);
 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			   struct ib_mad_agent_private *agent_priv);
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+			  u32 timeout_ms, u32 retries_left);
+
+
+/*
+ * Timeout FIFO functions - implements FIFO with timeout mechanism
+ */
+
+static void activate_timeout_handler_task(unsigned long data)
+{
+	struct to_fifo *tf;
+
+	tf = (struct to_fifo *)data;
+	del_timer(&tf->timer);
+	queue_work(tf->workq, &tf->work);
+}
+
+static unsigned long adjusted_time(unsigned long last, unsigned long next)
+{
+	unsigned long min_next;
+
+	min_next = last + msecs_to_jiffies(MIN_BETWEEN_ACTIVATIONS_MS);
+	if (time_after(min_next, next))
+		return min_next;
+
+	return next;
+}
+
+static void notify_failure(struct ib_mad_send_wr_private *mad_send_wr,
+			   enum ib_wc_status status)
+{
+	struct ib_mad_send_wc mad_send_wc;
+	struct ib_mad_agent_private *mad_agent_priv;
+
+	mad_send_wc.status = status;
+	mad_send_wc.vendor_err = 0;
+	mad_send_wc.send_buf = &mad_send_wr->send_buf;
+	mad_agent_priv = mad_send_wr->mad_agent_priv;
+	mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc);
+}
+
+static inline struct sa_cc_data *
+get_cc_obj(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	return &mad_send_wr->mad_agent_priv->qp_info->port_priv->sa_cc;
+}
+
+static inline struct ib_mad_send_wr_private *tfe_to_mad(struct tf_entry *tfe)
+{
+	return container_of(tfe, struct ib_mad_send_wr_private, tf_list);
+}
+
+static void timeout_handler_task(struct work_struct *work)
+{
+	struct tf_entry *tmp1, *tmp2;
+	struct list_head *list_item, exp_lst;
+	unsigned long flags, curr_time;
+	int lst_empty;
+	struct to_fifo *tf;
+
+	tf = container_of(work, struct to_fifo, work);
+	do {
+		INIT_LIST_HEAD(&exp_lst);
+
+		spin_lock_irqsave(&tf->lists_lock, flags);
+		curr_time = jiffies;
+		list_for_each(list_item, &tf->to_head) {
+			tmp1 = list_entry(list_item, struct tf_entry, to_list);
+			if (time_before(curr_time, tmp1->exp_time))
+				break;
+			list_del(&tmp1->fifo_list);
+			tf->num_items--;
+		}
+
+		/* cut list up to and including list_item->prev */
+		list_cut_position(&exp_lst, &tf->to_head, list_item->prev);
+		spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+		lst_empty = list_empty(&exp_lst);
+		list_for_each_entry_safe(tmp1, tmp2, &exp_lst, to_list) {
+			list_del(&tmp1->to_list);
+			if (tmp1->canceled) {
+				tmp1->canceled = 0;
+				notify_failure(tfe_to_mad(tmp1), IB_WC_WR_FLUSH_ERR);
+			} else {
+				notify_failure(tfe_to_mad(tmp1), IB_WC_RESP_TIMEOUT_ERR);
+			}
+		}
+	} while (!lst_empty);
+
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	if (!list_empty(&tf->to_head)) {
+		tmp1 = list_entry(tf->to_head.next, struct tf_entry, to_list);
+		mod_timer(&tf->timer, adjusted_time(curr_time, tmp1->exp_time));
+	}
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_create - creates new timeout-fifo object
+ * @fifo_size: Maximum fifo size
+ *
+ * Allocate and initialize new timeout-fifo object
+ */
+static struct to_fifo *tf_create(u32 fifo_size)
+{
+	struct to_fifo *tf;
+
+	tf = kzalloc(sizeof(*tf), GFP_KERNEL);
+	if (tf) {
+		tf->workq = create_singlethread_workqueue("to_fifo");
+		if (!tf->workq) {
+			kfree(tf);
+			return NULL;
+		}
+		spin_lock_init(&tf->lists_lock);
+		INIT_LIST_HEAD(&tf->to_head);
+		INIT_LIST_HEAD(&tf->fifo_head);
+		init_timer(&tf->timer);
+		INIT_WORK(&tf->work, timeout_handler_task);
+		tf->timer.data = (unsigned long) tf;
+		tf->timer.function = activate_timeout_handler_task;
+		tf->timer.expires = jiffies;
+		tf->fifo_size = fifo_size;
+		tf->stop_enqueue = 0;
+		tf->num_items = 0;
+	}
+
+	return tf;
+}
+
+/**
+ * tf_enqueue - enqueue item to timeout-fifo object
+ * @tf:timeout-fifo object
+ * @item: item to enqueue.
+ * @timeout_ms: item expiration time in ms.
+ *
+ * Enqueue item to fifo and modify expiration timer when required.
+ *
+ * Returns 0 on success and negative on failure.
+ */
+static int tf_enqueue(struct to_fifo *tf, struct tf_entry *item, u32 timeout_ms)
+{
+	struct tf_entry *tmp;
+	struct list_head *list_item;
+	unsigned long flags;
+
+	item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	if (tf->num_items >= tf->fifo_size || tf->stop_enqueue) {
+		spin_unlock_irqrestore(&tf->lists_lock, flags);
+		return -EBUSY;
+	}
+
+	/* Insert item to timeout list */
+	list_for_each_prev(list_item, &tf->to_head) {
+		tmp = list_entry(list_item, struct tf_entry, to_list);
+		if (time_after(item->exp_time, tmp->exp_time))
+			break;
+	}
+
+	list_add(&item->to_list, list_item);
+
+	/* Insert item to fifo list */
+	list_add_tail(&item->fifo_list, &tf->fifo_head);
+
+	tf->num_items++;
+
+	/* modify expiration timer if required */
+	if (list_item == &tf->to_head)
+		mod_timer(&tf->timer, item->exp_time);
+
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+	return 0;
+}
+
+/**
+ * tf_dequeue - dequeue item from timeout-fifo object
+ * @tf:timeout-fifo object
+ * @time_left_ms: returns the time left for expiration in ms.
+ *
+ * Dequeue item from fifo and modify expiration timer when required.
+ *
+ * Returns pointer to tf_entry on success and NULL on failure.
+ */
+static struct tf_entry *tf_dequeue(struct to_fifo *tf, u32 *time_left_ms)
+{
+	unsigned long flags;
+	unsigned long time_left;
+	struct tf_entry *tmp, *tmp1;
+
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	if (list_empty(&tf->fifo_head)) {
+		spin_unlock_irqrestore(&tf->lists_lock, flags);
+		return NULL;
+	}
+
+	list_for_each_entry(tmp, &tf->fifo_head, fifo_list) {
+		if (!tmp->canceled)
+			break;
+	}
+
+	if (tmp->canceled) {
+		spin_unlock_irqrestore(&tf->lists_lock, flags);
+		return NULL;
+	}
+
+	/* modify timer in case enqueued item is the next to expire */
+	if (tf->to_head.next == &tmp->to_list) {
+		if (list_is_last(&tmp->to_list, &tf->to_head)) {
+			del_timer(&tf->timer);
+		} else {
+			tmp1 = list_entry(tmp->to_list.next, struct tf_entry, to_list);
+			mod_timer(&tf->timer, tmp1->exp_time);
+		}
+	}
+	list_del(&tmp->fifo_list);
+	list_del(&tmp->to_list);
+	tf->num_items--;
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+	time_left = tmp->exp_time - jiffies;
+	if ((long) time_left <= 0)
+		time_left = 0;
+	*time_left_ms = jiffies_to_msecs(time_left);
+
+	return tmp;
+}
+
+static void tf_stop_enqueue(struct to_fifo *tf)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	tf->stop_enqueue = 1;
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_free - free empty timeout-fifo object
+ * @tf:timeout-fifo object
+ *
+ */
+static void tf_free(struct to_fifo *tf)
+{
+	del_timer_sync(&tf->timer);
+	flush_workqueue(tf->workq);
+	destroy_workqueue(tf->workq);
+
+	kfree(tf);
+}
+
+/**
+ * tf_free_agent - free MADs related to specific MAD agent from timeout-fifo
+ * @tf:timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ *
+ */
+static void tf_free_agent(struct to_fifo *tf, struct ib_mad_agent_private *mad_agent_priv)
+{
+	unsigned long flags;
+	struct tf_entry *tmp, *tmp1;
+	struct list_head tmp_head;
+
+	INIT_LIST_HEAD(&tmp_head);
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	list_for_each_entry_safe(tmp, tmp1, &tf->fifo_head, fifo_list) {
+		if (tfe_to_mad(tmp)->mad_agent_priv == mad_agent_priv) {
+			list_del(&tmp->to_list);
+			list_move(&tmp->fifo_list, &tmp_head);
+			tf->num_items--;
+		}
+	}
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+	list_for_each_entry_safe(tmp, tmp1, &tmp_head, fifo_list) {
+		list_del(&tmp->fifo_list);
+		notify_failure(tfe_to_mad(tmp), IB_WC_WR_FLUSH_ERR);
+	}
+}
+
+/**
+ * tf_modify_item - to modify expiration time for specific item
+ * @tf:timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ * @send_buf: the MAD to modify in queue
+ * @timeout_ms: new timeout to set.
+ *
+ * Returns 0 if item found on list and -ENXIO if not.
+ *
+ * Note: The send_buf may point on MAD that is already released.
+ * Therefore we can't use this struct before finding it in the list
+ */
+static int tf_modify_item(struct to_fifo *tf,
+			  struct ib_mad_agent_private *mad_agent_priv,
+			  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+	struct tf_entry *tmp, *item;
+	struct list_head *list_item;
+	unsigned long flags;
+	int found = 0;
+
+	spin_lock_irqsave(&tf->lists_lock, flags);
+	list_for_each_entry(item, &tf->fifo_head, fifo_list) {
+		if (tfe_to_mad(item)->mad_agent_priv == mad_agent_priv &&
+		    &tfe_to_mad(item)->send_buf == send_buf) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		spin_unlock_irqrestore(&tf->lists_lock, flags);
+		return -ENXIO;
+	}
+
+	item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+	if (timeout_ms) {
+		list_del(&item->to_list);
+		list_for_each_prev(list_item, &tf->to_head) {
+			tmp = list_entry(list_item, struct tf_entry, to_list);
+			if (time_after(item->exp_time, tmp->exp_time))
+				break;
+		}
+		list_add(&item->to_list, list_item);
+
+		/* modify expiration timer if required */
+		if (list_item == &tf->to_head)
+			mod_timer(&tf->timer, item->exp_time);
+	} else {
+		/*
+		 * when item canceled (timeout_ms == 0) move item to
+		 * head of timeout list and to the tail of fifo list
+		 */
+		item->canceled = 1;
+		list_move(&item->to_list, &tf->to_head);
+		list_move_tail(&item->fifo_list, &tf->fifo_head);
+		mod_timer(&tf->timer, item->exp_time);
+	}
+	spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+	return 0;
+}
+
+/*
+ * SA congestion control functions
+ */
+
+/*
+ * Defines which MAD is under congestion control.
+ */
+static int is_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_hdr *mad;
+
+	mad = (struct ib_mad_hdr *)mad_send_wr->send_buf.mad;
+
+	return ((mad_send_wr->send_buf.timeout_ms) &&
+		(mad->mgmt_class == IB_MGMT_CLASS_SUBN_ADM) &&
+		((mad->method == IB_MGMT_METHOD_GET) ||
+		 (mad->method == IB_MGMT_METHOD_SET)));
+}
+
+/*
+ * Notify that SA congestion controlled MAD is done.
+ * to allow dequeuing SA MAD from congestion control queue.
+ */
+static void sa_cc_mad_done(struct sa_cc_data *cc_obj)
+{
+	unsigned long flags;
+	struct tf_entry *tfe;
+	struct ib_mad_send_wr_private *mad_send_wr;
+	u32 time_left_ms, timeout_ms, retries;
+	int ret;
+
+	do {
+		spin_lock_irqsave(&cc_obj->lock, flags);
+		tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+		if (!tfe) {
+			if (cc_obj->outstanding > 0)
+				cc_obj->outstanding--;
+			spin_unlock_irqrestore(&cc_obj->lock, flags);
+			break;
+		}
+		spin_unlock_irqrestore(&cc_obj->lock, flags);
+		mad_send_wr = tfe_to_mad(tfe);
+		time_left_ms += MIN_TIME_FOR_SA_MAD_SEND_MS;
+		if (time_left_ms > mad_send_wr->send_buf.timeout_ms) {
+			retries = time_left_ms / mad_send_wr->send_buf.timeout_ms - 1;
+			timeout_ms = mad_send_wr->send_buf.timeout_ms;
+		} else {
+			retries = 0;
+			timeout_ms = time_left_ms;
+		}
+		ret = send_sa_cc_mad(mad_send_wr, timeout_ms, retries);
+		if (ret) {
+			if (ret == -ENOMEM)
+				notify_failure(mad_send_wr, IB_WC_GENERAL_ERR);
+			else
+				notify_failure(mad_send_wr, IB_WC_LOC_QP_OP_ERR);
+		}
+	} while (ret);
+}
+
+/*
+ * Send SA MAD under congestion control.
+ */
+static int sa_cc_mad_send(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	unsigned long flags;
+	int ret;
+	struct sa_cc_data *cc_obj;
+
+	cc_obj = get_cc_obj(mad_send_wr);
+	spin_lock_irqsave(&cc_obj->lock, flags);
+	if (cc_obj->outstanding < MAX_OUTSTANDING_SA_MADS) {
+		cc_obj->outstanding++;
+		spin_unlock_irqrestore(&cc_obj->lock, flags);
+		ret = send_sa_cc_mad(mad_send_wr, mad_send_wr->send_buf.timeout_ms,
+				     mad_send_wr->retries_left);
+		if (ret)
+			sa_cc_mad_done(cc_obj);
+
+	} else {
+		int qtime = (mad_send_wr->send_buf.timeout_ms *
+			     (mad_send_wr->retries_left + 1))
+			    - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+		if (qtime < 0)
+			qtime = 0;
+		ret = tf_enqueue(cc_obj->tf, &mad_send_wr->tf_list, (u32)qtime);
+
+		spin_unlock_irqrestore(&cc_obj->lock, flags);
+	}
+
+	return ret;
+}
+
+/*
+ * Initialize SA congestion control.
+ */
+static int sa_cc_init(struct sa_cc_data *cc_obj)
+{
+	spin_lock_init(&cc_obj->lock);
+	cc_obj->outstanding = 0;
+	cc_obj->tf = tf_create(MAX_SA_MADS);
+	if (!cc_obj->tf)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Cancel SA MADs from congestion control queue.
+ */
+static void cancel_sa_cc_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+	tf_free_agent(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+		      mad_agent_priv);
+}
+
+/*
+ * Modify timeout of SA MAD on congestion control queue.
+ */
+static int modify_sa_cc_mad(struct ib_mad_agent_private *mad_agent_priv,
+			    struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+	int ret;
+	int qtime = 0;
+
+	if (timeout_ms > MIN_TIME_FOR_SA_MAD_SEND_MS)
+		qtime = timeout_ms - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+	ret = tf_modify_item(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+			     mad_agent_priv, send_buf, (u32)qtime);
+	return ret;
+}
+
+static void sa_cc_destroy(struct sa_cc_data *cc_obj)
+{
+	struct ib_mad_send_wr_private *mad_send_wr;
+	struct tf_entry *tfe;
+	struct ib_mad_send_wc mad_send_wc;
+	struct ib_mad_agent_private *mad_agent_priv;
+	u32 time_left_ms;
+
+	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+	mad_send_wc.vendor_err = 0;
+
+	tf_stop_enqueue(cc_obj->tf);
+	tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+	while (tfe) {
+		mad_send_wr = tfe_to_mad(tfe);
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
+		mad_agent_priv = mad_send_wr->mad_agent_priv;
+		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+						   &mad_send_wc);
+		tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+	}
+	tf_free(cc_obj->tf);
+}
 
 /*
  * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -184,15 +707,6 @@ int ib_response_mad(struct ib_mad *mad)
 }
 EXPORT_SYMBOL(ib_response_mad);
 
-static void timeout_callback(unsigned long data)
-{
-	struct ib_mad_agent_private *mad_agent_priv =
-		(struct ib_mad_agent_private *) data;
-
-	queue_work(mad_agent_priv->qp_info->port_priv->wq,
-		   &mad_agent_priv->timeout_work);
-}
-
 /*
  * ib_register_mad_agent - Register to send/receive MADs
  */
@@ -285,6 +799,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		goto error1;
 	}
 
+	/* Verify the QP requested is supported.  For example, Ethernet devices
+	 * will not have QP0 */
+	if (!port_priv->qp_info[qpn].qp) {
+		ret = ERR_PTR(-EPROTONOSUPPORT);
+		goto error1;
+	}
+
 	/* Allocate structures */
 	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
 	if (!mad_agent_priv) {
@@ -300,13 +821,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	}
 
 	if (mad_reg_req) {
-		reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
+		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
 		if (!reg_req) {
 			ret = ERR_PTR(-ENOMEM);
 			goto error3;
 		}
-		/* Make a copy of the MAD registration request */
-		memcpy(reg_req, mad_reg_req, sizeof *reg_req);
 	}
 
 	/* Now, fill in the various structures */
@@ -324,9 +843,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
 	INIT_LIST_HEAD(&mad_agent_priv->done_list);
 	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
-	INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends);
-	setup_timer(&mad_agent_priv->timeout_timer, timeout_callback,
-		    (unsigned long) mad_agent_priv);
+	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
 	INIT_LIST_HEAD(&mad_agent_priv->local_list);
 	INIT_WORK(&mad_agent_priv->local_work, local_completions);
 	atomic_set(&mad_agent_priv->refcount, 1);
@@ -533,8 +1050,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	 */
 	cancel_mads(mad_agent_priv);
 	port_priv = mad_agent_priv->qp_info->port_priv;
-	del_timer_sync(&mad_agent_priv->timeout_timer);
-	cancel_work_sync(&mad_agent_priv->timeout_work);
+	cancel_delayed_work(&mad_agent_priv->timed_work);
 
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	remove_mad_reg_req(mad_agent_priv);
@@ -577,6 +1093,7 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_snoop_private *mad_snoop_priv;
 
+	if (!IS_ERR(mad_agent)) {
 	/* If the TID is zero, the agent can only snoop. */
 	if (mad_agent->hi_tid) {
 		mad_agent_priv = container_of(mad_agent,
@@ -589,6 +1106,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 					      agent);
 		unregister_mad_snoop(mad_snoop_priv);
 	}
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -695,7 +1214,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	struct ib_wc mad_wc;
 	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
 
-	if (device->node_type == RDMA_NODE_IB_SWITCH)
+	if (device->node_type == RDMA_NODE_IB_SWITCH &&
+	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
 		port_num = send_wr->wr.ud.port_num;
 	else
 		port_num = mad_agent_priv->agent.port_num;
@@ -1028,12 +1548,20 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 					mad_send_wr->send_buf.mad,
 					sge[0].length,
 					DMA_TO_DEVICE);
-	mad_send_wr->header_mapping = sge[0].addr;
+	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+		return -ENOMEM;
 
 	sge[1].addr = ib_dma_map_single(mad_agent->device,
 					ib_get_payload(mad_send_wr),
 					sge[1].length,
 					DMA_TO_DEVICE);
+
+	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+		ret = -ENOMEM;
+		goto dma1_err;
+	}
+
+	mad_send_wr->header_mapping = sge[0].addr;
 	mad_send_wr->payload_mapping = sge[1].addr;
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -1051,14 +1579,51 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		list_add_tail(&mad_send_wr->mad_list.list, list);
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-	if (ret) {
+
+	if (!ret)
+		return 0;
+
 		ib_dma_unmap_single(mad_agent->device,
 				    mad_send_wr->header_mapping,
-				    sge[0].length, DMA_TO_DEVICE);
+				    sge[1].length, DMA_TO_DEVICE);
+dma1_err:
 		ib_dma_unmap_single(mad_agent->device,
 				    mad_send_wr->payload_mapping,
-				    sge[1].length, DMA_TO_DEVICE);
+				    sge[0].length, DMA_TO_DEVICE);
+	return ret;
+}
+
+/*
+ * Send SA MAD that passed congestion control
+ */
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+			  u32 timeout_ms, u32 retries_left)
+{
+	int ret;
+	unsigned long flags;
+	struct ib_mad_agent_private *mad_agent_priv;
+
+	mad_agent_priv = mad_send_wr->mad_agent_priv;
+	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
+	mad_send_wr->retries_left = retries_left;
+	mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+
+	/* Reference MAD agent until send completes */
+	atomic_inc(&mad_agent_priv->refcount);
+	spin_lock_irqsave(&mad_agent_priv->lock, flags);
+	list_add_tail(&mad_send_wr->agent_list,
+		      &mad_agent_priv->send_list);
+	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+	ret = ib_send_mad(mad_send_wr);
+	if (ret < 0) {
+		/* Fail send request */
+		spin_lock_irqsave(&mad_agent_priv->lock, flags);
+		list_del(&mad_send_wr->agent_list);
+		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+		atomic_dec(&mad_agent_priv->refcount);
 	}
 
 	return ret;
 }
@@ -1125,6 +1690,12 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
 
+		if (is_sa_cc_mad(mad_send_wr)) {
+			mad_send_wr->is_sa_cc_mad = 1;
+			ret = sa_cc_mad_send(mad_send_wr);
+			if (ret < 0)
+				goto error;
+		} else {
 		/* Reference MAD agent until send completes */
 		atomic_inc(&mad_agent_priv->refcount);
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -1147,6 +1718,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 				goto error;
 			}
 		}
+		}
 	}
 	return 0;
 error:
 	if (bad_send_buf)
@@ -1206,10 +1778,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
 {
 	int i;
 
-	for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
-	     i < IB_MGMT_MAX_METHODS;
-	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-			       1+i)) {
+	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
 		if ((*method)->agent[i]) {
 			printk(KERN_ERR PFX "Method %d already in use\n", i);
 			return -EINVAL;
@@ -1343,13 +1912,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 		goto error3;
 
 	/* Finally, add in methods being registered */
-	for (i = find_first_bit(mad_reg_req->method_mask,
-				IB_MGMT_MAX_METHODS);
-	     i < IB_MGMT_MAX_METHODS;
-	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-			       1+i)) {
+	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
 		(*method)->agent[i] = agent_priv;
-	}
+
 	return 0;
 
 error3:
@@ -1442,13 +2007,9 @@ check_in_use:
 		goto error4;
 
 	/* Finally, add in methods being registered */
-	for (i = find_first_bit(mad_reg_req->method_mask,
-				IB_MGMT_MAX_METHODS);
-	     i < IB_MGMT_MAX_METHODS;
-	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
-			       1+i)) {
+	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
 		(*method)->agent[i] = agent_priv;
-	}
+
 	return 0;
 
 error4:
@@ -1614,6 +2175,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
 					      mad->mad_hdr.class_version].class;
 			if (!class)
 				goto out;
+			if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+			    IB_MGMT_MAX_METHODS)
+				goto out;
 			method = class->method_table[convert_mgmt_class(
 							mad->mad_hdr.mgmt_class)];
 			if (method)
@@ -1856,6 +2420,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 	}
 }
 
+static bool generate_unmatched_resp(struct ib_mad_private *recv,
+				    struct ib_mad_private *response)
+{
+	if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
+	    recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
+		memcpy(response, recv, sizeof *response);
+		response->header.recv_wc.wc = &response->header.wc;
+		response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+		response->header.recv_wc.recv_buf.grh = &response->grh;
+		response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+		response->mad.mad.mad_hdr.status =
+			cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+		if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+			response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
+
+		return true;
+	} else {
+		return false;
+	}
+}
 static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 				     struct ib_wc *wc)
 {
@@ -1865,6 +2449,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	struct ib_mad_list_head *mad_list;
 	struct ib_mad_agent_private *mad_agent;
 	int port_num;
+	int ret = IB_MAD_RESULT_SUCCESS;
 
 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
 	qp_info = mad_list->mad_queue->qp_info;
@@ -1948,8 +2533,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 local:
 	/* Give driver "right of first refusal" on incoming MAD */
 	if (port_priv->device->process_mad) {
-		int ret;
-
 		ret = port_priv->device->process_mad(port_priv->device, 0,
 						     port_priv->port_num,
 						     wc, &recv->grh,
@@ -1977,6 +2560,10 @@ local:
 		 * or via recv_handler in ib_mad_complete_recv()
 		 */
 		recv = NULL;
+	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
+		   generate_unmatched_resp(recv, response)) {
+		agent_send_response(&response->mad.mad, &recv->grh, wc,
+				    port_priv->device, port_num, qp_info->qp->qp_num);
 	}
 
 out:
@@ -1992,9 +2579,10 @@ out:
 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
+	unsigned long delay;
 
 	if (list_empty(&mad_agent_priv->wait_list)) {
-		del_timer(&mad_agent_priv->timeout_timer);
+		cancel_delayed_work(&mad_agent_priv->timed_work);
 	} else {
 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
 					 struct ib_mad_send_wr_private,
@@ -2003,8 +2591,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
 		if (time_after(mad_agent_priv->timeout,
 			       mad_send_wr->timeout)) {
 			mad_agent_priv->timeout = mad_send_wr->timeout;
-			mod_timer(&mad_agent_priv->timeout_timer,
-				  mad_send_wr->timeout);
+			delay = mad_send_wr->timeout - jiffies;
+			if ((long)delay <= 0)
+				delay = 1;
+			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+					 &mad_agent_priv->timed_work, delay);
 		}
 	}
 }
@@ -2031,14 +2622,15 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 					temp_mad_send_wr->timeout))
 				break;
 		}
-	} else
+	}
+	else
 		list_item = &mad_agent_priv->wait_list;
 	list_add(&mad_send_wr->agent_list, list_item);
 
 	/* Reschedule a work item if we have a shorter timeout */
 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
-		mod_timer(&mad_agent_priv->timeout_timer,
-			  mad_send_wr->timeout);
+		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+				 &mad_agent_priv->timed_work, delay);
 }
 
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
@@ -2090,9 +2682,12 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 		mad_send_wc->status = mad_send_wr->status;
 	if (ret == IB_RMPP_RESULT_INTERNAL)
 		ib_rmpp_send_handler(mad_send_wc);
-	else
+	else {
+		if (mad_send_wr->is_sa_cc_mad)
+			sa_cc_mad_done(get_cc_obj(mad_send_wr));
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   mad_send_wc);
+	}
 
 	/* Release reference on agent taken when sending */
 	deref_mad_agent(mad_agent_priv);
@@ -2272,6 +2867,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 
 	INIT_LIST_HEAD(&cancel_list);
 
+	cancel_sa_cc_mads(mad_agent_priv);
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
@@ -2293,6 +2889,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
 				 &cancel_list, agent_list) {
 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
 		list_del(&mad_send_wr->agent_list);
+		if (mad_send_wr->is_sa_cc_mad)
+			sa_cc_mad_done(get_cc_obj(mad_send_wr));
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 		atomic_dec(&mad_agent_priv->refcount);
@@ -2332,7 +2930,13 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
 				      agent);
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
-	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+	if (!mad_send_wr) {
+		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+		if (modify_sa_cc_mad(mad_agent_priv, send_buf, timeout_ms))
+			return -EINVAL;
+		return 0;
+	}
+	if (mad_send_wr->status != IB_WC_SUCCESS) {
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		return -EINVAL;
 	}
@@ -2482,10 +3086,10 @@ static void timeout_sends(struct work_struct *work)
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc mad_send_wc;
-	unsigned long flags;
+	unsigned long flags, delay;
 
 	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
-				      timeout_work);
+				      timed_work.work);
 	mad_send_wc.vendor_err = 0;
 
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2495,8 +3099,12 @@ static void timeout_sends(struct work_struct *work)
 					 agent_list);
 
 		if (time_after(mad_send_wr->timeout, jiffies)) {
-			mod_timer(&mad_agent_priv->timeout_timer,
-				  mad_send_wr->timeout);
+			delay = mad_send_wr->timeout - jiffies;
+			if ((long)delay <= 0)
+				delay = 1;
+			queue_delayed_work(mad_agent_priv->qp_info->
+					   port_priv->wq,
+					   &mad_agent_priv->timed_work, delay);
 			break;
 		}
 
@@ -2512,6 +3120,8 @@ static void timeout_sends(struct work_struct *work)
 		else
 			mad_send_wc.status = mad_send_wr->status;
 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
+		if (mad_send_wr->is_sa_cc_mad)
+			sa_cc_mad_done(get_cc_obj(mad_send_wr));
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 
@@ -2572,6 +3182,14 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 						 sizeof *mad_priv -
 						   sizeof mad_priv->header,
 						 DMA_FROM_DEVICE);
+		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+						  sg_list.addr))) {
+			ret = -ENOMEM;
+			kmem_cache_free(ib_mad_cache, mad_priv);
+			printk(KERN_ERR PFX "ib_dma_map_single failed\n");
+			break;
+		}
+
 		mad_priv->header.mapping = sg_list.addr;
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
 		mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -2645,6 +3263,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 	int ret, i;
 	struct ib_qp_attr *attr;
 	struct ib_qp *qp;
+	u16 pkey_index = 0;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr) {
@@ -2652,6 +3271,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		return -ENOMEM;
 	}
 
+	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
+			   0xFFFF, &pkey_index);
+	if (ret)
+		pkey_index = 0;
+
 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
 		qp = port_priv->qp_info[i].qp;
 		if (!qp)
@@ -2662,7 +3286,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		 * one is needed for the Reset to Init transition
 		 */
 		attr->qp_state = IB_QPS_INIT;
-		attr->pkey_index = 0;
+		attr->pkey_index = pkey_index;
 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
 				   IB_QP_PKEY_INDEX | IB_QP_QKEY);
@@ -2858,6 +3482,10 @@ static int ib_mad_port_open(struct ib_device *device,
 	}
 	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
+	if (sa_cc_init(&port_priv->sa_cc))
+		goto error9;
+
+
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
@@ -2865,17 +3493,19 @@ static int ib_mad_port_open(struct ib_device *device,
 	ret = ib_mad_port_start(port_priv);
 	if (ret) {
 		printk(KERN_ERR PFX "Couldn't start port\n");
-		goto error9;
+		goto error10;
 	}
 
 	return 0;
 
-error9:
+error10:
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_del_init(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
 	destroy_workqueue(port_priv->wq);
+error9:
+	sa_cc_destroy(&port_priv->sa_cc);
 error8:
 	destroy_mad_qp(&port_priv->qp_info[1]);
 error7:
@@ -2915,6 +3545,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
 	destroy_workqueue(port_priv->wq);
+	sa_cc_destroy(&port_priv->sa_cc);
 	destroy_mad_qp(&port_priv->qp_info[1]);
 	destroy_mad_qp(&port_priv->qp_info[0]);
 	ib_dereg_mr(port_priv->mr);
@@ -2983,6 +3614,9 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
 	int i, num_ports, cur_port;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
@@ -3017,8 +3651,6 @@ static int __init ib_mad_init_module(void)
 	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
 	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
 
-	spin_lock_init(&ib_mad_port_list_lock);
-
 	ib_mad_cache = kmem_cache_create("ib_mad",
 					 sizeof(struct ib_mad_private),
 					 0,
@@ -3054,4 +3686,3 @@ static void __exit ib_mad_cleanup_module(void)
 
 module_init(ib_mad_init_module);
 module_exit(ib_mad_cleanup_module);
-
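
The patch above adds two cooperating pieces: a timeout FIFO (the tf_* functions) that keeps queued work in both arrival order and expiration order, and an SA congestion-control gate that allows at most MAX_OUTSTANDING_SA_MADS Subnet Administration queries on the wire, parking the rest in the FIFO with a time budget derived from their retry window. As a reading aid only, and not part of the patch, the following userspace C sketch models that admission decision; the constants mirror the enum values introduced above, while cc_state, cc_admit() and queue_budget_ms() are hypothetical stand-ins for the kernel's sa_cc_data and timeout-FIFO machinery.

/*
 * Illustrative userspace model of the SA congestion-control admission
 * logic introduced by this patch (not part of the kernel change).
 * Constants follow the enum values added to mad.c; everything else
 * (cc_state, cc_admit, queue_budget_ms) is a hypothetical stand-in.
 */
#include <stdio.h>

#define MAX_OUTSTANDING_SA_MADS     10
#define MIN_TIME_FOR_SA_MAD_SEND_MS 20
#define MAX_SA_MADS                 10000

struct cc_state {
	int outstanding;	/* SA MADs currently on the wire */
	int queued;		/* SA MADs parked in the timeout FIFO */
};

/*
 * Queue-time budget for a MAD that cannot be sent immediately: the full
 * retry window minus the minimum time needed to actually transmit it once
 * it is dequeued (mirrors the qtime computation in sa_cc_mad_send()).
 */
static int queue_budget_ms(int timeout_ms, int retries_left)
{
	int qtime = timeout_ms * (retries_left + 1) - MIN_TIME_FOR_SA_MAD_SEND_MS;

	return qtime < 0 ? 0 : qtime;
}

/* Returns 1 if the MAD may be sent now, 0 if it was queued, -1 if dropped. */
static int cc_admit(struct cc_state *cc, int timeout_ms, int retries_left)
{
	if (cc->outstanding < MAX_OUTSTANDING_SA_MADS) {
		cc->outstanding++;
		return 1;		/* send immediately */
	}
	if (cc->queued >= MAX_SA_MADS)
		return -1;		/* FIFO full: tf_enqueue() returns -EBUSY */
	cc->queued++;			/* park in FIFO for queue_budget_ms() */
	return 0;
}

int main(void)
{
	struct cc_state cc = { 0, 0 };
	int i;

	/* 15 SA queries, each with a 100 ms timeout and 3 retries left */
	for (i = 0; i < 15; i++) {
		int r = cc_admit(&cc, 100, 3);

		printf("MAD %2d: %s (queue budget %d ms)\n", i,
		       r > 0 ? "sent" : r == 0 ? "queued" : "dropped",
		       queue_budget_ms(100, 3));
	}
	printf("outstanding=%d queued=%d\n", cc.outstanding, cc.queued);
	return 0;
}

In the kernel patch the queued entry is the tf_entry embedded in ib_mad_send_wr_private, and sa_cc_mad_done() later pops the FIFO, re-deriving timeout_ms and retries from the time the MAD spent waiting before handing it to send_sa_cc_mad().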