aboutsummaryrefslogtreecommitdiff
path: root/sys/ofed
diff options
context:
space:
mode:
authorHans Petter Selasky <hselasky@FreeBSD.org>2021-06-16 13:01:44 +0000
committerHans Petter Selasky <hselasky@FreeBSD.org>2021-07-12 12:22:31 +0000
commit468a6b5055f0b6ea0bdb1ee8cbdf749204cb3b25 (patch)
tree9ca177d047d8fa91557176b6d33bf87b23680d8e /sys/ofed
parent507389a35a41f5f15592d2156d34039e3ee1c3e5 (diff)
downloadsrc-468a6b5055f0b6ea0bdb1ee8cbdf749204cb3b25.tar.gz
src-468a6b5055f0b6ea0bdb1ee8cbdf749204cb3b25.zip
ibcore: Fix use-after-free in IB mad completion handling.
We encountered a use-after-free bug when unloading the driver: BUG: KASAN: use-after-free in ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] Read of size 4 at addr ffff8882ca5aa868 by task kworker/u13:2/23862 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] Call Trace: dump_stack+0x9a/0xeb print_address_description+0xe3/0x2e0 ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] __kasan_report+0x15c/0x1df ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] kasan_report+0xe/0x20 ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] find_mad_agent+0xa00/0xa00 [ib_core] qlist_free_all+0x51/0xb0 mlx4_ib_sqp_comp_worker+0x1970/0x1970 [mlx4_ib] quarantine_reduce+0x1fa/0x270 kasan_unpoison_shadow+0x30/0x40 ib_mad_recv_done+0xdf6/0x3000 [ib_core] _raw_spin_unlock_irqrestore+0x46/0x70 ib_mad_send_done+0x1810/0x1810 [ib_core] mlx4_ib_destroy_cq+0x2a0/0x2a0 [mlx4_ib] _raw_spin_unlock_irqrestore+0x46/0x70 debug_object_deactivate+0x2b9/0x4a0 __ib_process_cq+0xe2/0x1d0 [ib_core] ib_cq_poll_work+0x45/0xf0 [ib_core] process_one_work+0x90c/0x1860 pwq_dec_nr_in_flight+0x320/0x320 worker_thread+0x87/0xbb0 __kthread_parkme+0xb6/0x180 process_one_work+0x1860/0x1860 kthread+0x320/0x3e0 kthread_park+0x120/0x120 ret_from_fork+0x24/0x30 ... Freed by task 31682: save_stack+0x19/0x80 __kasan_slab_free+0x11d/0x160 kfree+0xf5/0x2f0 ib_mad_port_close+0x200/0x380 [ib_core] ib_mad_remove_device+0xf0/0x230 [ib_core] remove_client_context+0xa6/0xe0 [ib_core] disable_device+0x14e/0x260 [ib_core] __ib_unregister_device+0x79/0x150 [ib_core] ib_unregister_device+0x21/0x30 [ib_core] mlx4_ib_remove+0x162/0x690 [mlx4_ib] mlx4_remove_device+0x204/0x2c0 [mlx4_core] mlx4_unregister_interface+0x49/0x1d0 [mlx4_core] mlx4_ib_cleanup+0xc/0x1d [mlx4_ib] __x64_sys_delete_module+0x2d2/0x400 do_syscall_64+0x95/0x470 entry_SYSCALL_64_after_hwframe+0x49/0xbe The problem was that the MAD PD was deallocated before the MAD CQ. There was completion work pending for the CQ when the PD got deallocated. When the mad completion handling reached procedure ib_mad_post_receive_mads(), we got a use-after-free bug in the following line of code in that procedure: sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; (the pd pointer in the above line is no longer valid, because the pd has been deallocated). We fix this by allocating the PD before the CQ in procedure ib_mad_port_open(), and deallocating the PD after freeing the CQ in procedure ib_mad_port_close(). Since the CQ completion work queue is flushed during ib_free_cq(), no completions will be pending for that CQ when the PD is later deallocated. Note that freeing the CQ before deallocating the PD is the practice in the ULPs. Linux commit: 770b7d96cfff6a8bf6c9f261ba6f135dc9edf484 MFC after: 1 week Reviewed by: kib Sponsored by: Mellanox Technologies // NVIDIA Networking
Diffstat (limited to 'sys/ofed')
-rw-r--r--sys/ofed/drivers/infiniband/core/ib_mad.c24
1 files changed, 11 insertions, 13 deletions
diff --git a/sys/ofed/drivers/infiniband/core/ib_mad.c b/sys/ofed/drivers/infiniband/core/ib_mad.c
index 2f14535688aa..b9f45ff5e41d 100644
--- a/sys/ofed/drivers/infiniband/core/ib_mad.c
+++ b/sys/ofed/drivers/infiniband/core/ib_mad.c
@@ -3146,10 +3146,8 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
- if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_mad_port_private\n");
+ if (!port_priv)
return -ENOMEM;
- }
port_priv->device = device;
port_priv->port_num = port_num;
@@ -3163,18 +3161,18 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
+ port_priv->pd = ib_alloc_pd(device, 0);
+ if (IS_ERR(port_priv->pd)) {
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
+ ret = PTR_ERR(port_priv->pd);
+ goto error3;
+ }
+
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
- goto error3;
- }
-
- port_priv->pd = ib_alloc_pd(device, 0);
- if (IS_ERR(port_priv->pd)) {
- dev_err(&device->dev, "Couldn't create ib_mad PD\n");
- ret = PTR_ERR(port_priv->pd);
goto error4;
}
@@ -3217,11 +3215,11 @@ error8:
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
- ib_dealloc_pd(port_priv->pd);
-error4:
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
+error4:
+ ib_dealloc_pd(port_priv->pd);
error3:
kfree(port_priv);
@@ -3251,8 +3249,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
- ib_dealloc_pd(port_priv->pd);
ib_free_cq(port_priv->cq);
+ ib_dealloc_pd(port_priv->pd);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */