about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--sys/compat/linuxkpi/common/include/linux/srcu.h5
-rw-r--r--sys/compat/linuxkpi/common/include/linux/types.h4
-rw-r--r--sys/compat/linuxkpi/common/src/linux_compat.c3
-rw-r--r--sys/compat/linuxkpi/common/src/linux_rcu.c278
4 files changed, 202 insertions, 88 deletions
diff --git a/sys/compat/linuxkpi/common/include/linux/srcu.h b/sys/compat/linuxkpi/common/include/linux/srcu.h
index ea195c3d9ae4..c9d0423a116c 100644
--- a/sys/compat/linuxkpi/common/include/linux/srcu.h
+++ b/sys/compat/linuxkpi/common/include/linux/srcu.h
@@ -29,9 +29,9 @@
#ifndef _LINUX_SRCU_H_
#define _LINUX_SRCU_H_
-struct ck_epoch_record;
+struct srcu_epoch_record;
struct srcu_struct {
- struct ck_epoch_record *ss_epoch_record;
+ struct srcu_epoch_record *ss_epoch_record;
};
#define srcu_dereference(ptr,srcu) ((__typeof(*(ptr)) *)(ptr))
@@ -41,6 +41,7 @@ struct srcu_struct {
extern int srcu_read_lock(struct srcu_struct *);
extern void srcu_read_unlock(struct srcu_struct *, int index);
extern void synchronize_srcu(struct srcu_struct *);
+extern void srcu_barrier(struct srcu_struct *);
extern int init_srcu_struct(struct srcu_struct *);
extern void cleanup_srcu_struct(struct srcu_struct *);
extern void srcu_barrier(struct srcu_struct *);
diff --git a/sys/compat/linuxkpi/common/include/linux/types.h b/sys/compat/linuxkpi/common/include/linux/types.h
index 6ad7a9001e32..5c61f0c4290c 100644
--- a/sys/compat/linuxkpi/common/include/linux/types.h
+++ b/sys/compat/linuxkpi/common/include/linux/types.h
@@ -2,7 +2,7 @@
* Copyright (c) 2010 Isilon Systems, Inc.
* Copyright (c) 2010 iX Systems, Inc.
* Copyright (c) 2010 Panasas, Inc.
- * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -65,7 +65,7 @@ typedef u64 phys_addr_t;
unsigned long n[howmany(bits, sizeof(long) * 8)]
struct rcu_head {
- void *raw[8];
+ void *raw[2];
} __aligned(sizeof(void *));
typedef void (*rcu_callback_t)(struct rcu_head *head);
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index cac224a7b816..8eff760721ca 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -69,7 +69,6 @@ __FBSDID("$FreeBSD$");
#include <linux/netdevice.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
@@ -1503,8 +1502,6 @@ linux_compat_uninit(void *arg)
linux_kobject_kfree_name(&linux_class_root);
linux_kobject_kfree_name(&linux_root_device.kobj);
linux_kobject_kfree_name(&linux_class_misc.kobj);
-
- synchronize_rcu();
}
SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL);
diff --git a/sys/compat/linuxkpi/common/src/linux_rcu.c b/sys/compat/linuxkpi/common/src/linux_rcu.c
index cc108177e913..17007711aae5 100644
--- a/sys/compat/linuxkpi/common/src/linux_rcu.c
+++ b/sys/compat/linuxkpi/common/src/linux_rcu.c
@@ -46,11 +46,24 @@ __FBSDID("$FreeBSD$");
#include <linux/slab.h>
#include <linux/kernel.h>
+struct callback_head;
+struct writer_epoch_record {
+ ck_epoch_record_t epoch_record;
+ struct mtx head_lock;
+ struct mtx sync_lock;
+ struct task task;
+ STAILQ_HEAD(, callback_head) head;
+} __aligned(CACHE_LINE_SIZE);
+
struct callback_head {
- ck_epoch_entry_t epoch_entry;
+ STAILQ_ENTRY(callback_head) entry;
rcu_callback_t func;
- ck_epoch_record_t *epoch_record;
- struct task task;
+};
+
+struct srcu_epoch_record {
+ ck_epoch_record_t epoch_record;
+ struct mtx read_lock;
+ struct mtx sync_lock;
};
/*
@@ -61,33 +74,55 @@ struct callback_head {
*/
CTASSERT(sizeof(struct rcu_head) >= sizeof(struct callback_head));
+/*
+ * Verify that "epoch_record" is at beginning of "struct
+ * writer_epoch_record":
+ */
+CTASSERT(offsetof(struct writer_epoch_record, epoch_record) == 0);
+
+/*
+ * Verify that "epoch_record" is at beginning of "struct
+ * srcu_epoch_record":
+ */
+CTASSERT(offsetof(struct srcu_epoch_record, epoch_record) == 0);
+
static ck_epoch_t linux_epoch;
-static MALLOC_DEFINE(M_LRCU, "lrcu", "Linux RCU");
-static DPCPU_DEFINE(ck_epoch_record_t *, epoch_record);
+static MALLOC_DEFINE(M_LRCU, "lrcu", "Linux RCU");
+static DPCPU_DEFINE(ck_epoch_record_t *, linux_reader_epoch_record);
+static DPCPU_DEFINE(struct writer_epoch_record *, linux_writer_epoch_record);
+
+static void linux_rcu_cleaner_func(void *, int);
static void
linux_rcu_runtime_init(void *arg __unused)
{
- ck_epoch_record_t **pcpu_record;
- ck_epoch_record_t *record;
int i;
ck_epoch_init(&linux_epoch);
+ /* setup reader records */
CPU_FOREACH(i) {
+ ck_epoch_record_t *record;
+
record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);
ck_epoch_register(&linux_epoch, record);
- pcpu_record = DPCPU_ID_PTR(i, epoch_record);
- *pcpu_record = record;
+
+ DPCPU_ID_SET(i, linux_reader_epoch_record, record);
}
- /*
- * Populate the epoch with 5 * ncpus # of records
- */
- for (i = 0; i < 5 * mp_ncpus; i++) {
+ /* setup writer records */
+ CPU_FOREACH(i) {
+ struct writer_epoch_record *record;
+
record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);
- ck_epoch_register(&linux_epoch, record);
- ck_epoch_unregister(record);
+
+ ck_epoch_register(&linux_epoch, &record->epoch_record);
+ mtx_init(&record->head_lock, "LRCU-HEAD", NULL, MTX_DEF);
+ mtx_init(&record->sync_lock, "LRCU-SYNC", NULL, MTX_DEF);
+ TASK_INIT(&record->task, 0, linux_rcu_cleaner_func, record);
+ STAILQ_INIT(&record->head);
+
+ DPCPU_ID_SET(i, linux_writer_epoch_record, record);
}
}
SYSINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_init, NULL);
@@ -95,66 +130,99 @@ SYSINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_init,
static void
linux_rcu_runtime_uninit(void *arg __unused)
{
- ck_epoch_record_t **pcpu_record;
- ck_epoch_record_t *record;
+ ck_stack_entry_t *cursor;
+ ck_stack_entry_t *next;
int i;
- while ((record = ck_epoch_recycle(&linux_epoch)) != NULL)
- free(record, M_LRCU);
+ /* make sure all callbacks have been called */
+ linux_rcu_barrier();
+ /* destroy all writer record mutexes */
CPU_FOREACH(i) {
- pcpu_record = DPCPU_ID_PTR(i, epoch_record);
- record = *pcpu_record;
- *pcpu_record = NULL;
+ struct writer_epoch_record *record;
+
+ record = DPCPU_ID_GET(i, linux_writer_epoch_record);
+
+ mtx_destroy(&record->head_lock);
+ mtx_destroy(&record->sync_lock);
+ }
+
+ /* free all registered reader and writer records */
+ CK_STACK_FOREACH_SAFE(&linux_epoch.records, cursor, next) {
+ ck_epoch_record_t *record;
+
+ record = container_of(cursor,
+ struct ck_epoch_record, record_next);
free(record, M_LRCU);
}
}
SYSUNINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_uninit, NULL);
-static ck_epoch_record_t *
-linux_rcu_get_record(int canblock)
+static inline struct srcu_epoch_record *
+linux_srcu_get_record(void)
{
- ck_epoch_record_t *record;
+ struct srcu_epoch_record *record;
- if (__predict_true((record = ck_epoch_recycle(&linux_epoch)) != NULL))
- return (record);
- if ((record = malloc(sizeof(*record), M_LRCU, M_NOWAIT | M_ZERO)) != NULL) {
- ck_epoch_register(&linux_epoch, record);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "linux_srcu_get_record() might sleep");
+
+ /*
+ * NOTE: The only records that are unregistered and can be
+ * recycled are srcu_epoch_records.
+ */
+ record = (struct srcu_epoch_record *)ck_epoch_recycle(&linux_epoch);
+ if (__predict_true(record != NULL))
return (record);
- } else if (!canblock)
- return (NULL);
record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);
- ck_epoch_register(&linux_epoch, record);
+ mtx_init(&record->read_lock, "SRCU-READ", NULL, MTX_DEF | MTX_NOWITNESS);
+ mtx_init(&record->sync_lock, "SRCU-SYNC", NULL, MTX_DEF | MTX_NOWITNESS);
+ ck_epoch_register(&linux_epoch, &record->epoch_record);
+
return (record);
}
+static inline void
+linux_rcu_synchronize_sub(struct writer_epoch_record *record)
+{
+
+ /* protect access to epoch_record */
+ mtx_lock(&record->sync_lock);
+ ck_epoch_synchronize(&record->epoch_record);
+ mtx_unlock(&record->sync_lock);
+}
+
static void
-linux_rcu_destroy_object(ck_epoch_entry_t *e)
+linux_rcu_cleaner_func(void *context, int pending __unused)
{
+ struct writer_epoch_record *record;
struct callback_head *rcu;
- uintptr_t offset;
+ STAILQ_HEAD(, callback_head) head;
- rcu = container_of(e, struct callback_head, epoch_entry);
+ record = context;
- offset = (uintptr_t)rcu->func;
+ /* move current callbacks into own queue */
+ mtx_lock(&record->head_lock);
+ STAILQ_INIT(&head);
+ STAILQ_CONCAT(&head, &record->head);
+ mtx_unlock(&record->head_lock);
- MPASS(rcu->task.ta_pending == 0);
+ /* synchronize */
+ linux_rcu_synchronize_sub(record);
- if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
- kfree((char *)rcu - offset);
- else
- rcu->func((struct rcu_head *)rcu);
-}
+ /* dispatch all callbacks, if any */
+ while ((rcu = STAILQ_FIRST(&head)) != NULL) {
+ uintptr_t offset;
-static void
-linux_rcu_cleaner_func(void *context, int pending __unused)
-{
- struct callback_head *rcu = context;
- ck_epoch_record_t *record = rcu->epoch_record;
+ STAILQ_REMOVE_HEAD(&head, entry);
- ck_epoch_barrier(record);
- ck_epoch_unregister(record);
+ offset = (uintptr_t)rcu->func;
+
+ if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
+ kfree((char *)rcu - offset);
+ else
+ rcu->func((struct rcu_head *)rcu);
+ }
}
void
@@ -162,11 +230,21 @@ linux_rcu_read_lock(void)
{
ck_epoch_record_t *record;
+ /*
+ * Pin thread to current CPU so that the unlock code gets the
+ * same per-CPU reader epoch record:
+ */
sched_pin();
- record = DPCPU_GET(epoch_record);
- MPASS(record != NULL);
+ record = DPCPU_GET(linux_reader_epoch_record);
+
+ /*
+ * Use a critical section to prevent recursion inside
+ * ck_epoch_begin(). Else this function supports recursion.
+ */
+ critical_enter();
ck_epoch_begin(record, NULL);
+ critical_exit();
}
void
@@ -174,57 +252,63 @@ linux_rcu_read_unlock(void)
{
ck_epoch_record_t *record;
- record = DPCPU_GET(epoch_record);
+ record = DPCPU_GET(linux_reader_epoch_record);
+
+ /*
+ * Use a critical section to prevent recursion inside
+ * ck_epoch_end(). Else this function supports recursion.
+ */
+ critical_enter();
ck_epoch_end(record, NULL);
+ critical_exit();
+
sched_unpin();
}
void
linux_synchronize_rcu(void)
{
- ck_epoch_record_t *record;
-
- sched_pin();
- record = DPCPU_GET(epoch_record);
- MPASS(record != NULL);
- ck_epoch_synchronize(record);
- sched_unpin();
+ linux_rcu_synchronize_sub(DPCPU_GET(linux_writer_epoch_record));
}
void
linux_rcu_barrier(void)
{
- ck_epoch_record_t *record;
+ int i;
- record = linux_rcu_get_record(0);
- ck_epoch_barrier(record);
- ck_epoch_unregister(record);
+ CPU_FOREACH(i) {
+ struct writer_epoch_record *record;
+
+ record = DPCPU_ID_GET(i, linux_writer_epoch_record);
+
+ linux_rcu_synchronize_sub(record);
+
+ /* wait for callbacks to complete */
+ taskqueue_drain(taskqueue_fast, &record->task);
+ }
}
void
linux_call_rcu(struct rcu_head *context, rcu_callback_t func)
{
- struct callback_head *ptr = (struct callback_head *)context;
- ck_epoch_record_t *record;
+ struct callback_head *rcu = (struct callback_head *)context;
+ struct writer_epoch_record *record;
- record = linux_rcu_get_record(0);
+ record = DPCPU_GET(linux_writer_epoch_record);
- sched_pin();
- MPASS(record != NULL);
- ptr->func = func;
- ptr->epoch_record = record;
- ck_epoch_call(record, &ptr->epoch_entry, linux_rcu_destroy_object);
- TASK_INIT(&ptr->task, 0, linux_rcu_cleaner_func, ptr);
- taskqueue_enqueue(taskqueue_fast, &ptr->task);
- sched_unpin();
+ mtx_lock(&record->head_lock);
+ rcu->func = func;
+ STAILQ_INSERT_TAIL(&record->head, rcu, entry);
+ taskqueue_enqueue(taskqueue_fast, &record->task);
+ mtx_unlock(&record->head_lock);
}
int
init_srcu_struct(struct srcu_struct *srcu)
{
- ck_epoch_record_t *record;
+ struct srcu_epoch_record *record;
- record = linux_rcu_get_record(0);
+ record = linux_srcu_get_record();
srcu->ss_epoch_record = record;
return (0);
}
@@ -232,28 +316,60 @@ init_srcu_struct(struct srcu_struct *srcu)
void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
- ck_epoch_record_t *record;
+ struct srcu_epoch_record *record;
record = srcu->ss_epoch_record;
srcu->ss_epoch_record = NULL;
- ck_epoch_unregister(record);
+
+ ck_epoch_unregister(&record->epoch_record);
}
int
srcu_read_lock(struct srcu_struct *srcu)
{
- ck_epoch_begin(srcu->ss_epoch_record, NULL);
+ struct srcu_epoch_record *record;
+
+ record = srcu->ss_epoch_record;
+
+ mtx_lock(&record->read_lock);
+ ck_epoch_begin(&record->epoch_record, NULL);
+ mtx_unlock(&record->read_lock);
+
return (0);
}
void
srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
{
- ck_epoch_end(srcu->ss_epoch_record, NULL);
+ struct srcu_epoch_record *record;
+
+ record = srcu->ss_epoch_record;
+
+ mtx_lock(&record->read_lock);
+ ck_epoch_end(&record->epoch_record, NULL);
+ mtx_unlock(&record->read_lock);
}
void
synchronize_srcu(struct srcu_struct *srcu)
{
- ck_epoch_synchronize(srcu->ss_epoch_record);
+ struct srcu_epoch_record *record;
+
+ record = srcu->ss_epoch_record;
+
+ mtx_lock(&record->sync_lock);
+ ck_epoch_synchronize(&record->epoch_record);
+ mtx_unlock(&record->sync_lock);
+}
+
+void
+srcu_barrier(struct srcu_struct *srcu)
+{
+ struct srcu_epoch_record *record;
+
+ record = srcu->ss_epoch_record;
+
+ mtx_lock(&record->sync_lock);
+ ck_epoch_barrier(&record->epoch_record);
+ mtx_unlock(&record->sync_lock);
}