author     Jeff Roberson <jeff@FreeBSD.org>  2019-12-15 21:11:15 +0000
committer  Jeff Roberson <jeff@FreeBSD.org>  2019-12-15 21:11:15 +0000
commit     61a74c5ccd65d1a00a96779f16eda8c41ff3a426 (patch)
tree       0325e01f4affe5d9ef25e68fae1a7cbd5d2ebde9 /sys
parent     054802650063bea1cb817ef22a887c3116813ba9 (diff)
schedlock 1/4
Eliminate recursion from most thread_lock consumers.  Return from
sched_add() without the thread_lock held.  This eliminates unnecessary
atomics and lock word loads as well as reducing the hold time for
scheduler locks.  This will eventually allow for lockless remote adds.

Discussed with:	kib
Reviewed by:	jhb
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D22626
Notes:
    svn path=/head/; revision=355779
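The patch amounts to a new calling convention: sched_add(), sched_wakeup(),
setrunnable(), sleepq_abort() and sleepq_resume_thread() now consume the
thread lock and return with it released unless the caller passes the new
SRQ_HOLD (keep the original lock) or SRQ_HOLDTD (keep the thread lock)
flags.  A minimal caller-side sketch of the difference; example_kick_thread()
is a hypothetical helper for illustration, not a function from the patch:

/*
 * Before: the caller held the thread lock across sched_add() and
 * dropped it afterwards, and the scheduler recursed on its spin locks:
 *
 *	thread_lock(td);
 *	TD_SET_CAN_RUN(td);
 *	sched_add(td, SRQ_BORING);
 *	thread_unlock(td);
 *
 * After: sched_add() drops the thread lock before returning, so the
 * trailing thread_unlock() disappears from every call site touched
 * below.  (Illustrative sketch only.)
 */
static void
example_kick_thread(struct thread *td)
{

	thread_lock(td);
	TD_SET_CAN_RUN(td);
	sched_add(td, SRQ_BORING);	/* returns with td's lock released */
}

Call sites that still need a lock held after queueing pass a hold flag
instead, as turnstile_unpend() does with SRQ_HOLD and sched_affinity()
does with SRQ_HOLDTD in the hunks below.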
Diffstat (limited to 'sys')
-rw-r--r--  sys/cddl/compat/opensolaris/sys/proc.h | 1
-rw-r--r--  sys/compat/linux/linux_fork.c | 4
-rw-r--r--  sys/compat/linuxkpi/common/src/linux_kthread.c | 1
-rw-r--r--  sys/dev/ocs_fc/ocs_os.c | 2
-rw-r--r--  sys/kern/init_main.c | 1
-rw-r--r--  sys/kern/kern_clock.c | 3
-rw-r--r--  sys/kern/kern_fork.c | 1
-rw-r--r--  sys/kern/kern_intr.c | 6
-rw-r--r--  sys/kern/kern_kthread.c | 4
-rw-r--r--  sys/kern/kern_mutex.c | 21
-rw-r--r--  sys/kern/kern_resource.c | 25
-rw-r--r--  sys/kern/kern_sig.c | 51
-rw-r--r--  sys/kern/kern_synch.c | 40
-rw-r--r--  sys/kern/kern_thr.c | 1
-rw-r--r--  sys/kern/kern_thread.c | 56
-rw-r--r--  sys/kern/sched_4bsd.c | 66
-rw-r--r--  sys/kern/sched_ule.c | 88
-rw-r--r--  sys/kern/subr_gtaskqueue.c | 1
-rw-r--r--  sys/kern/subr_pcpu.c | 5
-rw-r--r--  sys/kern/subr_sleepqueue.c | 160
-rw-r--r--  sys/kern/subr_taskqueue.c | 1
-rw-r--r--  sys/kern/subr_turnstile.c | 11
-rw-r--r--  sys/mips/nlm/cms.c | 6
-rw-r--r--  sys/sys/proc.h | 25
-rw-r--r--  sys/sys/resourcevar.h | 1
-rw-r--r--  sys/sys/sched.h | 11
-rw-r--r--  sys/vm/vm_swapout.c | 7
27 files changed, 360 insertions, 239 deletions
diff --git a/sys/cddl/compat/opensolaris/sys/proc.h b/sys/cddl/compat/opensolaris/sys/proc.h
index b26ad11ca875..d91833a58f8c 100644
--- a/sys/cddl/compat/opensolaris/sys/proc.h
+++ b/sys/cddl/compat/opensolaris/sys/proc.h
@@ -89,7 +89,6 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (td);
}
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 066640e2fc9c..0c772ea5cb4a 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -92,7 +92,6 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
return (0);
}
@@ -123,7 +122,6 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
return (0);
}
@@ -228,7 +226,6 @@ linux_clone_proc(struct thread *td, struct linux_clone_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
td->td_retval[0] = p2->p_pid;
@@ -343,7 +340,6 @@ linux_clone_thread(struct thread *td, struct linux_clone_args *args)
thread_lock(newtd);
TD_SET_CAN_RUN(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
td->td_retval[0] = newtd->td_tid;
diff --git a/sys/compat/linuxkpi/common/src/linux_kthread.c b/sys/compat/linuxkpi/common/src/linux_kthread.c
index 198082615076..26afe005ea59 100644
--- a/sys/compat/linuxkpi/common/src/linux_kthread.c
+++ b/sys/compat/linuxkpi/common/src/linux_kthread.c
@@ -142,7 +142,6 @@ linux_kthread_setup_and_run(struct thread *td, linux_task_fn_t *task_fn, void *a
sched_prio(td, PI_SWI(SWI_NET));
/* put thread into run-queue */
sched_add(td, SRQ_BORING);
- thread_unlock(td);
return (task);
}
diff --git a/sys/dev/ocs_fc/ocs_os.c b/sys/dev/ocs_fc/ocs_os.c
index 054b6b8f62f9..5d434fb527ac 100644
--- a/sys/dev/ocs_fc/ocs_os.c
+++ b/sys/dev/ocs_fc/ocs_os.c
@@ -659,6 +659,8 @@ ocs_thread_create(ocs_os_handle_t os, ocs_thread_t *thread, ocs_thread_fctn fctn
int32_t ocs_thread_start(ocs_thread_t *thread)
{
+
+ thread_lock(thread->tcb);
sched_add(thread->tcb, SRQ_BORING);
return 0;
}
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 358b79445708..6d4e7b432818 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -853,6 +853,5 @@ kick_init(const void *udata __unused)
thread_lock(td);
TD_SET_CAN_RUN(td);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 612d73da7cdd..378bf0cc43fc 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -282,8 +282,7 @@ deadlkres(void)
if (TD_ON_LOCK(td))
deadlres_td_on_lock(p, td,
blkticks);
- else if (TD_IS_SLEEPING(td) &&
- TD_ON_SLEEPQ(td))
+ else if (TD_IS_SLEEPING(td))
deadlres_td_sleep_q(p, td,
slpticks);
thread_unlock(td);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index b397dee1aaa4..e8ac950a5d78 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -758,7 +758,6 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
} else {
*fr->fr_procp = p2;
}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 3c1b08cf913c..49defdb52f4c 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -558,8 +558,8 @@ ithread_destroy(struct intr_thread *ithread)
if (TD_AWAITING_INTR(td)) {
TD_CLR_IWAIT(td);
sched_add(td, SRQ_INTR);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
int
@@ -985,8 +985,8 @@ intr_event_schedule_thread(struct intr_event *ie)
} else {
CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
__func__, td->td_proc->p_pid, td->td_name, it->it_need, td->td_state);
+ thread_unlock(td);
}
- thread_unlock(td);
return (0);
}
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index ebf9a4c6daa4..d2c7b3c8f1ed 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -146,7 +146,8 @@ kproc_create(void (*func)(void *), void *arg,
/* Delay putting it on the run queue until now. */
if (!(flags & RFSTOPPED))
sched_add(td, SRQ_BORING);
- thread_unlock(td);
+ else
+ thread_unlock(td);
return 0;
}
@@ -324,7 +325,6 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p,
if (!(flags & RFSTOPPED)) {
thread_lock(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
}
if (newtdp)
*newtdp = newtd;
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index ca6fbb157324..66d7c16d1f1d 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -960,10 +960,9 @@ thread_lock_block(struct thread *td)
{
struct mtx *lock;
- THREAD_LOCK_ASSERT(td, MA_OWNED);
lock = td->td_lock;
+ mtx_assert(lock, MA_OWNED);
td->td_lock = &blocked_lock;
- mtx_unlock_spin(lock);
return (lock);
}
@@ -971,19 +970,33 @@ thread_lock_block(struct thread *td)
void
thread_lock_unblock(struct thread *td, struct mtx *new)
{
+
mtx_assert(new, MA_OWNED);
- MPASS(td->td_lock == &blocked_lock);
+ KASSERT(td->td_lock == &blocked_lock,
+ ("thread %p lock %p not blocked_lock %p",
+ td, td->td_lock, &blocked_lock));
atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
}
void
+thread_lock_block_wait(struct thread *td)
+{
+
+ while (td->td_lock == &blocked_lock)
+ cpu_spinwait();
+
+ /* Acquire fence to be certain that all thread state is visible. */
+ atomic_thread_fence_acq();
+}
+
+void
thread_lock_set(struct thread *td, struct mtx *new)
{
struct mtx *lock;
mtx_assert(new, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
lock = td->td_lock;
+ mtx_assert(lock, MA_OWNED);
td->td_lock = new;
mtx_unlock_spin(lock);
}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 3bb8dee40284..99efe979aa20 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -78,7 +78,7 @@ static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
-static void ruxagg_locked(struct rusage_ext *rux, struct thread *td);
+static void ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td);
/*
* Resource controls and accounting.
@@ -858,7 +858,7 @@ rufetchtd(struct thread *td, struct rusage *ru)
td->td_incruntime += runtime;
PCPU_SET(switchtime, u);
}
- ruxagg(p, td);
+ ruxagg_locked(p, td);
*ru = td->td_ru;
calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
}
@@ -1114,11 +1114,9 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
* Aggregate tick counts into the proc's rusage_ext.
*/
static void
-ruxagg_locked(struct rusage_ext *rux, struct thread *td)
+ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td)
{
- THREAD_LOCK_ASSERT(td, MA_OWNED);
- PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
rux->rux_runtime += td->td_incruntime;
rux->rux_uticks += td->td_uticks;
rux->rux_sticks += td->td_sticks;
@@ -1126,16 +1124,25 @@ ruxagg_locked(struct rusage_ext *rux, struct thread *td)
}
void
-ruxagg(struct proc *p, struct thread *td)
+ruxagg_locked(struct proc *p, struct thread *td)
{
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
- thread_lock(td);
- ruxagg_locked(&p->p_rux, td);
- ruxagg_locked(&td->td_rux, td);
+ ruxagg_ext_locked(&p->p_rux, td);
+ ruxagg_ext_locked(&td->td_rux, td);
td->td_incruntime = 0;
td->td_uticks = 0;
td->td_iticks = 0;
td->td_sticks = 0;
+}
+
+void
+ruxagg(struct proc *p, struct thread *td)
+{
+
+ thread_lock(td);
+ ruxagg_locked(p, td);
thread_unlock(td);
}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 203c47bea360..429a64631b32 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2250,6 +2250,8 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
p->p_step = 0;
wakeup(&p->p_step);
}
+ wakeup_swapper = 0;
+
/*
* Some signals have a process-wide effect and a per-thread
* component. Most processing occurs when the process next
@@ -2352,15 +2354,13 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
* the PROCESS runnable, leave it stopped.
* It may run a bit until it hits a thread_suspend_check().
*/
- wakeup_swapper = 0;
PROC_SLOCK(p);
thread_lock(td);
- if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR))
+ if (TD_CAN_ABORT(td))
wakeup_swapper = sleepq_abort(td, intrval);
- thread_unlock(td);
+ else
+ thread_unlock(td);
PROC_SUNLOCK(p);
- if (wakeup_swapper)
- kick_proc0();
goto out;
/*
* Mutexes are short lived. Threads waiting on them will
@@ -2394,8 +2394,6 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
sigqueue_delete_proc(p, p->p_xsig);
} else
PROC_SUNLOCK(p);
- if (wakeup_swapper)
- kick_proc0();
goto out;
}
} else {
@@ -2416,6 +2414,9 @@ runfast:
out:
/* If we jump here, proc slock should not be owned. */
PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
+ if (wakeup_swapper)
+ kick_proc0();
+
return (ret);
}
@@ -2428,10 +2429,8 @@ static void
tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
{
struct proc *p = td->td_proc;
- int prop;
- int wakeup_swapper;
+ int prop, wakeup_swapper;
- wakeup_swapper = 0;
PROC_LOCK_ASSERT(p, MA_OWNED);
prop = sigprop(sig);
@@ -2487,22 +2486,25 @@ tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
sched_prio(td, PUSER);
wakeup_swapper = sleepq_abort(td, intrval);
- } else {
- /*
- * Other states do nothing with the signal immediately,
- * other than kicking ourselves if we are running.
- * It will either never be noticed, or noticed very soon.
- */
+ PROC_SUNLOCK(p);
+ if (wakeup_swapper)
+ kick_proc0();
+ return;
+ }
+
+ /*
+ * Other states do nothing with the signal immediately,
+ * other than kicking ourselves if we are running.
+ * It will either never be noticed, or noticed very soon.
+ */
#ifdef SMP
- if (TD_IS_RUNNING(td) && td != curthread)
- forward_signal(td);
+ if (TD_IS_RUNNING(td) && td != curthread)
+ forward_signal(td);
#endif
- }
+
out:
PROC_SUNLOCK(p);
thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
}
static int
@@ -2530,12 +2532,13 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
*/
KASSERT(!TD_IS_SUSPENDED(td2),
("thread with deferred stops suspended"));
- if (TD_SBDRY_INTR(td2))
+ if (TD_SBDRY_INTR(td2)) {
wakeup_swapper |= sleepq_abort(td2,
TD_SBDRY_ERRNO(td2));
- } else if (!TD_IS_SUSPENDED(td2)) {
+ continue;
+ }
+ } else if (!TD_IS_SUSPENDED(td2))
thread_suspend_one(td2);
- }
} else if (!TD_IS_SUSPENDED(td2)) {
if (sending || td != td2)
td2->td_flags |= TDF_ASTPENDING;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 6c937401a161..8cb84086b3ca 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -538,40 +538,48 @@ mi_switch(int flags, struct thread *newtd)
* Change thread state to be runnable, placing it on the run queue if
* it is in memory. If it is swapped out, return true so our caller
* will know to awaken the swapper.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
int
-setrunnable(struct thread *td)
+setrunnable(struct thread *td, int srqflags)
{
+ int swapin;
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
+
+ swapin = 0;
switch (td->td_state) {
case TDS_RUNNING:
case TDS_RUNQ:
+ break;
+ case TDS_CAN_RUN:
+ KASSERT((td->td_flags & TDF_INMEM) != 0,
+ ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X",
+ td, td->td_flags, td->td_inhibitors));
+ /* unlocks thread lock according to flags */
+ sched_wakeup(td, srqflags);
return (0);
case TDS_INHIBITED:
/*
* If we are only inhibited because we are swapped out
- * then arange to swap in this process. Otherwise just return.
+ * arrange to swap in this process.
*/
- if (td->td_inhibitors != TDI_SWAPPED)
- return (0);
- /* FALLTHROUGH */
- case TDS_CAN_RUN:
+ if (td->td_inhibitors == TDI_SWAPPED &&
+ (td->td_flags & TDF_SWAPINREQ) == 0) {
+ td->td_flags |= TDF_SWAPINREQ;
+ swapin = 1;
+ }
break;
default:
- printf("state is 0x%x", td->td_state);
- panic("setrunnable(2)");
+ panic("setrunnable: state 0x%x", td->td_state);
}
- if ((td->td_flags & TDF_INMEM) == 0) {
- if ((td->td_flags & TDF_SWAPINREQ) == 0) {
- td->td_flags |= TDF_SWAPINREQ;
- return (1);
- }
- } else
- sched_wakeup(td);
- return (0);
+ if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0)
+ thread_unlock(td);
+
+ return (swapin);
}
/*
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index dd8e2c8d90af..da47cf9a517e 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -281,7 +281,6 @@ thread_create(struct thread *td, struct rtprio *rtp,
}
TD_SET_CAN_RUN(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
return (0);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 9d389eb8917f..a3f02a9d4111 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -565,7 +565,6 @@ thread_exit(void)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
}
@@ -606,7 +605,7 @@ thread_exit(void)
/* Save our resource usage in our process. */
td->td_ru.ru_nvcsw++;
- ruxagg(p, td);
+ ruxagg_locked(p, td);
rucollect(&p->p_ru, &td->td_ru);
PROC_STATUNLOCK(p);
@@ -730,19 +729,36 @@ weed_inhib(int mode, struct thread *td2, struct proc *p)
THREAD_LOCK_ASSERT(td2, MA_OWNED);
wakeup_swapper = 0;
+
+ /*
+ * Since the thread lock is dropped by the scheduler we have
+ * to retry to check for races.
+ */
+restart:
switch (mode) {
case SINGLE_EXIT:
- if (TD_IS_SUSPENDED(td2))
+ if (TD_IS_SUSPENDED(td2)) {
wakeup_swapper |= thread_unsuspend_one(td2, p, true);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
wakeup_swapper |= sleepq_abort(td2, EINTR);
+ return (wakeup_swapper);
+ }
break;
case SINGLE_BOUNDARY:
case SINGLE_NO_EXIT:
- if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
+ if (TD_IS_SUSPENDED(td2) &&
+ (td2->td_flags & TDF_BOUNDARY) == 0) {
wakeup_swapper |= thread_unsuspend_one(td2, p, false);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ return (wakeup_swapper);
+ }
break;
case SINGLE_ALLPROC:
/*
@@ -754,18 +770,25 @@ weed_inhib(int mode, struct thread *td2, struct proc *p)
* is used to avoid immediate un-suspend.
*/
if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
- TDF_ALLPROCSUSP)) == 0)
+ TDF_ALLPROCSUSP)) == 0) {
wakeup_swapper |= thread_unsuspend_one(td2, p, false);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
if ((td2->td_flags & TDF_SBDRY) == 0) {
thread_suspend_one(td2);
td2->td_flags |= TDF_ALLPROCSUSP;
} else {
wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ return (wakeup_swapper);
}
}
break;
+ default:
+ break;
}
+ thread_unlock(td2);
return (wakeup_swapper);
}
@@ -842,9 +865,10 @@ thread_single(struct proc *p, int mode)
#ifdef SMP
} else if (TD_IS_RUNNING(td2) && td != td2) {
forward_signal(td2);
+ thread_unlock(td2);
#endif
- }
- thread_unlock(td2);
+ } else
+ thread_unlock(td2);
}
if (wakeup_swapper)
kick_proc0();
@@ -1028,7 +1052,6 @@ thread_suspend_check(int return_instead)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
}
@@ -1112,7 +1135,7 @@ thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
p->p_boundary_count--;
}
}
- return (setrunnable(td));
+ return (setrunnable(td, 0));
}
/*
@@ -1133,8 +1156,8 @@ thread_unsuspend(struct proc *p)
if (TD_IS_SUSPENDED(td)) {
wakeup_swapper |= thread_unsuspend_one(td, p,
true);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
p->p_numthreads == p->p_suspcount) {
@@ -1147,7 +1170,6 @@ thread_unsuspend(struct proc *p)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
}
}
if (wakeup_swapper)
@@ -1193,8 +1215,8 @@ thread_single_end(struct proc *p, int mode)
if (TD_IS_SUSPENDED(td)) {
wakeup_swapper |= thread_unsuspend_one(td, p,
mode == SINGLE_BOUNDARY);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
}
KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 3cfc76a1afd3..c558b9135749 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -846,7 +846,7 @@ sched_priority(struct thread *td, u_char prio)
td->td_priority = prio;
if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
sched_rem(td);
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_BORING | SRQ_HOLDTD);
}
}
@@ -980,25 +980,12 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
struct proc *p;
int preempted;
- tmtx = NULL;
+ tmtx = &sched_lock;
ts = td_get_sched(td);
p = td->td_proc;
THREAD_LOCK_ASSERT(td, MA_OWNED);
- /*
- * Switch to the sched lock to fix things up and pick
- * a new thread.
- * Block the td_lock in order to avoid breaking the critical path.
- */
- if (td->td_lock != &sched_lock) {
- mtx_lock_spin(&sched_lock);
- tmtx = thread_lock_block(td);
- }
-
- if ((td->td_flags & TDF_NOLOAD) == 0)
- sched_load_rem();
-
td->td_lastcpu = td->td_oncpu;
preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
(flags & SW_PREEMPT) != 0;
@@ -1021,10 +1008,25 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (TD_IS_RUNNING(td)) {
/* Put us back on the run queue. */
sched_add(td, preempted ?
- SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
- SRQ_OURSELF|SRQ_YIELDING);
+ SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
+ SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING);
}
}
+
+ /*
+ * Switch to the sched lock to fix things up and pick
+ * a new thread. Block the td_lock in order to avoid
+ * breaking the critical path.
+ */
+ if (td->td_lock != &sched_lock) {
+ mtx_lock_spin(&sched_lock);
+ tmtx = thread_lock_block(td);
+ mtx_unlock_spin(tmtx);
+ }
+
+ if ((td->td_flags & TDF_NOLOAD) == 0)
+ sched_load_rem();
+
if (newtd) {
/*
* The thread we are about to run needs to be counted
@@ -1042,9 +1044,10 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
sched_load_add();
} else {
newtd = choosethread();
- MPASS(newtd->td_lock == &sched_lock);
}
+ MPASS(newtd->td_lock == &sched_lock);
+
#if (KTR_COMPILE & KTR_SCHED) != 0
if (TD_IS_IDLETHREAD(td))
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
@@ -1075,7 +1078,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
(*dtrace_vtime_switch_func)(newtd);
#endif
- cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock);
+ cpu_switch(td, newtd, tmtx);
lock_profile_obtain_lock_success(&sched_lock.lock_object,
0, 0, __FILE__, __LINE__);
/*
@@ -1100,8 +1103,10 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
- } else
+ } else {
+ td->td_lock = &sched_lock;
SDT_PROBE0(sched, , , remain__cpu);
+ }
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
@@ -1116,7 +1121,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
}
void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
@@ -1130,7 +1135,7 @@ sched_wakeup(struct thread *td)
td->td_slptick = 0;
ts->ts_slptime = 0;
ts->ts_slice = sched_slice;
- sched_add(td, SRQ_BORING);
+ sched_add(td, srqflags);
}
#ifdef SMP
@@ -1316,7 +1321,11 @@ sched_add(struct thread *td, int flags)
*/
if (td->td_lock != &sched_lock) {
mtx_lock_spin(&sched_lock);
- thread_lock_set(td, &sched_lock);
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = &sched_lock;
+ else
+ thread_lock_set(td, &sched_lock);
+
}
TD_SET_RUNQ(td);
@@ -1380,6 +1389,8 @@ sched_add(struct thread *td, int flags)
maybe_resched(td);
}
}
+ if ((flags & SRQ_HOLDTD) == 0)
+ thread_unlock(td);
}
#else /* SMP */
{
@@ -1407,7 +1418,10 @@ sched_add(struct thread *td, int flags)
*/
if (td->td_lock != &sched_lock) {
mtx_lock_spin(&sched_lock);
- thread_lock_set(td, &sched_lock);
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = &sched_lock;
+ else
+ thread_lock_set(td, &sched_lock);
}
TD_SET_RUNQ(td);
CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
@@ -1418,6 +1432,8 @@ sched_add(struct thread *td, int flags)
runq_add(ts->ts_runq, td, flags);
if (!maybe_preempt(td))
maybe_resched(td);
+ if ((flags & SRQ_HOLDTD) == 0)
+ thread_unlock(td);
}
#endif /* SMP */
@@ -1776,7 +1792,7 @@ sched_affinity(struct thread *td)
/* Put this thread on a valid per-CPU runqueue. */
sched_rem(td);
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_HOLDTD | SRQ_BORING);
break;
case TDS_RUNNING:
/*
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index b921a68c6e52..24015bcf3c6d 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -464,7 +464,7 @@ tdq_runq_add(struct tdq *tdq, struct thread *td, int flags)
u_char pri;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
pri = td->td_priority;
ts = td_get_sched(td);
@@ -515,6 +515,7 @@ tdq_runq_rem(struct tdq *tdq, struct thread *td)
ts = td_get_sched(td);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT(ts->ts_runq != NULL,
("tdq_runq_remove: thread %p null ts_runq", td));
if (ts->ts_flags & TSF_XFERABLE) {
@@ -539,7 +540,7 @@ tdq_load_add(struct tdq *tdq, struct thread *td)
{
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
tdq->tdq_load++;
if ((td->td_flags & TDF_NOLOAD) == 0)
@@ -556,8 +557,8 @@ static void
tdq_load_rem(struct tdq *tdq, struct thread *td)
{
- THREAD_LOCK_ASSERT(td, MA_OWNED);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT(tdq->tdq_load != 0,
("tdq_load_rem: Removing with 0 load on queue %d", TDQ_ID(tdq)));
@@ -949,7 +950,6 @@ sched_balance_pair(struct tdq *high, struct tdq *low)
static struct thread *
tdq_move(struct tdq *from, struct tdq *to)
{
- struct td_sched *ts;
struct thread *td;
struct tdq *tdq;
int cpu;
@@ -962,18 +962,18 @@ tdq_move(struct tdq *from, struct tdq *to)
td = tdq_steal(tdq, cpu);
if (td == NULL)
return (NULL);
- ts = td_get_sched(td);
+
/*
- * Although the run queue is locked the thread may be blocked. Lock
- * it to clear this and acquire the run-queue lock.
+ * Although the run queue is locked the thread may be
+ * blocked. We can not set the lock until it is unblocked.
*/
- thread_lock(td);
- /* Drop recursive lock on from acquired via thread_lock(). */
- TDQ_UNLOCK(from);
+ thread_lock_block_wait(td);
sched_rem(td);
- ts->ts_cpu = cpu;
+ THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(from));
td->td_lock = TDQ_LOCKPTR(to);
+ td_get_sched(td)->ts_cpu = cpu;
tdq_add(to, td, SRQ_YIELDING);
+
return (td);
}
@@ -1205,6 +1205,7 @@ sched_setcpu(struct thread *td, int cpu, int flags)
{
struct tdq *tdq;
+ struct mtx *mtx;
THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_CPU(cpu);
@@ -1212,26 +1213,20 @@ sched_setcpu(struct thread *td, int cpu, int flags)
/*
* If the lock matches just return the queue.
*/
- if (td->td_lock == TDQ_LOCKPTR(tdq))
- return (tdq);
-#ifdef notyet
- /*
- * If the thread isn't running its lockptr is a
- * turnstile or a sleepqueue. We can just lock_set without
- * blocking.
- */
- if (TD_CAN_RUN(td)) {
- TDQ_LOCK(tdq);
- thread_lock_set(td, TDQ_LOCKPTR(tdq));
+ if (td->td_lock == TDQ_LOCKPTR(tdq)) {
+ KASSERT((flags & SRQ_HOLD) == 0,
+ ("sched_setcpu: Invalid lock for SRQ_HOLD"));
return (tdq);
}
-#endif
+
/*
* The hard case, migration, we need to block the thread first to
* prevent order reversals with other cpus locks.
*/
spinlock_enter();
- thread_lock_block(td);
+ mtx = thread_lock_block(td);
+ if ((flags & SRQ_HOLD) == 0)
+ mtx_unlock_spin(mtx);
TDQ_LOCK(tdq);
thread_lock_unblock(td, TDQ_LOCKPTR(tdq));
spinlock_exit();
@@ -1422,8 +1417,7 @@ tdq_setup(struct tdq *tdq, int id)
tdq->tdq_id = id;
snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
"sched lock %d", (int)TDQ_ID(tdq));
- mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock",
- MTX_SPIN | MTX_RECURSE);
+ mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock", MTX_SPIN);
#ifdef KTR
snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname),
"CPU %d load", (int)TDQ_ID(tdq));
@@ -1785,7 +1779,7 @@ sched_thread_priority(struct thread *td, u_char prio)
if (TD_ON_RUNQ(td) && prio < td->td_priority) {
sched_rem(td);
td->td_priority = prio;
- sched_add(td, SRQ_BORROWING);
+ sched_add(td, SRQ_BORROWING | SRQ_HOLDTD);
return;
}
/*
@@ -2011,6 +2005,7 @@ static struct mtx *
sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
{
struct tdq *tdn;
+ struct mtx *mtx;
KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: "
"thread %s queued on absent CPU %d.", td->td_name,
@@ -2024,7 +2019,8 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
* not holding either run-queue lock.
*/
spinlock_enter();
- thread_lock_block(td); /* This releases the lock on tdq. */
+ mtx = thread_lock_block(td);
+ mtx_unlock_spin(mtx);
/*
* Acquire both run-queue locks before placing the thread on the new
@@ -2044,8 +2040,7 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
}
/*
- * Variadic version of thread_lock_unblock() that does not assume td_lock
- * is blocked.
+ * thread_lock_unblock() that does not assume td_lock is blocked.
*/
static inline void
thread_unblock_switch(struct thread *td, struct mtx *mtx)
@@ -2114,8 +2109,13 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
}
} else {
/* This thread must be going to sleep. */
- TDQ_LOCK(tdq);
mtx = thread_lock_block(td);
+ if (mtx != TDQ_LOCKPTR(tdq)) {
+ spinlock_enter();
+ mtx_unlock_spin(mtx);
+ TDQ_LOCK(tdq);
+ spinlock_exit();
+ }
tdq_load_rem(tdq, td);
#ifdef SMP
if (tdq->tdq_load == 0)
@@ -2237,9 +2237,11 @@ sched_sleep(struct thread *td, int prio)
/*
* Schedule a thread to resume execution and record how long it voluntarily
* slept. We also update the pctcpu, interactivity, and priority.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
int slptick;
@@ -2247,6 +2249,7 @@ sched_wakeup(struct thread *td)
THREAD_LOCK_ASSERT(td, MA_OWNED);
ts = td_get_sched(td);
td->td_flags &= ~TDF_CANSWAP;
+
/*
* If we slept for more than a tick update our interactivity and
* priority.
@@ -2262,7 +2265,7 @@ sched_wakeup(struct thread *td)
* Reset the slice value since we slept and advanced the round-robin.
*/
ts->ts_slice = 0;
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_BORING | srqflags);
}
/*
@@ -2578,6 +2581,7 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
{
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
@@ -2594,6 +2598,8 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
/*
* Select the target thread queue and add a thread to it. Request
* preemption or IPI a remote processor if required.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
void
sched_add(struct thread *td, int flags)
@@ -2625,10 +2631,10 @@ sched_add(struct thread *td, int flags)
cpu = sched_pickcpu(td, flags);
tdq = sched_setcpu(td, cpu, flags);
tdq_add(tdq, td, flags);
- if (cpu != PCPU_GET(cpuid)) {
+ if (cpu != PCPU_GET(cpuid))
tdq_notify(tdq, td);
- return;
- }
+ else if (!(flags & SRQ_YIELDING))
+ sched_setpreempt(td);
#else
tdq = TDQ_SELF();
TDQ_LOCK(tdq);
@@ -2636,11 +2642,16 @@ sched_add(struct thread *td, int flags)
* Now that the thread is moving to the run-queue, set the lock
* to the scheduler's lock.
*/
- thread_lock_set(td, TDQ_LOCKPTR(tdq));
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = TDQ_LOCKPTR(tdq);
+ else
+ thread_lock_set(td, TDQ_LOCKPTR(tdq));
tdq_add(tdq, td, flags);
-#endif
if (!(flags & SRQ_YIELDING))
sched_setpreempt(td);
+#endif
+ if (!(flags & SRQ_HOLDTD))
+ thread_unlock(td);
}
/*
@@ -2927,6 +2938,7 @@ sched_throw(struct thread *td)
PCPU_SET(switchticks, ticks);
PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(tdq);
} else {
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_SELF();
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td);
diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c
index f2227150d3a9..3ad9275095af 100644
--- a/sys/kern/subr_gtaskqueue.c
+++ b/sys/kern/subr_gtaskqueue.c
@@ -492,7 +492,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (0);
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 430689556d04..666428b9b08c 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -352,8 +352,9 @@ show_pcpu(struct pcpu *pc)
db_printf("curthread = ");
td = pc->pc_curthread;
if (td != NULL)
- db_printf("%p: pid %d tid %d \"%s\"\n", td, td->td_proc->p_pid,
- td->td_tid, td->td_name);
+ db_printf("%p: pid %d tid %d critnest %d \"%s\"\n", td,
+ td->td_proc->p_pid, td->td_tid, td->td_critnest,
+ td->td_name);
else
db_printf("none\n");
db_printf("curpcb = %p\n", pc->pc_curpcb);
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index fbb7a169aa45..b89d07c6797d 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -171,7 +171,8 @@ static void sleepq_dtor(void *mem, int size, void *arg);
#endif
static int sleepq_init(void *mem, int size, int flags);
static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
- int pri);
+ int pri, int srqflags);
+static void sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
static void sleepq_switch(void *wchan, int pri);
static void sleepq_timeout(void *arg);
@@ -220,7 +221,7 @@ init_sleepqueues(void)
for (i = 0; i < SC_TABLESIZE; i++) {
LIST_INIT(&sleepq_chains[i].sc_queues);
mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
- MTX_SPIN | MTX_RECURSE);
+ MTX_SPIN);
}
sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
#ifdef INVARIANTS
@@ -542,15 +543,7 @@ out:
*/
if (TD_ON_SLEEPQ(td)) {
sq = sleepq_lookup(wchan);
- if (sleepq_resume_thread(sq, td, 0)) {
-#ifdef INVARIANTS
- /*
- * This thread hasn't gone to sleep yet, so it
- * should not be swapped out.
- */
- panic("not waking up swapper");
-#endif
- }
+ sleepq_remove_thread(sq, td);
}
mtx_unlock_spin(&sc->sc_lock);
MPASS(td->td_lock != &sc->sc_lock);
@@ -609,15 +602,7 @@ sleepq_switch(void *wchan, int pri)
}
MPASS(TD_ON_SLEEPQ(td));
sq = sleepq_lookup(wchan);
- if (sleepq_resume_thread(sq, td, 0)) {
-#ifdef INVARIANTS
- /*
- * This thread hasn't gone to sleep yet, so it
- * should not be swapped out.
- */
- panic("not waking up swapper");
-#endif
- }
+ sleepq_remove_thread(sq, td);
mtx_unlock_spin(&sc->sc_lock);
return;
}
@@ -782,23 +767,82 @@ sleepq_type(void *wchan)
MPASS(wchan != NULL);
- sleepq_lock(wchan);
sq = sleepq_lookup(wchan);
- if (sq == NULL) {
- sleepq_release(wchan);
+ if (sq == NULL)
return (-1);
- }
type = sq->sq_type;
- sleepq_release(wchan);
+
return (type);
}
/*
* Removes a thread from a sleep queue and makes it
* runnable.
+ *
+ * Requires the sc chain locked on entry. If SRQ_HOLD is specified it will
+ * be locked on return. Returns without the thread lock held.
*/
static int
-sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
+sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
+ int srqflags)
+{
+ struct sleepqueue_chain *sc;
+ bool drop;
+
+ MPASS(td != NULL);
+ MPASS(sq->sq_wchan != NULL);
+ MPASS(td->td_wchan == sq->sq_wchan);
+
+ sc = SC_LOOKUP(sq->sq_wchan);
+ mtx_assert(&sc->sc_lock, MA_OWNED);
+
+ /*
+ * Avoid recursing on the chain lock. If the locks don't match we
+ * need to acquire the thread lock which setrunnable will drop for
+ * us. In this case we need to drop the chain lock afterwards.
+ *
+ * There is no race that will make td_lock equal to sc_lock because
+ * we hold sc_lock.
+ */
+ drop = false;
+ if (!TD_IS_SLEEPING(td)) {
+ thread_lock(td);
+ drop = true;
+ } else
+ thread_lock_block_wait(td);
+
+ /* Remove thread from the sleepq. */
+ sleepq_remove_thread(sq, td);
+
+ /* If we're done with the sleepqueue release it. */
+ if ((srqflags & SRQ_HOLD) == 0 && drop)
+ mtx_unlock_spin(&sc->sc_lock);
+
+ /* Adjust priority if requested. */
+ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
+ if (pri != 0 && td->td_priority > pri &&
+ PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
+ sched_prio(td, pri);
+
+ /*
+ * Note that thread td might not be sleeping if it is running
+ * sleepq_catch_signals() on another CPU or is blocked on its
+ * proc lock to check signals. There's no need to mark the
+ * thread runnable in that case.
+ */
+ if (TD_IS_SLEEPING(td)) {
+ MPASS(!drop);
+ TD_CLR_SLEEPING(td);
+ return (setrunnable(td, srqflags));
+ }
+ MPASS(drop);
+ thread_unlock(td);
+
+ return (0);
+}
+
+static void
+sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
{
struct sleepqueue_chain *sc __unused;
@@ -839,24 +883,6 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, td->td_name);
-
- /* Adjust priority if requested. */
- MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
- if (pri != 0 && td->td_priority > pri &&
- PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
- sched_prio(td, pri);
-
- /*
- * Note that thread td might not be sleeping if it is running
- * sleepq_catch_signals() on another CPU or is blocked on its
- * proc lock to check signals. There's no need to mark the
- * thread runnable in that case.
- */
- if (TD_IS_SLEEPING(td)) {
- TD_CLR_SLEEPING(td);
- return (setrunnable(td));
- }
- return (0);
}
#ifdef INVARIANTS
@@ -946,9 +972,7 @@ sleepq_signal(void *wchan, int flags, int pri, int queue)
}
}
MPASS(besttd != NULL);
- thread_lock(besttd);
- wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
- thread_unlock(besttd);
+ wakeup_swapper = sleepq_resume_thread(sq, besttd, pri, SRQ_HOLD);
return (wakeup_swapper);
}
@@ -997,10 +1021,9 @@ sleepq_remove_matching(struct sleepqueue *sq, int queue,
*/
wakeup_swapper = 0;
TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
- thread_lock(td);
if (matches(td))
- wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
- thread_unlock(td);
+ wakeup_swapper |= sleepq_resume_thread(sq, td, pri,
+ SRQ_HOLD);
}
return (wakeup_swapper);
@@ -1020,12 +1043,10 @@ sleepq_timeout(void *arg)
int wakeup_swapper;
td = arg;
- wakeup_swapper = 0;
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
thread_lock(td);
-
if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
/*
* The thread does not want a timeout (yet).
@@ -1041,7 +1062,10 @@ sleepq_timeout(void *arg)
sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
td->td_flags |= TDF_TIMEOUT;
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
+ if (wakeup_swapper)
+ kick_proc0();
+ return;
} else if (TD_ON_SLEEPQ(td)) {
/*
* If the thread is on the SLEEPQ but isn't sleeping
@@ -1051,10 +1075,7 @@ sleepq_timeout(void *arg)
*/
td->td_flags |= TDF_TIMEOUT;
}
-
thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
}
/*
@@ -1064,6 +1085,7 @@ sleepq_timeout(void *arg)
void
sleepq_remove(struct thread *td, void *wchan)
{
+ struct sleepqueue_chain *sc;
struct sleepqueue *sq;
int wakeup_swapper;
@@ -1073,8 +1095,8 @@ sleepq_remove(struct thread *td, void *wchan)
* bail.
*/
MPASS(wchan != NULL);
- sleepq_lock(wchan);
- sq = sleepq_lookup(wchan);
+ sc = SC_LOOKUP(wchan);
+ mtx_lock_spin(&sc->sc_lock);
/*
* We can not lock the thread here as it may be sleeping on a
* different sleepq. However, holding the sleepq lock for this
@@ -1082,16 +1104,15 @@ sleepq_remove(struct thread *td, void *wchan)
* channel. The asserts below will catch any false positives.
*/
if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
- sleepq_release(wchan);
+ mtx_unlock_spin(&sc->sc_lock);
return;
}
+
/* Thread is asleep on sleep queue sq, so wake it up. */
- thread_lock(td);
+ sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
MPASS(td->td_wchan == wchan);
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- sleepq_release(wchan);
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
if (wakeup_swapper)
kick_proc0();
}
@@ -1099,6 +1120,8 @@ sleepq_remove(struct thread *td, void *wchan)
/*
* Abort a thread as if an interrupt had occurred. Only abort
* interruptible waits (unfortunately it isn't safe to abort others).
+ *
+ * Requires thread lock on entry, releases on return.
*/
int
sleepq_abort(struct thread *td, int intrval)
@@ -1115,27 +1138,32 @@ sleepq_abort(struct thread *td, int intrval)
* If the TDF_TIMEOUT flag is set, just leave. A
* timeout is scheduled anyhow.
*/
- if (td->td_flags & TDF_TIMEOUT)
+ if (td->td_flags & TDF_TIMEOUT) {
+ thread_unlock(td);
return (0);
+ }
CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
td->td_intrval = intrval;
td->td_flags |= TDF_SLEEPABORT;
+
/*
* If the thread has not slept yet it will find the signal in
* sleepq_catch_signals() and call sleepq_resume_thread. Otherwise
* we have to do it here.
*/
- if (!TD_IS_SLEEPING(td))
+ if (!TD_IS_SLEEPING(td)) {
+ thread_unlock(td);
return (0);
+ }
wchan = td->td_wchan;
MPASS(wchan != NULL);
sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
/* Thread is asleep on sleep queue sq, so wake it up. */
- return (sleepq_resume_thread(sq, td, 0));
+ return (sleepq_resume_thread(sq, td, 0, 0));
}
void
diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c
index b643c0150c41..69f6c5376c9d 100644
--- a/sys/kern/subr_taskqueue.c
+++ b/sys/kern/subr_taskqueue.c
@@ -710,7 +710,6 @@ _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (0);
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 12272e39d45d..6d303cef210c 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -314,7 +314,7 @@ turnstile_adjust_thread(struct turnstile *ts, struct thread *td)
* It needs to be moved if either its priority is lower than
* the previous thread or higher than the next thread.
*/
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
td1 = TAILQ_PREV(td, threadqueue, td_lockq);
td2 = TAILQ_NEXT(td, td_lockq);
if ((td1 != NULL && td->td_priority < td1->td_priority) ||
@@ -429,7 +429,7 @@ turnstile_adjust(struct thread *td, u_char oldpri)
*/
ts = td->td_blocked;
MPASS(ts != NULL);
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
mtx_assert(&ts->ts_lock, MA_OWNED);
/* Resort the turnstile on the list. */
@@ -693,7 +693,7 @@ turnstile_claim(struct turnstile *ts)
td = turnstile_first_waiter(ts);
MPASS(td != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
/*
* Update the priority of the new owner if needed.
@@ -979,7 +979,7 @@ turnstile_unpend(struct turnstile *ts)
td = TAILQ_FIRST(&pending_threads);
TAILQ_REMOVE(&pending_threads, td, td_lockq);
SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
- thread_lock(td);
+ thread_lock_block_wait(td);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
MPASS(td->td_proc->p_magic == P_MAGIC);
MPASS(TD_ON_LOCK(td));
@@ -991,8 +991,7 @@ turnstile_unpend(struct turnstile *ts)
#ifdef INVARIANTS
td->td_tsqueue = 0xff;
#endif
- sched_add(td, SRQ_BORING);
- thread_unlock(td);
+ sched_add(td, SRQ_HOLD | SRQ_BORING);
}
mtx_unlock_spin(&ts->ts_lock);
}
diff --git a/sys/mips/nlm/cms.c b/sys/mips/nlm/cms.c
index b1105606e8d6..0567c30ab595 100644
--- a/sys/mips/nlm/cms.c
+++ b/sys/mips/nlm/cms.c
@@ -291,10 +291,11 @@ msgring_process_fast_intr(void *arg)
msgring_wakeup_sleep[cpu]++;
TD_CLR_IWAIT(td);
sched_add(td, SRQ_INTR);
- } else
+ } else {
+ thread_unlock(td);
msgring_wakeup_nosleep[cpu]++;
+ }
- thread_unlock(td);
return (FILTER_HANDLED);
}
@@ -382,7 +383,6 @@ create_msgring_thread(int hwtid)
thread_lock(td);
sched_class(td, PRI_ITHD);
sched_add(td, SRQ_INTR);
- thread_unlock(td);
}
int
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 7be1941416be..cd856657f341 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -376,9 +376,13 @@ struct thread0_storage {
};
struct mtx *thread_lock_block(struct thread *);
-void thread_lock_unblock(struct thread *, struct mtx *);
+void thread_lock_block_wait(struct thread *);
void thread_lock_set(struct thread *, struct mtx *);
+void thread_lock_unblock(struct thread *, struct mtx *);
#define THREAD_LOCK_ASSERT(td, type) \
+ mtx_assert((td)->td_lock, (type))
+
+#define THREAD_LOCK_BLOCKED_ASSERT(td, type) \
do { \
struct mtx *__m = (td)->td_lock; \
if (__m != &blocked_lock) \
@@ -388,8 +392,17 @@ do { \
#ifdef INVARIANTS
#define THREAD_LOCKPTR_ASSERT(td, lock) \
do { \
- struct mtx *__m = (td)->td_lock; \
- KASSERT((__m == &blocked_lock || __m == (lock)), \
+ struct mtx *__m; \
+ __m = (td)->td_lock; \
+ KASSERT(__m == (lock), \
+ ("Thread %p lock %p does not match %p", td, __m, (lock))); \
+} while (0)
+
+#define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \
+do { \
+ struct mtx *__m; \
+ __m = (td)->td_lock; \
+ KASSERT(__m == (lock) || __m == &blocked_lock, \
("Thread %p lock %p does not match %p", td, __m, (lock))); \
} while (0)
@@ -401,6 +414,7 @@ do { \
} while (0)
#else
#define THREAD_LOCKPTR_ASSERT(td, lock)
+#define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock)
#define TD_LOCKS_INC(td)
#define TD_LOCKS_DEC(td)
@@ -519,6 +533,9 @@ do { \
#define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED)
#define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD)
+#define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \
+ ((td)->td_flags & TDF_SINTR) != 0)
+
#define KTDSTATE(td) \
(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
@@ -1089,7 +1106,7 @@ int securelevel_ge(struct ucred *cr, int level);
int securelevel_gt(struct ucred *cr, int level);
void sess_hold(struct session *);
void sess_release(struct session *);
-int setrunnable(struct thread *);
+int setrunnable(struct thread *, int);
void setsugid(struct proc *p);
int should_yield(void);
int sigonstack(size_t sp);
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 02560f1418ae..cba2667555c9 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -162,6 +162,7 @@ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
void rufetchtd(struct thread *td, struct rusage *ru);
void ruxagg(struct proc *p, struct thread *td);
+void ruxagg_locked(struct proc *p, struct thread *td);
struct uidinfo
*uifind(uid_t uid);
void uifree(struct uidinfo *uip);
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 2f5a6654f61b..f02c0a105c79 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -105,7 +105,6 @@ void sched_throw(struct thread *td);
void sched_unlend_prio(struct thread *td, u_char prio);
void sched_user_prio(struct thread *td, u_char prio);
void sched_userret_slowpath(struct thread *td);
-void sched_wakeup(struct thread *td);
#ifdef RACCT
#ifdef SCHED_4BSD
fixpt_t sched_pctcpu_delta(struct thread *td);
@@ -136,11 +135,13 @@ sched_userret(struct thread *td)
*/
void sched_add(struct thread *td, int flags);
void sched_clock(struct thread *td, int ticks);
-void sched_preempt(struct thread *td);
-void sched_rem(struct thread *td);
-void sched_relinquish(struct thread *td);
struct thread *sched_choose(void);
+void sched_clock(struct thread *td, int cnt);
void sched_idletd(void *);
+void sched_preempt(struct thread *td);
+void sched_relinquish(struct thread *td);
+void sched_rem(struct thread *td);
+void sched_wakeup(struct thread *td, int srqflags);
/*
* Binding makes cpu affinity permanent while pinning is used to temporarily
@@ -190,6 +191,8 @@ sched_unpin(void)
#define SRQ_INTR 0x0004 /* It is probably urgent. */
#define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */
#define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */
+#define SRQ_HOLD 0x0020 /* Return holding original td lock */
+#define SRQ_HOLDTD 0x0040 /* Return holding td lock */
/* Scheduler stats. */
#ifdef SCHED_STATS
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 0b67641de343..7e2212e9c533 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -901,8 +901,8 @@ swapclear(struct proc *p)
td->td_flags |= TDF_INMEM;
td->td_flags &= ~TDF_SWAPINREQ;
TD_CLR_SWAPPED(td);
- if (TD_CAN_RUN(td))
- if (setrunnable(td)) {
+ if (TD_CAN_RUN(td)) {
+ if (setrunnable(td, 0)) {
#ifdef INVARIANTS
/*
* XXX: We just cleared TDI_SWAPPED
@@ -912,7 +912,8 @@ swapclear(struct proc *p)
panic("not waking up swapper");
#endif
}
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT);
p->p_flag |= P_INMEM;