author     Jeff Roberson <jeff@FreeBSD.org>  2019-12-15 21:11:15 +0000
committer  Jeff Roberson <jeff@FreeBSD.org>  2019-12-15 21:11:15 +0000
commit     61a74c5ccd65d1a00a96779f16eda8c41ff3a426 (patch)
tree       0325e01f4affe5d9ef25e68fae1a7cbd5d2ebde9 /sys
parent     054802650063bea1cb817ef22a887c3116813ba9 (diff)
schedlock 1/4
Eliminate recursion from most thread_lock consumers.  Return from
sched_add() without the thread_lock held.  This eliminates unnecessary
atomics and lock word loads as well as reducing the hold time for
scheduler locks.  This will eventually allow for lockless remote adds.

Discussed with:	kib
Reviewed by:	jhb
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D22626
Notes:
    svn path=/head/; revision=355779
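The patch amounts to a new calling convention: sched_add(), sched_wakeup(),
setrunnable(), sleepq_abort() and sleepq_resume_thread() now consume the
thread lock and return with it released unless the caller passes the new
SRQ_HOLD (keep the original lock) or SRQ_HOLDTD (keep the thread lock)
flags.  A minimal caller-side sketch of the difference; example_kick_thread()
is a hypothetical helper for illustration, not a function from the patch:

/*
 * Before: the caller held the thread lock across sched_add() and
 * dropped it afterwards, and the scheduler recursed on its spin locks:
 *
 *	thread_lock(td);
 *	TD_SET_CAN_RUN(td);
 *	sched_add(td, SRQ_BORING);
 *	thread_unlock(td);
 *
 * After: sched_add() drops the thread lock before returning, so the
 * trailing thread_unlock() disappears from every call site touched
 * below.  (Illustrative sketch only.)
 */
static void
example_kick_thread(struct thread *td)
{

	thread_lock(td);
	TD_SET_CAN_RUN(td);
	sched_add(td, SRQ_BORING);	/* returns with td's lock released */
}

Call sites that still need a lock held after queueing pass a hold flag
instead, as turnstile_unpend() does with SRQ_HOLD and sched_affinity()
does with SRQ_HOLDTD in the hunks below.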
Diffstat (limited to 'sys')
-rw-r--r--  sys/cddl/compat/opensolaris/sys/proc.h | 1
-rw-r--r--  sys/compat/linux/linux_fork.c | 4
-rw-r--r--  sys/compat/linuxkpi/common/src/linux_kthread.c | 1
-rw-r--r--  sys/dev/ocs_fc/ocs_os.c | 2
-rw-r--r--  sys/kern/init_main.c | 1
-rw-r--r--  sys/kern/kern_clock.c | 3
-rw-r--r--  sys/kern/kern_fork.c | 1
-rw-r--r--  sys/kern/kern_intr.c | 6
-rw-r--r--  sys/kern/kern_kthread.c | 4
-rw-r--r--  sys/kern/kern_mutex.c | 21
-rw-r--r--  sys/kern/kern_resource.c | 25
-rw-r--r--  sys/kern/kern_sig.c | 51
-rw-r--r--  sys/kern/kern_synch.c | 40
-rw-r--r--  sys/kern/kern_thr.c | 1
-rw-r--r--  sys/kern/kern_thread.c | 56
-rw-r--r--  sys/kern/sched_4bsd.c | 66
-rw-r--r--  sys/kern/sched_ule.c | 88
-rw-r--r--  sys/kern/subr_gtaskqueue.c | 1
-rw-r--r--  sys/kern/subr_pcpu.c | 5
-rw-r--r--  sys/kern/subr_sleepqueue.c | 160
-rw-r--r--  sys/kern/subr_taskqueue.c | 1
-rw-r--r--  sys/kern/subr_turnstile.c | 11
-rw-r--r--  sys/mips/nlm/cms.c | 6
-rw-r--r--  sys/sys/proc.h | 25
-rw-r--r--  sys/sys/resourcevar.h | 1
-rw-r--r--  sys/sys/sched.h | 11
-rw-r--r--  sys/vm/vm_swapout.c | 7
27 files changed, 360 insertions, 239 deletions
diff --git a/sys/cddl/compat/opensolaris/sys/proc.h b/sys/cddl/compat/opensolaris/sys/proc.h
index b26ad11ca875..d91833a58f8c 100644
--- a/sys/cddl/compat/opensolaris/sys/proc.h
+++ b/sys/cddl/compat/opensolaris/sys/proc.h
@@ -89,7 +89,6 @@ do_thread_create(caddr_t stk, size_t stksize, void (*proc)(void *), void *arg,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (td);
}
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 066640e2fc9c..0c772ea5cb4a 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -92,7 +92,6 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
return (0);
}
@@ -123,7 +122,6 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
return (0);
}
@@ -228,7 +226,6 @@ linux_clone_proc(struct thread *td, struct linux_clone_args *args)
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
td->td_retval[0] = p2->p_pid;
@@ -343,7 +340,6 @@ linux_clone_thread(struct thread *td, struct linux_clone_args *args)
thread_lock(newtd);
TD_SET_CAN_RUN(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
td->td_retval[0] = newtd->td_tid;
diff --git a/sys/compat/linuxkpi/common/src/linux_kthread.c b/sys/compat/linuxkpi/common/src/linux_kthread.c
index 198082615076..26afe005ea59 100644
--- a/sys/compat/linuxkpi/common/src/linux_kthread.c
+++ b/sys/compat/linuxkpi/common/src/linux_kthread.c
@@ -142,7 +142,6 @@ linux_kthread_setup_and_run(struct thread *td, linux_task_fn_t *task_fn, void *a
sched_prio(td, PI_SWI(SWI_NET));
/* put thread into run-queue */
sched_add(td, SRQ_BORING);
- thread_unlock(td);
return (task);
}
diff --git a/sys/dev/ocs_fc/ocs_os.c b/sys/dev/ocs_fc/ocs_os.c
index 054b6b8f62f9..5d434fb527ac 100644
--- a/sys/dev/ocs_fc/ocs_os.c
+++ b/sys/dev/ocs_fc/ocs_os.c
@@ -659,6 +659,8 @@ ocs_thread_create(ocs_os_handle_t os, ocs_thread_t *thread, ocs_thread_fctn fctn
int32_t ocs_thread_start(ocs_thread_t *thread)
{
+
+ thread_lock(thread->tcb);
sched_add(thread->tcb, SRQ_BORING);
return 0;
}
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 358b79445708..6d4e7b432818 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -853,6 +853,5 @@ kick_init(const void *udata __unused)
thread_lock(td);
TD_SET_CAN_RUN(td);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 612d73da7cdd..378bf0cc43fc 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -282,8 +282,7 @@ deadlkres(void)
if (TD_ON_LOCK(td))
deadlres_td_on_lock(p, td,
blkticks);
- else if (TD_IS_SLEEPING(td) &&
- TD_ON_SLEEPQ(td))
+ else if (TD_IS_SLEEPING(td))
deadlres_td_sleep_q(p, td,
slpticks);
thread_unlock(td);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index b397dee1aaa4..e8ac950a5d78 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -758,7 +758,6 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
thread_lock(td2);
TD_SET_CAN_RUN(td2);
sched_add(td2, SRQ_BORING);
- thread_unlock(td2);
} else {
*fr->fr_procp = p2;
}
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 3c1b08cf913c..49defdb52f4c 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -558,8 +558,8 @@ ithread_destroy(struct intr_thread *ithread)
if (TD_AWAITING_INTR(td)) {
TD_CLR_IWAIT(td);
sched_add(td, SRQ_INTR);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
int
@@ -985,8 +985,8 @@ intr_event_schedule_thread(struct intr_event *ie)
} else {
CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
__func__, td->td_proc->p_pid, td->td_name, it->it_need, td->td_state);
+ thread_unlock(td);
}
- thread_unlock(td);
return (0);
}
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index ebf9a4c6daa4..d2c7b3c8f1ed 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -146,7 +146,8 @@ kproc_create(void (*func)(void *), void *arg,
/* Delay putting it on the run queue until now. */
if (!(flags & RFSTOPPED))
sched_add(td, SRQ_BORING);
- thread_unlock(td);
+ else
+ thread_unlock(td);
return 0;
}
@@ -324,7 +325,6 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p,
if (!(flags & RFSTOPPED)) {
thread_lock(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
}
if (newtdp)
*newtdp = newtd;
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index ca6fbb157324..66d7c16d1f1d 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -960,10 +960,9 @@ thread_lock_block(struct thread *td)
{
struct mtx *lock;
- THREAD_LOCK_ASSERT(td, MA_OWNED);
lock = td->td_lock;
+ mtx_assert(lock, MA_OWNED);
td->td_lock = &blocked_lock;
- mtx_unlock_spin(lock);
return (lock);
}
@@ -971,19 +970,33 @@ thread_lock_block(struct thread *td)
void
thread_lock_unblock(struct thread *td, struct mtx *new)
{
+
mtx_assert(new, MA_OWNED);
- MPASS(td->td_lock == &blocked_lock);
+ KASSERT(td->td_lock == &blocked_lock,
+ ("thread %p lock %p not blocked_lock %p",
+ td, td->td_lock, &blocked_lock));
atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
}
void
+thread_lock_block_wait(struct thread *td)
+{
+
+ while (td->td_lock == &blocked_lock)
+ cpu_spinwait();
+
+ /* Acquire fence to be certain that all thread state is visible. */
+ atomic_thread_fence_acq();
+}
+
+void
thread_lock_set(struct thread *td, struct mtx *new)
{
struct mtx *lock;
mtx_assert(new, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
lock = td->td_lock;
+ mtx_assert(lock, MA_OWNED);
td->td_lock = new;
mtx_unlock_spin(lock);
}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 3bb8dee40284..99efe979aa20 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -78,7 +78,7 @@ static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
-static void ruxagg_locked(struct rusage_ext *rux, struct thread *td);
+static void ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td);
/*
* Resource controls and accounting.
@@ -858,7 +858,7 @@ rufetchtd(struct thread *td, struct rusage *ru)
td->td_incruntime += runtime;
PCPU_SET(switchtime, u);
}
- ruxagg(p, td);
+ ruxagg_locked(p, td);
*ru = td->td_ru;
calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
}
@@ -1114,11 +1114,9 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
* Aggregate tick counts into the proc's rusage_ext.
*/
static void
-ruxagg_locked(struct rusage_ext *rux, struct thread *td)
+ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td)
{
- THREAD_LOCK_ASSERT(td, MA_OWNED);
- PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
rux->rux_runtime += td->td_incruntime;
rux->rux_uticks += td->td_uticks;
rux->rux_sticks += td->td_sticks;
@@ -1126,16 +1124,25 @@ ruxagg_locked(struct rusage_ext *rux, struct thread *td)
}
void
-ruxagg(struct proc *p, struct thread *td)
+ruxagg_locked(struct proc *p, struct thread *td)
{
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
- thread_lock(td);
- ruxagg_locked(&p->p_rux, td);
- ruxagg_locked(&td->td_rux, td);
+ ruxagg_ext_locked(&p->p_rux, td);
+ ruxagg_ext_locked(&td->td_rux, td);
td->td_incruntime = 0;
td->td_uticks = 0;
td->td_iticks = 0;
td->td_sticks = 0;
+}
+
+void
+ruxagg(struct proc *p, struct thread *td)
+{
+
+ thread_lock(td);
+ ruxagg_locked(p, td);
thread_unlock(td);
}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 203c47bea360..429a64631b32 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2250,6 +2250,8 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
p->p_step = 0;
wakeup(&p->p_step);
}
+ wakeup_swapper = 0;
+
/*
* Some signals have a process-wide effect and a per-thread
* component. Most processing occurs when the process next
@@ -2352,15 +2354,13 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
* the PROCESS runnable, leave it stopped.
* It may run a bit until it hits a thread_suspend_check().
*/
- wakeup_swapper = 0;
PROC_SLOCK(p);
thread_lock(td);
- if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR))
+ if (TD_CAN_ABORT(td))
wakeup_swapper = sleepq_abort(td, intrval);
- thread_unlock(td);
+ else
+ thread_unlock(td);
PROC_SUNLOCK(p);
- if (wakeup_swapper)
- kick_proc0();
goto out;
/*
* Mutexes are short lived. Threads waiting on them will
@@ -2394,8 +2394,6 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
sigqueue_delete_proc(p, p->p_xsig);
} else
PROC_SUNLOCK(p);
- if (wakeup_swapper)
- kick_proc0();
goto out;
}
} else {
@@ -2416,6 +2414,9 @@ runfast:
out:
/* If we jump here, proc slock should not be owned. */
PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
+ if (wakeup_swapper)
+ kick_proc0();
+
return (ret);
}
@@ -2428,10 +2429,8 @@ static void
tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
{
struct proc *p = td->td_proc;
- int prop;
- int wakeup_swapper;
+ int prop, wakeup_swapper;
- wakeup_swapper = 0;
PROC_LOCK_ASSERT(p, MA_OWNED);
prop = sigprop(sig);
@@ -2487,22 +2486,25 @@ tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
sched_prio(td, PUSER);
wakeup_swapper = sleepq_abort(td, intrval);
- } else {
- /*
- * Other states do nothing with the signal immediately,
- * other than kicking ourselves if we are running.
- * It will either never be noticed, or noticed very soon.
- */
+ PROC_SUNLOCK(p);
+ if (wakeup_swapper)
+ kick_proc0();
+ return;
+ }
+
+ /*
+ * Other states do nothing with the signal immediately,
+ * other than kicking ourselves if we are running.
+ * It will either never be noticed, or noticed very soon.
+ */
#ifdef SMP
- if (TD_IS_RUNNING(td) && td != curthread)
- forward_signal(td);
+ if (TD_IS_RUNNING(td) && td != curthread)
+ forward_signal(td);
#endif
- }
+
out:
PROC_SUNLOCK(p);
thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
}
static int
@@ -2530,12 +2532,13 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
*/
KASSERT(!TD_IS_SUSPENDED(td2),
("thread with deferred stops suspended"));
- if (TD_SBDRY_INTR(td2))
+ if (TD_SBDRY_INTR(td2)) {
wakeup_swapper |= sleepq_abort(td2,
TD_SBDRY_ERRNO(td2));
- } else if (!TD_IS_SUSPENDED(td2)) {
+ continue;
+ }
+ } else if (!TD_IS_SUSPENDED(td2))
thread_suspend_one(td2);
- }
} else if (!TD_IS_SUSPENDED(td2)) {
if (sending || td != td2)
td2->td_flags |= TDF_ASTPENDING;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 6c937401a161..8cb84086b3ca 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -538,40 +538,48 @@ mi_switch(int flags, struct thread *newtd)
* Change thread state to be runnable, placing it on the run queue if
* it is in memory. If it is swapped out, return true so our caller
* will know to awaken the swapper.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
int
-setrunnable(struct thread *td)
+setrunnable(struct thread *td, int srqflags)
{
+ int swapin;
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
+
+ swapin = 0;
switch (td->td_state) {
case TDS_RUNNING:
case TDS_RUNQ:
+ break;
+ case TDS_CAN_RUN:
+ KASSERT((td->td_flags & TDF_INMEM) != 0,
+ ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X",
+ td, td->td_flags, td->td_inhibitors));
+ /* unlocks thread lock according to flags */
+ sched_wakeup(td, srqflags);
return (0);
case TDS_INHIBITED:
/*
* If we are only inhibited because we are swapped out
- * then arange to swap in this process. Otherwise just return.
+ * arrange to swap in this process.
*/
- if (td->td_inhibitors != TDI_SWAPPED)
- return (0);
- /* FALLTHROUGH */
- case TDS_CAN_RUN:
+ if (td->td_inhibitors == TDI_SWAPPED &&
+ (td->td_flags & TDF_SWAPINREQ) == 0) {
+ td->td_flags |= TDF_SWAPINREQ;
+ swapin = 1;
+ }
break;
default:
- printf("state is 0x%x", td->td_state);
- panic("setrunnable(2)");
+ panic("setrunnable: state 0x%x", td->td_state);
}
- if ((td->td_flags & TDF_INMEM) == 0) {
- if ((td->td_flags & TDF_SWAPINREQ) == 0) {
- td->td_flags |= TDF_SWAPINREQ;
- return (1);
- }
- } else
- sched_wakeup(td);
- return (0);
+ if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0)
+ thread_unlock(td);
+
+ return (swapin);
}
/*
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index dd8e2c8d90af..da47cf9a517e 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -281,7 +281,6 @@ thread_create(struct thread *td, struct rtprio *rtp,
}
TD_SET_CAN_RUN(newtd);
sched_add(newtd, SRQ_BORING);
- thread_unlock(newtd);
return (0);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 9d389eb8917f..a3f02a9d4111 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -565,7 +565,6 @@ thread_exit(void)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
}
@@ -606,7 +605,7 @@ thread_exit(void)
/* Save our resource usage in our process. */
td->td_ru.ru_nvcsw++;
- ruxagg(p, td);
+ ruxagg_locked(p, td);
rucollect(&p->p_ru, &td->td_ru);
PROC_STATUNLOCK(p);
@@ -730,19 +729,36 @@ weed_inhib(int mode, struct thread *td2, struct proc *p)
THREAD_LOCK_ASSERT(td2, MA_OWNED);
wakeup_swapper = 0;
+
+ /*
+ * Since the thread lock is dropped by the scheduler we have
+ * to retry to check for races.
+ */
+restart:
switch (mode) {
case SINGLE_EXIT:
- if (TD_IS_SUSPENDED(td2))
+ if (TD_IS_SUSPENDED(td2)) {
wakeup_swapper |= thread_unsuspend_one(td2, p, true);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
wakeup_swapper |= sleepq_abort(td2, EINTR);
+ return (wakeup_swapper);
+ }
break;
case SINGLE_BOUNDARY:
case SINGLE_NO_EXIT:
- if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
+ if (TD_IS_SUSPENDED(td2) &&
+ (td2->td_flags & TDF_BOUNDARY) == 0) {
wakeup_swapper |= thread_unsuspend_one(td2, p, false);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ return (wakeup_swapper);
+ }
break;
case SINGLE_ALLPROC:
/*
@@ -754,18 +770,25 @@ weed_inhib(int mode, struct thread *td2, struct proc *p)
* is used to avoid immediate un-suspend.
*/
if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
- TDF_ALLPROCSUSP)) == 0)
+ TDF_ALLPROCSUSP)) == 0) {
wakeup_swapper |= thread_unsuspend_one(td2, p, false);
- if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
+ thread_lock(td2);
+ goto restart;
+ }
+ if (TD_CAN_ABORT(td2)) {
if ((td2->td_flags & TDF_SBDRY) == 0) {
thread_suspend_one(td2);
td2->td_flags |= TDF_ALLPROCSUSP;
} else {
wakeup_swapper |= sleepq_abort(td2, ERESTART);
+ return (wakeup_swapper);
}
}
break;
+ default:
+ break;
}
+ thread_unlock(td2);
return (wakeup_swapper);
}
@@ -842,9 +865,10 @@ thread_single(struct proc *p, int mode)
#ifdef SMP
} else if (TD_IS_RUNNING(td2) && td != td2) {
forward_signal(td2);
+ thread_unlock(td2);
#endif
- }
- thread_unlock(td2);
+ } else
+ thread_unlock(td2);
}
if (wakeup_swapper)
kick_proc0();
@@ -1028,7 +1052,6 @@ thread_suspend_check(int return_instead)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
if (wakeup_swapper)
kick_proc0();
}
@@ -1112,7 +1135,7 @@ thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
p->p_boundary_count--;
}
}
- return (setrunnable(td));
+ return (setrunnable(td, 0));
}
/*
@@ -1133,8 +1156,8 @@ thread_unsuspend(struct proc *p)
if (TD_IS_SUSPENDED(td)) {
wakeup_swapper |= thread_unsuspend_one(td, p,
true);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
p->p_numthreads == p->p_suspcount) {
@@ -1147,7 +1170,6 @@ thread_unsuspend(struct proc *p)
thread_lock(p->p_singlethread);
wakeup_swapper = thread_unsuspend_one(
p->p_singlethread, p, false);
- thread_unlock(p->p_singlethread);
}
}
if (wakeup_swapper)
@@ -1193,8 +1215,8 @@ thread_single_end(struct proc *p, int mode)
if (TD_IS_SUSPENDED(td)) {
wakeup_swapper |= thread_unsuspend_one(td, p,
mode == SINGLE_BOUNDARY);
- }
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
}
KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 3cfc76a1afd3..c558b9135749 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -846,7 +846,7 @@ sched_priority(struct thread *td, u_char prio)
td->td_priority = prio;
if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
sched_rem(td);
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_BORING | SRQ_HOLDTD);
}
}
@@ -980,25 +980,12 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
struct proc *p;
int preempted;
- tmtx = NULL;
+ tmtx = &sched_lock;
ts = td_get_sched(td);
p = td->td_proc;
THREAD_LOCK_ASSERT(td, MA_OWNED);
- /*
- * Switch to the sched lock to fix things up and pick
- * a new thread.
- * Block the td_lock in order to avoid breaking the critical path.
- */
- if (td->td_lock != &sched_lock) {
- mtx_lock_spin(&sched_lock);
- tmtx = thread_lock_block(td);
- }
-
- if ((td->td_flags & TDF_NOLOAD) == 0)
- sched_load_rem();
-
td->td_lastcpu = td->td_oncpu;
preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
(flags & SW_PREEMPT) != 0;
@@ -1021,10 +1008,25 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (TD_IS_RUNNING(td)) {
/* Put us back on the run queue. */
sched_add(td, preempted ?
- SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
- SRQ_OURSELF|SRQ_YIELDING);
+ SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
+ SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING);
}
}
+
+ /*
+ * Switch to the sched lock to fix things up and pick
+ * a new thread. Block the td_lock in order to avoid
+ * breaking the critical path.
+ */
+ if (td->td_lock != &sched_lock) {
+ mtx_lock_spin(&sched_lock);
+ tmtx = thread_lock_block(td);
+ mtx_unlock_spin(tmtx);
+ }
+
+ if ((td->td_flags & TDF_NOLOAD) == 0)
+ sched_load_rem();
+
if (newtd) {
/*
* The thread we are about to run needs to be counted
@@ -1042,9 +1044,10 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
sched_load_add();
} else {
newtd = choosethread();
- MPASS(newtd->td_lock == &sched_lock);
}
+ MPASS(newtd->td_lock == &sched_lock);
+
#if (KTR_COMPILE & KTR_SCHED) != 0
if (TD_IS_IDLETHREAD(td))
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
@@ -1075,7 +1078,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
(*dtrace_vtime_switch_func)(newtd);
#endif
- cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock);
+ cpu_switch(td, newtd, tmtx);
lock_profile_obtain_lock_success(&sched_lock.lock_object,
0, 0, __FILE__, __LINE__);
/*
@@ -1100,8 +1103,10 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
- } else
+ } else {
+ td->td_lock = &sched_lock;
SDT_PROBE0(sched, , , remain__cpu);
+ }
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
@@ -1116,7 +1121,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
}
void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
@@ -1130,7 +1135,7 @@ sched_wakeup(struct thread *td)
td->td_slptick = 0;
ts->ts_slptime = 0;
ts->ts_slice = sched_slice;
- sched_add(td, SRQ_BORING);
+ sched_add(td, srqflags);
}
#ifdef SMP
@@ -1316,7 +1321,11 @@ sched_add(struct thread *td, int flags)
*/
if (td->td_lock != &sched_lock) {
mtx_lock_spin(&sched_lock);
- thread_lock_set(td, &sched_lock);
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = &sched_lock;
+ else
+ thread_lock_set(td, &sched_lock);
+
}
TD_SET_RUNQ(td);
@@ -1380,6 +1389,8 @@ sched_add(struct thread *td, int flags)
maybe_resched(td);
}
}
+ if ((flags & SRQ_HOLDTD) == 0)
+ thread_unlock(td);
}
#else /* SMP */
{
@@ -1407,7 +1418,10 @@ sched_add(struct thread *td, int flags)
*/
if (td->td_lock != &sched_lock) {
mtx_lock_spin(&sched_lock);
- thread_lock_set(td, &sched_lock);
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = &sched_lock;
+ else
+ thread_lock_set(td, &sched_lock);
}
TD_SET_RUNQ(td);
CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
@@ -1418,6 +1432,8 @@ sched_add(struct thread *td, int flags)
runq_add(ts->ts_runq, td, flags);
if (!maybe_preempt(td))
maybe_resched(td);
+ if ((flags & SRQ_HOLDTD) == 0)
+ thread_unlock(td);
}
#endif /* SMP */
@@ -1776,7 +1792,7 @@ sched_affinity(struct thread *td)
/* Put this thread on a valid per-CPU runqueue. */
sched_rem(td);
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_HOLDTD | SRQ_BORING);
break;
case TDS_RUNNING:
/*
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index b921a68c6e52..24015bcf3c6d 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -464,7 +464,7 @@ tdq_runq_add(struct tdq *tdq, struct thread *td, int flags)
u_char pri;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
pri = td->td_priority;
ts = td_get_sched(td);
@@ -515,6 +515,7 @@ tdq_runq_rem(struct tdq *tdq, struct thread *td)
ts = td_get_sched(td);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT(ts->ts_runq != NULL,
("tdq_runq_remove: thread %p null ts_runq", td));
if (ts->ts_flags & TSF_XFERABLE) {
@@ -539,7 +540,7 @@ tdq_load_add(struct tdq *tdq, struct thread *td)
{
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- THREAD_LOCK_ASSERT(td, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
tdq->tdq_load++;
if ((td->td_flags & TDF_NOLOAD) == 0)
@@ -556,8 +557,8 @@ static void
tdq_load_rem(struct tdq *tdq, struct thread *td)
{
- THREAD_LOCK_ASSERT(td, MA_OWNED);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT(tdq->tdq_load != 0,
("tdq_load_rem: Removing with 0 load on queue %d", TDQ_ID(tdq)));
@@ -949,7 +950,6 @@ sched_balance_pair(struct tdq *high, struct tdq *low)
static struct thread *
tdq_move(struct tdq *from, struct tdq *to)
{
- struct td_sched *ts;
struct thread *td;
struct tdq *tdq;
int cpu;
@@ -962,18 +962,18 @@ tdq_move(struct tdq *from, struct tdq *to)
td = tdq_steal(tdq, cpu);
if (td == NULL)
return (NULL);
- ts = td_get_sched(td);
+
/*
- * Although the run queue is locked the thread may be blocked. Lock
- * it to clear this and acquire the run-queue lock.
+ * Although the run queue is locked the thread may be
+ * blocked. We can not set the lock until it is unblocked.
*/
- thread_lock(td);
- /* Drop recursive lock on from acquired via thread_lock(). */
- TDQ_UNLOCK(from);
+ thread_lock_block_wait(td);
sched_rem(td);
- ts->ts_cpu = cpu;
+ THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(from));
td->td_lock = TDQ_LOCKPTR(to);
+ td_get_sched(td)->ts_cpu = cpu;
tdq_add(to, td, SRQ_YIELDING);
+
return (td);
}
@@ -1205,6 +1205,7 @@ sched_setcpu(struct thread *td, int cpu, int flags)
{
struct tdq *tdq;
+ struct mtx *mtx;
THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_CPU(cpu);
@@ -1212,26 +1213,20 @@ sched_setcpu(struct thread *td, int cpu, int flags)
/*
* If the lock matches just return the queue.
*/
- if (td->td_lock == TDQ_LOCKPTR(tdq))
- return (tdq);
-#ifdef notyet
- /*
- * If the thread isn't running its lockptr is a
- * turnstile or a sleepqueue. We can just lock_set without
- * blocking.
- */
- if (TD_CAN_RUN(td)) {
- TDQ_LOCK(tdq);
- thread_lock_set(td, TDQ_LOCKPTR(tdq));
+ if (td->td_lock == TDQ_LOCKPTR(tdq)) {
+ KASSERT((flags & SRQ_HOLD) == 0,
+ ("sched_setcpu: Invalid lock for SRQ_HOLD"));
return (tdq);
}
-#endif
+
/*
* The hard case, migration, we need to block the thread first to
* prevent order reversals with other cpus locks.
*/
spinlock_enter();
- thread_lock_block(td);
+ mtx = thread_lock_block(td);
+ if ((flags & SRQ_HOLD) == 0)
+ mtx_unlock_spin(mtx);
TDQ_LOCK(tdq);
thread_lock_unblock(td, TDQ_LOCKPTR(tdq));
spinlock_exit();
@@ -1422,8 +1417,7 @@ tdq_setup(struct tdq *tdq, int id)
tdq->tdq_id = id;
snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
"sched lock %d", (int)TDQ_ID(tdq));
- mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock",
- MTX_SPIN | MTX_RECURSE);
+ mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock", MTX_SPIN);
#ifdef KTR
snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname),
"CPU %d load", (int)TDQ_ID(tdq));
@@ -1785,7 +1779,7 @@ sched_thread_priority(struct thread *td, u_char prio)
if (TD_ON_RUNQ(td) && prio < td->td_priority) {
sched_rem(td);
td->td_priority = prio;
- sched_add(td, SRQ_BORROWING);
+ sched_add(td, SRQ_BORROWING | SRQ_HOLDTD);
return;
}
/*
@@ -2011,6 +2005,7 @@ static struct mtx *
sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
{
struct tdq *tdn;
+ struct mtx *mtx;
KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: "
"thread %s queued on absent CPU %d.", td->td_name,
@@ -2024,7 +2019,8 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
* not holding either run-queue lock.
*/
spinlock_enter();
- thread_lock_block(td); /* This releases the lock on tdq. */
+ mtx = thread_lock_block(td);
+ mtx_unlock_spin(mtx);
/*
* Acquire both run-queue locks before placing the thread on the new
@@ -2044,8 +2040,7 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
}
/*
- * Variadic version of thread_lock_unblock() that does not assume td_lock
- * is blocked.
+ * thread_lock_unblock() that does not assume td_lock is blocked.
*/
static inline void
thread_unblock_switch(struct thread *td, struct mtx *mtx)
@@ -2114,8 +2109,13 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
}
} else {
/* This thread must be going to sleep. */
- TDQ_LOCK(tdq);
mtx = thread_lock_block(td);
+ if (mtx != TDQ_LOCKPTR(tdq)) {
+ spinlock_enter();
+ mtx_unlock_spin(mtx);
+ TDQ_LOCK(tdq);
+ spinlock_exit();
+ }
tdq_load_rem(tdq, td);
#ifdef SMP
if (tdq->tdq_load == 0)
@@ -2237,9 +2237,11 @@ sched_sleep(struct thread *td, int prio)
/*
* Schedule a thread to resume execution and record how long it voluntarily
* slept. We also update the pctcpu, interactivity, and priority.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
int slptick;
@@ -2247,6 +2249,7 @@ sched_wakeup(struct thread *td)
THREAD_LOCK_ASSERT(td, MA_OWNED);
ts = td_get_sched(td);
td->td_flags &= ~TDF_CANSWAP;
+
/*
* If we slept for more than a tick update our interactivity and
* priority.
@@ -2262,7 +2265,7 @@ sched_wakeup(struct thread *td)
* Reset the slice value since we slept and advanced the round-robin.
*/
ts->ts_slice = 0;
- sched_add(td, SRQ_BORING);
+ sched_add(td, SRQ_BORING | srqflags);
}
/*
@@ -2578,6 +2581,7 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
{
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
@@ -2594,6 +2598,8 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
/*
* Select the target thread queue and add a thread to it. Request
* preemption or IPI a remote processor if required.
+ *
+ * Requires the thread lock on entry, drops on exit.
*/
void
sched_add(struct thread *td, int flags)
@@ -2625,10 +2631,10 @@ sched_add(struct thread *td, int flags)
cpu = sched_pickcpu(td, flags);
tdq = sched_setcpu(td, cpu, flags);
tdq_add(tdq, td, flags);
- if (cpu != PCPU_GET(cpuid)) {
+ if (cpu != PCPU_GET(cpuid))
tdq_notify(tdq, td);
- return;
- }
+ else if (!(flags & SRQ_YIELDING))
+ sched_setpreempt(td);
#else
tdq = TDQ_SELF();
TDQ_LOCK(tdq);
@@ -2636,11 +2642,16 @@ sched_add(struct thread *td, int flags)
* Now that the thread is moving to the run-queue, set the lock
* to the scheduler's lock.
*/
- thread_lock_set(td, TDQ_LOCKPTR(tdq));
+ if ((flags & SRQ_HOLD) != 0)
+ td->td_lock = TDQ_LOCKPTR(tdq);
+ else
+ thread_lock_set(td, TDQ_LOCKPTR(tdq));
tdq_add(tdq, td, flags);
-#endif
if (!(flags & SRQ_YIELDING))
sched_setpreempt(td);
+#endif
+ if (!(flags & SRQ_HOLDTD))
+ thread_unlock(td);
}
/*
@@ -2927,6 +2938,7 @@ sched_throw(struct thread *td)
PCPU_SET(switchticks, ticks);
PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(tdq);
} else {
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_SELF();
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td);
diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c
index f2227150d3a9..3ad9275095af 100644
--- a/sys/kern/subr_gtaskqueue.c
+++ b/sys/kern/subr_gtaskqueue.c
@@ -492,7 +492,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (0);
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 430689556d04..666428b9b08c 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -352,8 +352,9 @@ show_pcpu(struct pcpu *pc)
db_printf("curthread = ");
td = pc->pc_curthread;
if (td != NULL)
- db_printf("%p: pid %d tid %d \"%s\"\n", td, td->td_proc->p_pid,
- td->td_tid, td->td_name);
+ db_printf("%p: pid %d tid %d critnest %d \"%s\"\n", td,
+ td->td_proc->p_pid, td->td_tid, td->td_critnest,
+ td->td_name);
else
db_printf("none\n");
db_printf("curpcb = %p\n", pc->pc_curpcb);
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index fbb7a169aa45..b89d07c6797d 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -171,7 +171,8 @@ static void sleepq_dtor(void *mem, int size, void *arg);
#endif
static int sleepq_init(void *mem, int size, int flags);
static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
- int pri);
+ int pri, int srqflags);
+static void sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
static void sleepq_switch(void *wchan, int pri);
static void sleepq_timeout(void *arg);
@@ -220,7 +221,7 @@ init_sleepqueues(void)
for (i = 0; i < SC_TABLESIZE; i++) {
LIST_INIT(&sleepq_chains[i].sc_queues);
mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
- MTX_SPIN | MTX_RECURSE);
+ MTX_SPIN);
}
sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
#ifdef INVARIANTS
@@ -542,15 +543,7 @@ out:
*/
if (TD_ON_SLEEPQ(td)) {
sq = sleepq_lookup(wchan);
- if (sleepq_resume_thread(sq, td, 0)) {
-#ifdef INVARIANTS
- /*
- * This thread hasn't gone to sleep yet, so it
- * should not be swapped out.
- */
- panic("not waking up swapper");
-#endif
- }
+ sleepq_remove_thread(sq, td);
}
mtx_unlock_spin(&sc->sc_lock);
MPASS(td->td_lock != &sc->sc_lock);
@@ -609,15 +602,7 @@ sleepq_switch(void *wchan, int pri)
}
MPASS(TD_ON_SLEEPQ(td));
sq = sleepq_lookup(wchan);
- if (sleepq_resume_thread(sq, td, 0)) {
-#ifdef INVARIANTS
- /*
- * This thread hasn't gone to sleep yet, so it
- * should not be swapped out.
- */
- panic("not waking up swapper");
-#endif
- }
+ sleepq_remove_thread(sq, td);
mtx_unlock_spin(&sc->sc_lock);
return;
}
@@ -782,23 +767,82 @@ sleepq_type(void *wchan)
MPASS(wchan != NULL);
- sleepq_lock(wchan);
sq = sleepq_lookup(wchan);
- if (sq == NULL) {
- sleepq_release(wchan);
+ if (sq == NULL)
return (-1);
- }
type = sq->sq_type;
- sleepq_release(wchan);
+
return (type);
}
/*
* Removes a thread from a sleep queue and makes it
* runnable.
+ *
+ * Requires the sc chain locked on entry. If SRQ_HOLD is specified it will
+ * be locked on return. Returns without the thread lock held.
*/
static int
-sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
+sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
+ int srqflags)
+{
+ struct sleepqueue_chain *sc;
+ bool drop;
+
+ MPASS(td != NULL);
+ MPASS(sq->sq_wchan != NULL);
+ MPASS(td->td_wchan == sq->sq_wchan);
+
+ sc = SC_LOOKUP(sq->sq_wchan);
+ mtx_assert(&sc->sc_lock, MA_OWNED);
+
+ /*
+ * Avoid recursing on the chain lock. If the locks don't match we
+ * need to acquire the thread lock which setrunnable will drop for
+ * us. In this case we need to drop the chain lock afterwards.
+ *
+ * There is no race that will make td_lock equal to sc_lock because
+ * we hold sc_lock.
+ */
+ drop = false;
+ if (!TD_IS_SLEEPING(td)) {
+ thread_lock(td);
+ drop = true;
+ } else
+ thread_lock_block_wait(td);
+
+ /* Remove thread from the sleepq. */
+ sleepq_remove_thread(sq, td);
+
+ /* If we're done with the sleepqueue release it. */
+ if ((srqflags & SRQ_HOLD) == 0 && drop)
+ mtx_unlock_spin(&sc->sc_lock);
+
+ /* Adjust priority if requested. */
+ MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
+ if (pri != 0 && td->td_priority > pri &&
+ PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
+ sched_prio(td, pri);
+
+ /*
+ * Note that thread td might not be sleeping if it is running
+ * sleepq_catch_signals() on another CPU or is blocked on its
+ * proc lock to check signals. There's no need to mark the
+ * thread runnable in that case.
+ */
+ if (TD_IS_SLEEPING(td)) {
+ MPASS(!drop);
+ TD_CLR_SLEEPING(td);
+ return (setrunnable(td, srqflags));
+ }
+ MPASS(drop);
+ thread_unlock(td);
+
+ return (0);
+}
+
+static void
+sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
{
struct sleepqueue_chain *sc __unused;
@@ -839,24 +883,6 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, td->td_name);
-
- /* Adjust priority if requested. */
- MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
- if (pri != 0 && td->td_priority > pri &&
- PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
- sched_prio(td, pri);
-
- /*
- * Note that thread td might not be sleeping if it is running
- * sleepq_catch_signals() on another CPU or is blocked on its
- * proc lock to check signals. There's no need to mark the
- * thread runnable in that case.
- */
- if (TD_IS_SLEEPING(td)) {
- TD_CLR_SLEEPING(td);
- return (setrunnable(td));
- }
- return (0);
}
#ifdef INVARIANTS
@@ -946,9 +972,7 @@ sleepq_signal(void *wchan, int flags, int pri, int queue)
}
}
MPASS(besttd != NULL);
- thread_lock(besttd);
- wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
- thread_unlock(besttd);
+ wakeup_swapper = sleepq_resume_thread(sq, besttd, pri, SRQ_HOLD);
return (wakeup_swapper);
}
@@ -997,10 +1021,9 @@ sleepq_remove_matching(struct sleepqueue *sq, int queue,
*/
wakeup_swapper = 0;
TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
- thread_lock(td);
if (matches(td))
- wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
- thread_unlock(td);
+ wakeup_swapper |= sleepq_resume_thread(sq, td, pri,
+ SRQ_HOLD);
}
return (wakeup_swapper);
@@ -1020,12 +1043,10 @@ sleepq_timeout(void *arg)
int wakeup_swapper;
td = arg;
- wakeup_swapper = 0;
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
thread_lock(td);
-
if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
/*
* The thread does not want a timeout (yet).
@@ -1041,7 +1062,10 @@ sleepq_timeout(void *arg)
sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
td->td_flags |= TDF_TIMEOUT;
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
+ if (wakeup_swapper)
+ kick_proc0();
+ return;
} else if (TD_ON_SLEEPQ(td)) {
/*
* If the thread is on the SLEEPQ but isn't sleeping
@@ -1051,10 +1075,7 @@ sleepq_timeout(void *arg)
*/
td->td_flags |= TDF_TIMEOUT;
}
-
thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
}
/*
@@ -1064,6 +1085,7 @@ sleepq_timeout(void *arg)
void
sleepq_remove(struct thread *td, void *wchan)
{
+ struct sleepqueue_chain *sc;
struct sleepqueue *sq;
int wakeup_swapper;
@@ -1073,8 +1095,8 @@ sleepq_remove(struct thread *td, void *wchan)
* bail.
*/
MPASS(wchan != NULL);
- sleepq_lock(wchan);
- sq = sleepq_lookup(wchan);
+ sc = SC_LOOKUP(wchan);
+ mtx_lock_spin(&sc->sc_lock);
/*
* We can not lock the thread here as it may be sleeping on a
* different sleepq. However, holding the sleepq lock for this
@@ -1082,16 +1104,15 @@ sleepq_remove(struct thread *td, void *wchan)
* channel. The asserts below will catch any false positives.
*/
if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
- sleepq_release(wchan);
+ mtx_unlock_spin(&sc->sc_lock);
return;
}
+
/* Thread is asleep on sleep queue sq, so wake it up. */
- thread_lock(td);
+ sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
MPASS(td->td_wchan == wchan);
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- sleepq_release(wchan);
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
if (wakeup_swapper)
kick_proc0();
}
@@ -1099,6 +1120,8 @@ sleepq_remove(struct thread *td, void *wchan)
/*
* Abort a thread as if an interrupt had occurred. Only abort
* interruptible waits (unfortunately it isn't safe to abort others).
+ *
+ * Requires thread lock on entry, releases on return.
*/
int
sleepq_abort(struct thread *td, int intrval)
@@ -1115,27 +1138,32 @@ sleepq_abort(struct thread *td, int intrval)
* If the TDF_TIMEOUT flag is set, just leave. A
* timeout is scheduled anyhow.
*/
- if (td->td_flags & TDF_TIMEOUT)
+ if (td->td_flags & TDF_TIMEOUT) {
+ thread_unlock(td);
return (0);
+ }
CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
td->td_intrval = intrval;
td->td_flags |= TDF_SLEEPABORT;
+
/*
* If the thread has not slept yet it will find the signal in
* sleepq_catch_signals() and call sleepq_resume_thread. Otherwise
* we have to do it here.
*/
- if (!TD_IS_SLEEPING(td))
+ if (!TD_IS_SLEEPING(td)) {
+ thread_unlock(td);
return (0);
+ }
wchan = td->td_wchan;
MPASS(wchan != NULL);
sq = sleepq_lookup(wchan);
MPASS(sq != NULL);
/* Thread is asleep on sleep queue sq, so wake it up. */
- return (sleepq_resume_thread(sq, td, 0));
+ return (sleepq_resume_thread(sq, td, 0, 0));
}
void
diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c
index b643c0150c41..69f6c5376c9d 100644
--- a/sys/kern/subr_taskqueue.c
+++ b/sys/kern/subr_taskqueue.c
@@ -710,7 +710,6 @@ _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
thread_lock(td);
sched_prio(td, pri);
sched_add(td, SRQ_BORING);
- thread_unlock(td);
}
return (0);
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index 12272e39d45d..6d303cef210c 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -314,7 +314,7 @@ turnstile_adjust_thread(struct turnstile *ts, struct thread *td)
* It needs to be moved if either its priority is lower than
* the previous thread or higher than the next thread.
*/
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
td1 = TAILQ_PREV(td, threadqueue, td_lockq);
td2 = TAILQ_NEXT(td, td_lockq);
if ((td1 != NULL && td->td_priority < td1->td_priority) ||
@@ -429,7 +429,7 @@ turnstile_adjust(struct thread *td, u_char oldpri)
*/
ts = td->td_blocked;
MPASS(ts != NULL);
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
mtx_assert(&ts->ts_lock, MA_OWNED);
/* Resort the turnstile on the list. */
@@ -693,7 +693,7 @@ turnstile_claim(struct turnstile *ts)
td = turnstile_first_waiter(ts);
MPASS(td != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
- THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
+ THREAD_LOCKPTR_BLOCKED_ASSERT(td, &ts->ts_lock);
/*
* Update the priority of the new owner if needed.
@@ -979,7 +979,7 @@ turnstile_unpend(struct turnstile *ts)
td = TAILQ_FIRST(&pending_threads);
TAILQ_REMOVE(&pending_threads, td, td_lockq);
SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
- thread_lock(td);
+ thread_lock_block_wait(td);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
MPASS(td->td_proc->p_magic == P_MAGIC);
MPASS(TD_ON_LOCK(td));
@@ -991,8 +991,7 @@ turnstile_unpend(struct turnstile *ts)
#ifdef INVARIANTS
td->td_tsqueue = 0xff;
#endif
- sched_add(td, SRQ_BORING);
- thread_unlock(td);
+ sched_add(td, SRQ_HOLD | SRQ_BORING);
}
mtx_unlock_spin(&ts->ts_lock);
}
diff --git a/sys/mips/nlm/cms.c b/sys/mips/nlm/cms.c
index b1105606e8d6..0567c30ab595 100644
--- a/sys/mips/nlm/cms.c
+++ b/sys/mips/nlm/cms.c
@@ -291,10 +291,11 @@ msgring_process_fast_intr(void *arg)
msgring_wakeup_sleep[cpu]++;
TD_CLR_IWAIT(td);
sched_add(td, SRQ_INTR);
- } else
+ } else {
+ thread_unlock(td);
msgring_wakeup_nosleep[cpu]++;
+ }
- thread_unlock(td);
return (FILTER_HANDLED);
}
@@ -382,7 +383,6 @@ create_msgring_thread(int hwtid)
thread_lock(td);
sched_class(td, PRI_ITHD);
sched_add(td, SRQ_INTR);
- thread_unlock(td);
}
int
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 7be1941416be..cd856657f341 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -376,9 +376,13 @@ struct thread0_storage {
};
struct mtx *thread_lock_block(struct thread *);
-void thread_lock_unblock(struct thread *, struct mtx *);
+void thread_lock_block_wait(struct thread *);
void thread_lock_set(struct thread *, struct mtx *);
+void thread_lock_unblock(struct thread *, struct mtx *);
#define THREAD_LOCK_ASSERT(td, type) \
+ mtx_assert((td)->td_lock, (type))
+
+#define THREAD_LOCK_BLOCKED_ASSERT(td, type) \
do { \
struct mtx *__m = (td)->td_lock; \
if (__m != &blocked_lock) \
@@ -388,8 +392,17 @@ do { \
#ifdef INVARIANTS
#define THREAD_LOCKPTR_ASSERT(td, lock) \
do { \
- struct mtx *__m = (td)->td_lock; \
- KASSERT((__m == &blocked_lock || __m == (lock)), \
+ struct mtx *__m; \
+ __m = (td)->td_lock; \
+ KASSERT(__m == (lock), \
+ ("Thread %p lock %p does not match %p", td, __m, (lock))); \
+} while (0)
+
+#define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \
+do { \
+ struct mtx *__m; \
+ __m = (td)->td_lock; \
+ KASSERT(__m == (lock) || __m == &blocked_lock, \
("Thread %p lock %p does not match %p", td, __m, (lock))); \
} while (0)
@@ -401,6 +414,7 @@ do { \
} while (0)
#else
#define THREAD_LOCKPTR_ASSERT(td, lock)
+#define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock)
#define TD_LOCKS_INC(td)
#define TD_LOCKS_DEC(td)
@@ -519,6 +533,9 @@ do { \
#define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED)
#define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD)
+#define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \
+ ((td)->td_flags & TDF_SINTR) != 0)
+
#define KTDSTATE(td) \
(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
@@ -1089,7 +1106,7 @@ int securelevel_ge(struct ucred *cr, int level);
int securelevel_gt(struct ucred *cr, int level);
void sess_hold(struct session *);
void sess_release(struct session *);
-int setrunnable(struct thread *);
+int setrunnable(struct thread *, int);
void setsugid(struct proc *p);
int should_yield(void);
int sigonstack(size_t sp);
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 02560f1418ae..cba2667555c9 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -162,6 +162,7 @@ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
void rufetchtd(struct thread *td, struct rusage *ru);
void ruxagg(struct proc *p, struct thread *td);
+void ruxagg_locked(struct proc *p, struct thread *td);
struct uidinfo
*uifind(uid_t uid);
void uifree(struct uidinfo *uip);
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index 2f5a6654f61b..f02c0a105c79 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -105,7 +105,6 @@ void sched_throw(struct thread *td);
void sched_unlend_prio(struct thread *td, u_char prio);
void sched_user_prio(struct thread *td, u_char prio);
void sched_userret_slowpath(struct thread *td);
-void sched_wakeup(struct thread *td);
#ifdef RACCT
#ifdef SCHED_4BSD
fixpt_t sched_pctcpu_delta(struct thread *td);
@@ -136,11 +135,13 @@ sched_userret(struct thread *td)
*/
void sched_add(struct thread *td, int flags);
void sched_clock(struct thread *td, int ticks);
-void sched_preempt(struct thread *td);
-void sched_rem(struct thread *td);
-void sched_relinquish(struct thread *td);
struct thread *sched_choose(void);
+void sched_clock(struct thread *td, int cnt);
void sched_idletd(void *);
+void sched_preempt(struct thread *td);
+void sched_relinquish(struct thread *td);
+void sched_rem(struct thread *td);
+void sched_wakeup(struct thread *td, int srqflags);
/*
* Binding makes cpu affinity permanent while pinning is used to temporarily
@@ -190,6 +191,8 @@ sched_unpin(void)
#define SRQ_INTR 0x0004 /* It is probably urgent. */
#define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */
#define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */
+#define SRQ_HOLD 0x0020 /* Return holding original td lock */
+#define SRQ_HOLDTD 0x0040 /* Return holding td lock */
/* Scheduler stats. */
#ifdef SCHED_STATS
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 0b67641de343..7e2212e9c533 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -901,8 +901,8 @@ swapclear(struct proc *p)
td->td_flags |= TDF_INMEM;
td->td_flags &= ~TDF_SWAPINREQ;
TD_CLR_SWAPPED(td);
- if (TD_CAN_RUN(td))
- if (setrunnable(td)) {
+ if (TD_CAN_RUN(td)) {
+ if (setrunnable(td, 0)) {
#ifdef INVARIANTS
/*
* XXX: We just cleared TDI_SWAPPED
@@ -912,7 +912,8 @@ swapclear(struct proc *p)
panic("not waking up swapper");
#endif
}
- thread_unlock(td);
+ } else
+ thread_unlock(td);
}
p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT);
p->p_flag |= P_INMEM;