author    Jeff Roberson <jeff@FreeBSD.org>    2019-12-15 21:26:50 +0000
committer Jeff Roberson <jeff@FreeBSD.org>    2019-12-15 21:26:50 +0000
commit    686bcb5c14aba6e67524be84e125bfdd3514db9e (patch)
tree      b10f0aa09f2e058a51defcaf01c077e1f19351fa /sys/kern
parent    1223b40ebaf44102da51bedbd20f79829177982e (diff)
schedlock 4/4
Don't hold the scheduler lock while doing context switches. Instead we
unlock after selecting the new thread and switch within a spinlock
section, leaving interrupts and preemption disabled to prevent local
concurrency. This means that mi_switch() is entered with the thread
locked but returns without. This dramatically simplifies scheduler
locking because we will not hold the schedlock while spinning on a
blocked lock in switch.

This change has not been made to 4BSD, but in principle it would be
more straightforward there.

Discussed with:	markj
Reviewed by:	kib
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D22778
Notes:
    svn path=/head/; revision=355784
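Before the diff itself, a caller-side sketch of the convention the commit message describes: mi_switch() now takes only the switch flags, must be entered with the thread lock held, and returns with that lock already released, so callers drop their trailing thread_unlock(). The helper name below is hypothetical; the pattern mirrors kern_yield() and sys_yield() as changed in this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>

/*
 * Hypothetical example of the post-change mi_switch() contract: the
 * thread lock is required on entry and is no longer held on return.
 */
static void
example_relinquish(void)
{
	struct thread *td;

	td = curthread;
	thread_lock(td);			/* required on entry */
	mi_switch(SW_VOL | SWT_RELINQUISH);	/* consumes the thread lock */
	/* No thread_unlock(td) here; mi_switch() already released it. */
}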
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/kern_intr.c         13
-rw-r--r--  sys/kern/kern_poll.c          3
-rw-r--r--  sys/kern/kern_switch.c        3
-rw-r--r--  sys/kern/kern_synch.c        14
-rw-r--r--  sys/kern/kern_thread.c        6
-rw-r--r--  sys/kern/sched_4bsd.c        47
-rw-r--r--  sys/kern/sched_ule.c        129
-rw-r--r--  sys/kern/subr_epoch.c        10
-rw-r--r--  sys/kern/subr_sleepqueue.c   14
-rw-r--r--  sys/kern/subr_trap.c          3
-rw-r--r--  sys/kern/subr_turnstile.c     3
-rw-r--r--  sys/kern/vfs_bio.c            3
12 files changed, 93 insertions, 155 deletions
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 49defdb52f4c..67a799ec076f 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -1251,13 +1251,14 @@ ithread_loop(void *arg)
(ithd->it_flags & (IT_DEAD | IT_WAIT)) == 0) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
- mi_switch(SW_VOL | SWT_IWAIT, NULL);
- }
- if (ithd->it_flags & IT_WAIT) {
- wake = 1;
- ithd->it_flags &= ~IT_WAIT;
+ mi_switch(SW_VOL | SWT_IWAIT);
+ } else {
+ if (ithd->it_flags & IT_WAIT) {
+ wake = 1;
+ ithd->it_flags &= ~IT_WAIT;
+ }
+ thread_unlock(td);
}
- thread_unlock(td);
if (wake) {
wakeup(ithd);
wake = 0;
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c
index f7ff32663464..c6e1e7bd0944 100644
--- a/sys/kern/kern_poll.c
+++ b/sys/kern/kern_poll.c
@@ -557,8 +557,7 @@ poll_idle(void)
idlepoll_sleeping = 0;
ether_poll(poll_each_burst);
thread_lock(td);
- mi_switch(SW_VOL, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL);
} else {
idlepoll_sleeping = 1;
tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 254c4ffd2674..1090f906d017 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -240,8 +240,7 @@ critical_exit_preempt(void)
flags |= SWT_IDLE;
else
flags |= SWT_OWEPREEMPT;
- mi_switch(flags, NULL);
- thread_unlock(td);
+ mi_switch(flags);
}
void
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 8cb84086b3ca..944a48044f63 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -464,9 +464,11 @@ kdb_switch(void)
/*
* The machine independent parts of context switching.
+ *
+ * The thread lock is required on entry and is no longer held on return.
*/
void
-mi_switch(int flags, struct thread *newtd)
+mi_switch(int flags)
{
uint64_t runtime, new_switchtime;
struct thread *td;
@@ -482,7 +484,6 @@ mi_switch(int flags, struct thread *newtd)
("mi_switch: switch in a critical section"));
KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
("mi_switch: switch must be voluntary or involuntary"));
- KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself"));
/*
* Don't perform context switches from the debugger.
@@ -521,7 +522,7 @@ mi_switch(int flags, struct thread *newtd)
(flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)))
SDT_PROBE0(sched, , , preempt);
#endif
- sched_switch(td, newtd, flags);
+ sched_switch(td, flags);
CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
@@ -532,6 +533,7 @@ mi_switch(int flags, struct thread *newtd)
PCPU_SET(deadthread, NULL);
thread_stash(td);
}
+ spinlock_exit();
}
/*
@@ -646,8 +648,7 @@ kern_yield(int prio)
prio = td->td_user_pri;
if (prio >= 0)
sched_prio(td, prio);
- mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_RELINQUISH);
PICKUP_GIANT();
}
@@ -661,8 +662,7 @@ sys_yield(struct thread *td, struct yield_args *uap)
thread_lock(td);
if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
sched_prio(td, PRI_MAX_TIMESHARE);
- mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_RELINQUISH);
td->td_retval[0] = 0;
return (0);
}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index a3f02a9d4111..93da291dd72d 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -1068,8 +1068,7 @@ thread_suspend_check(int return_instead)
td->td_flags |= TDF_BOUNDARY;
}
PROC_SUNLOCK(p);
- mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
- thread_unlock(td);
+ mi_switch(SW_INVOL | SWT_SUSPEND);
PROC_LOCK(p);
}
return (0);
@@ -1097,8 +1096,7 @@ thread_suspend_switch(struct thread *td, struct proc *p)
sched_sleep(td, 0);
PROC_SUNLOCK(p);
DROP_GIANT();
- mi_switch(SW_VOL | SWT_SUSPEND, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_SUSPEND);
PICKUP_GIANT();
PROC_LOCK(p);
PROC_SLOCK(p);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index c558b9135749..9ac26355624d 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -671,7 +671,7 @@ schedinit(void)
*/
thread0.td_lock = &sched_lock;
td_get_sched(&thread0)->ts_slice = sched_slice;
- mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
+ mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN);
}
int
@@ -973,8 +973,9 @@ sched_sleep(struct thread *td, int pri)
}
void
-sched_switch(struct thread *td, struct thread *newtd, int flags)
+sched_switch(struct thread *td, int flags)
{
+ struct thread *newtd;
struct mtx *tmtx;
struct td_sched *ts;
struct proc *p;
@@ -1027,25 +1028,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if ((td->td_flags & TDF_NOLOAD) == 0)
sched_load_rem();
- if (newtd) {
- /*
- * The thread we are about to run needs to be counted
- * as if it had been added to the run queue and selected.
- * It came from:
- * * A preemption
- * * An upcall
- * * A followon
- */
- KASSERT((newtd->td_inhibitors == 0),
- ("trying to run inhibited thread"));
- newtd->td_flags |= TDF_DIDRUN;
- TD_SET_RUNNING(newtd);
- if ((newtd->td_flags & TDF_NOLOAD) == 0)
- sched_load_add();
- } else {
- newtd = choosethread();
- }
-
+ newtd = choosethread();
MPASS(newtd->td_lock == &sched_lock);
#if (KTR_COMPILE & KTR_SCHED) != 0
@@ -1117,7 +1100,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
#endif
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
- MPASS(td->td_lock == &sched_lock);
+ spinlock_enter();
+ mtx_unlock_spin(&sched_lock);
}
void
@@ -1517,12 +1501,12 @@ sched_preempt(struct thread *td)
{
SDT_PROBE2(sched, , , surrender, td, td->td_proc);
- thread_lock(td);
- if (td->td_critnest > 1)
+ if (td->td_critnest > 1) {
td->td_owepreempt = 1;
- else
- mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
- thread_unlock(td);
+ } else {
+ thread_lock(td);
+ mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT);
+ }
}
void
@@ -1551,7 +1535,8 @@ sched_bind(struct thread *td, int cpu)
if (PCPU_GET(cpuid) == cpu)
return;
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL);
+ thread_lock(td);
#endif
}
@@ -1574,8 +1559,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_RELINQUISH);
}
int
@@ -1666,8 +1650,7 @@ sched_idletd(void *dummy)
}
mtx_lock_spin(&sched_lock);
- mi_switch(SW_VOL | SWT_IDLE, NULL);
- mtx_unlock_spin(&sched_lock);
+ mi_switch(SW_VOL | SWT_IDLE);
}
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 24015bcf3c6d..703fdb51b2c3 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -332,7 +332,6 @@ static void sched_balance(void);
static int sched_balance_pair(struct tdq *, struct tdq *);
static inline struct tdq *sched_setcpu(struct thread *, int, int);
static inline void thread_unblock_switch(struct thread *, struct mtx *);
-static struct mtx *sched_switch_migrate(struct tdq *, struct thread *, int);
static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
struct cpu_group *cg, int indent);
@@ -1058,8 +1057,7 @@ tdq_idled(struct tdq *tdq)
tdq_unlock_pair(tdq, steal);
}
TDQ_UNLOCK(steal);
- mi_switch(SW_VOL | SWT_IDLE, NULL);
- thread_unlock(curthread);
+ mi_switch(SW_VOL | SWT_IDLE);
return (0);
}
@@ -2005,8 +2003,10 @@ static struct mtx *
sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
{
struct tdq *tdn;
- struct mtx *mtx;
+ KASSERT(THREAD_CAN_MIGRATE(td) ||
+ (td_get_sched(td)->ts_flags & TSF_BOUND) != 0,
+ ("Thread %p shouldn't migrate", td));
KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: "
"thread %s queued on absent CPU %d.", td->td_name,
td_get_sched(td)->ts_cpu));
@@ -2014,27 +2014,16 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
#ifdef SMP
tdq_load_rem(tdq, td);
/*
- * Do the lock dance required to avoid LOR. We grab an extra
- * spinlock nesting to prevent preemption while we're
- * not holding either run-queue lock.
- */
- spinlock_enter();
- mtx = thread_lock_block(td);
- mtx_unlock_spin(mtx);
-
- /*
- * Acquire both run-queue locks before placing the thread on the new
- * run-queue to avoid deadlocks created by placing a thread with a
- * blocked lock on the run-queue of a remote processor. The deadlock
- * occurs when a third processor attempts to lock the two queues in
- * question while the target processor is spinning with its own
- * run-queue lock held while waiting for the blocked lock to clear.
+ * Do the lock dance required to avoid LOR. We have an
+ * extra spinlock nesting from sched_switch() which will
+ * prevent preemption while we're holding neither run-queue lock.
*/
- tdq_lock_pair(tdn, tdq);
+ TDQ_UNLOCK(tdq);
+ TDQ_LOCK(tdn);
tdq_add(tdn, td, flags);
tdq_notify(tdn, td);
TDQ_UNLOCK(tdn);
- spinlock_exit();
+ TDQ_LOCK(tdq);
#endif
return (TDQ_LOCKPTR(tdn));
}
@@ -2056,8 +2045,9 @@ thread_unblock_switch(struct thread *td, struct mtx *mtx)
* be assigned elsewhere via binding.
*/
void
-sched_switch(struct thread *td, struct thread *newtd, int flags)
+sched_switch(struct thread *td, int flags)
{
+ struct thread *newtd;
struct tdq *tdq;
struct td_sched *ts;
struct mtx *mtx;
@@ -2065,16 +2055,13 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
int cpuid, preempted;
THREAD_LOCK_ASSERT(td, MA_OWNED);
- KASSERT(newtd == NULL, ("sched_switch: Unsupported newtd argument"));
cpuid = PCPU_GET(cpuid);
tdq = TDQ_SELF();
ts = td_get_sched(td);
- mtx = td->td_lock;
sched_pctcpu_update(ts, 1);
ts->ts_rltick = ticks;
td->td_lastcpu = td->td_oncpu;
- td->td_oncpu = NOCPU;
preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
(flags & SW_PREEMPT) != 0;
td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND);
@@ -2084,14 +2071,15 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
tdq->tdq_switchcnt++;
/*
- * The lock pointer in an idle thread should never change. Reset it
- * to CAN_RUN as well.
+ * Always block the thread lock so we can drop the tdq lock early.
*/
+ mtx = thread_lock_block(td);
+ spinlock_enter();
if (TD_IS_IDLETHREAD(td)) {
- MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
+ MPASS(mtx == TDQ_LOCKPTR(tdq));
TD_SET_CAN_RUN(td);
} else if (TD_IS_RUNNING(td)) {
- MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
+ MPASS(mtx == TDQ_LOCKPTR(tdq));
srqflag = preempted ?
SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
SRQ_OURSELF|SRQ_YIELDING;
@@ -2101,20 +2089,13 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
#endif
if (ts->ts_cpu == cpuid)
tdq_runq_add(tdq, td, srqflag);
- else {
- KASSERT(THREAD_CAN_MIGRATE(td) ||
- (ts->ts_flags & TSF_BOUND) != 0,
- ("Thread %p shouldn't migrate", td));
+ else
mtx = sched_switch_migrate(tdq, td, srqflag);
- }
} else {
/* This thread must be going to sleep. */
- mtx = thread_lock_block(td);
if (mtx != TDQ_LOCKPTR(tdq)) {
- spinlock_enter();
mtx_unlock_spin(mtx);
TDQ_LOCK(tdq);
- spinlock_exit();
}
tdq_load_rem(tdq, td);
#ifdef SMP
@@ -2140,6 +2121,9 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
*/
TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
newtd = choosethread();
+ sched_pctcpu_update(td_get_sched(newtd), 0);
+ TDQ_UNLOCK(tdq);
+
/*
* Call the MD code to switch contexts if necessary.
*/
@@ -2149,9 +2133,6 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
- lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
- TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
- sched_pctcpu_update(td_get_sched(newtd), 0);
#ifdef KDTRACE_HOOKS
/*
@@ -2162,17 +2143,9 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
if (dtrace_vtime_active)
(*dtrace_vtime_switch_func)(newtd);
#endif
-
+ td->td_oncpu = NOCPU;
cpu_switch(td, newtd, mtx);
- /*
- * We may return from cpu_switch on a different cpu. However,
- * we always return with td_lock pointing to the current cpu's
- * run queue lock.
- */
- cpuid = PCPU_GET(cpuid);
- tdq = TDQ_SELF();
- lock_profile_obtain_lock_success(
- &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
+ cpuid = td->td_oncpu = PCPU_GET(cpuid);
SDT_PROBE0(sched, , , on__cpu);
#ifdef HWPMC_HOOKS
@@ -2183,16 +2156,11 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
thread_unblock_switch(td, mtx);
SDT_PROBE0(sched, , , remain__cpu);
}
+ KASSERT(curthread->td_md.md_spinlock_count == 1,
+ ("invalid count %d", curthread->td_md.md_spinlock_count));
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
-
- /*
- * Assert that all went well and return.
- */
- TDQ_LOCK_ASSERT(tdq, MA_OWNED|MA_NOTRECURSED);
- MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
- td->td_oncpu = cpuid;
}
/*
@@ -2390,6 +2358,7 @@ void
sched_preempt(struct thread *td)
{
struct tdq *tdq;
+ int flags;
SDT_PROBE2(sched, , , surrender, td, td->td_proc);
@@ -2397,15 +2366,15 @@ sched_preempt(struct thread *td)
tdq = TDQ_SELF();
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
if (td->td_priority > tdq->tdq_lowpri) {
- int flags;
-
- flags = SW_INVOL | SW_PREEMPT;
- if (td->td_critnest > 1)
- td->td_owepreempt = 1;
- else if (TD_IS_IDLETHREAD(td))
- mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
- else
- mi_switch(flags | SWT_REMOTEPREEMPT, NULL);
+ if (td->td_critnest == 1) {
+ flags = SW_INVOL | SW_PREEMPT;
+ flags |= TD_IS_IDLETHREAD(td) ? SWT_REMOTEWAKEIDLE :
+ SWT_REMOTEPREEMPT;
+ mi_switch(flags);
+ /* Switch dropped thread lock. */
+ return;
+ }
+ td->td_owepreempt = 1;
} else {
tdq->tdq_owepreempt = 0;
}
@@ -2756,7 +2725,8 @@ sched_bind(struct thread *td, int cpu)
return;
ts->ts_cpu = cpu;
/* When we return from mi_switch we'll be on the correct cpu. */
- mi_switch(SW_VOL, NULL);
+ mi_switch(SW_VOL);
+ thread_lock(td);
}
/*
@@ -2790,8 +2760,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_RELINQUISH);
}
/*
@@ -2851,8 +2820,7 @@ sched_idletd(void *dummy)
for (;;) {
if (tdq->tdq_load) {
thread_lock(td);
- mi_switch(SW_VOL | SWT_IDLE, NULL);
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_IDLE);
}
switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
#ifdef SMP
@@ -2938,17 +2906,18 @@ sched_throw(struct thread *td)
PCPU_SET(switchticks, ticks);
PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(tdq);
} else {
- THREAD_LOCK_ASSERT(td, MA_OWNED);
tdq = TDQ_SELF();
- MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+ THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td);
- lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
}
- KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
newtd = choosethread();
- TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
+ spinlock_enter();
+ TDQ_UNLOCK(tdq);
+ KASSERT(curthread->td_md.md_spinlock_count == 1,
+ ("invalid count %d", curthread->td_md.md_spinlock_count));
cpu_throw(td, newtd); /* doesn't return */
}
@@ -2966,14 +2935,14 @@ sched_fork_exit(struct thread *td)
* Finish setting up thread glue so that it begins execution in a
* non-nested critical section with the scheduler lock held.
*/
+ KASSERT(curthread->td_md.md_spinlock_count == 1,
+ ("invalid count %d", curthread->td_md.md_spinlock_count));
cpuid = PCPU_GET(cpuid);
tdq = TDQ_SELF();
+ TDQ_LOCK(tdq);
+ spinlock_exit();
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
td->td_oncpu = cpuid;
- TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
- lock_profile_obtain_lock_success(
- &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
-
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
SDT_PROBE0(sched, , , on__cpu);
diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c
index be2242144c20..7d04bb92928c 100644
--- a/sys/kern/subr_epoch.c
+++ b/sys/kern/subr_epoch.c
@@ -577,15 +577,7 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused,
* so we have nothing to do except context switch away.
*/
counter_u64_add(switch_count, 1);
- mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
-
- /*
- * Release the thread lock while yielding to
- * allow other threads to acquire the lock
- * pointed to by TDQ_LOCKPTR(td). Else a
- * deadlock like situation might happen. (HPS)
- */
- thread_unlock(td);
+ mi_switch(SW_VOL | SWT_RELINQUISH);
thread_lock(td);
}
diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c
index 6e7410c4f46f..bd5150c1134b 100644
--- a/sys/kern/subr_sleepqueue.c
+++ b/sys/kern/subr_sleepqueue.c
@@ -546,8 +546,10 @@ out:
sq = sleepq_lookup(wchan);
sleepq_remove_thread(sq, td);
}
- mtx_unlock_spin(&sc->sc_lock);
MPASS(td->td_lock != &sc->sc_lock);
+ mtx_unlock_spin(&sc->sc_lock);
+ thread_unlock(td);
+
return (ret);
}
@@ -574,6 +576,7 @@ sleepq_switch(void *wchan, int pri)
*/
if (td->td_sleepqueue != NULL) {
mtx_unlock_spin(&sc->sc_lock);
+ thread_unlock(td);
return;
}
@@ -605,6 +608,7 @@ sleepq_switch(void *wchan, int pri)
sq = sleepq_lookup(wchan);
sleepq_remove_thread(sq, td);
mtx_unlock_spin(&sc->sc_lock);
+ thread_unlock(td);
return;
}
#ifdef SLEEPQUEUE_PROFILING
@@ -616,7 +620,7 @@ sleepq_switch(void *wchan, int pri)
thread_lock_set(td, &sc->sc_lock);
SDT_PROBE0(sched, , , sleep);
TD_SET_SLEEPING(td);
- mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
+ mi_switch(SW_VOL | SWT_SLEEPQ);
KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
@@ -668,7 +672,6 @@ sleepq_wait(void *wchan, int pri)
MPASS(!(td->td_flags & TDF_SINTR));
thread_lock(td);
sleepq_switch(wchan, pri);
- thread_unlock(td);
}
/*
@@ -681,7 +684,6 @@ sleepq_wait_sig(void *wchan, int pri)
int rcatch;
rcatch = sleepq_catch_signals(wchan, pri);
- thread_unlock(curthread);
if (rcatch)
return (rcatch);
return (sleepq_check_signals());
@@ -698,9 +700,9 @@ sleepq_timedwait(void *wchan, int pri)
td = curthread;
MPASS(!(td->td_flags & TDF_SINTR));
+
thread_lock(td);
sleepq_switch(wchan, pri);
- thread_unlock(td);
return (sleepq_check_timeout());
}
@@ -715,8 +717,6 @@ sleepq_timedwait_sig(void *wchan, int pri)
int rcatch, rvalt, rvals;
rcatch = sleepq_catch_signals(wchan, pri);
- thread_unlock(curthread);
-
/* We must always call check_timeout() to clear sleeptimo. */
rvalt = sleepq_check_timeout();
rvals = sleepq_check_signals();
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 6a73f700e12e..2abbc67de40c 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -280,8 +280,7 @@ ast(struct trapframe *framep)
#endif
thread_lock(td);
sched_prio(td, td->td_user_pri);
- mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
- thread_unlock(td);
+ mi_switch(SW_INVOL | SWT_NEEDRESCHED);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
ktrcsw(0, 1, __func__);
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
index ef1f013063b0..371825207033 100644
--- a/sys/kern/subr_turnstile.c
+++ b/sys/kern/subr_turnstile.c
@@ -813,12 +813,11 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
SDT_PROBE0(sched, , , sleep);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
- mi_switch(SW_VOL | SWT_TURNSTILE, NULL);
+ mi_switch(SW_VOL | SWT_TURNSTILE);
if (LOCK_LOG_TEST(lock, 0))
CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",
__func__, td->td_tid, lock, lock->lo_name);
- thread_unlock(td);
}
/*
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 541e4ec7414b..e7b67bfc408f 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1383,8 +1383,7 @@ bufshutdown(int show_busybufs)
*/
for (subiter = 0; subiter < 50 * iter; subiter++) {
thread_lock(curthread);
- mi_switch(SW_VOL, NULL);
- thread_unlock(curthread);
+ mi_switch(SW_VOL);
DELAY(1000);
}
#endif