-rw-r--r--  sys/alpha/alpha/trap.c | 6
-rw-r--r--  sys/amd64/amd64/apic_vector.S | 274
-rw-r--r--  sys/amd64/amd64/cpu_switch.S | 25
-rw-r--r--  sys/amd64/amd64/exception.S | 131
-rw-r--r--  sys/amd64/amd64/exception.s | 131
-rw-r--r--  sys/amd64/amd64/genassym.c | 1
-rw-r--r--  sys/amd64/amd64/mp_machdep.c | 2
-rw-r--r--  sys/amd64/amd64/mptable.c | 2
-rw-r--r--  sys/amd64/amd64/support.S | 14
-rw-r--r--  sys/amd64/amd64/support.s | 14
-rw-r--r--  sys/amd64/amd64/swtch.s | 25
-rw-r--r--  sys/amd64/amd64/sys_machdep.c | 1
-rw-r--r--  sys/amd64/amd64/trap.c | 151
-rw-r--r--  sys/amd64/amd64/vm_machdep.c | 12
-rw-r--r--  sys/amd64/include/cpu.h | 18
-rw-r--r--  sys/amd64/include/mptable.h | 2
-rw-r--r--  sys/amd64/include/pcpu.h | 1
-rw-r--r--  sys/i386/i386/apic_vector.s | 274
-rw-r--r--  sys/i386/i386/exception.s | 131
-rw-r--r--  sys/i386/i386/genassym.c | 1
-rw-r--r--  sys/i386/i386/globals.s | 6
-rw-r--r--  sys/i386/i386/mp_machdep.c | 2
-rw-r--r--  sys/i386/i386/mplock.s | 286
-rw-r--r--  sys/i386/i386/mptable.c | 2
-rw-r--r--  sys/i386/i386/simplelock.s | 5
-rw-r--r--  sys/i386/i386/support.s | 14
-rw-r--r--  sys/i386/i386/swtch.s | 25
-rw-r--r--  sys/i386/i386/sys_machdep.c | 1
-rw-r--r--  sys/i386/i386/trap.c | 151
-rw-r--r--  sys/i386/i386/vm86bios.s | 7
-rw-r--r--  sys/i386/i386/vm_machdep.c | 12
-rw-r--r--  sys/i386/include/asnames.h | 5
-rw-r--r--  sys/i386/include/cpu.h | 18
-rw-r--r--  sys/i386/include/globaldata.h | 1
-rw-r--r--  sys/i386/include/globals.h | 2
-rw-r--r--  sys/i386/include/ipl.h | 6
-rw-r--r--  sys/i386/include/lock.h | 78
-rw-r--r--  sys/i386/include/mptable.h | 2
-rw-r--r--  sys/i386/include/pcpu.h | 1
-rw-r--r--  sys/i386/include/smptests.h | 101
-rw-r--r--  sys/i386/isa/apic_ipl.s | 48
-rw-r--r--  sys/i386/isa/apic_vector.s | 274
-rw-r--r--  sys/i386/isa/ipl.s | 119
-rw-r--r--  sys/i386/isa/ipl_funcs.c | 241
-rw-r--r--  sys/kern/init_sysent.c | 8
-rw-r--r--  sys/kern/kern_prot.c | 15
-rw-r--r--  sys/kern/kern_sig.c | 1
-rw-r--r--  sys/kern/kern_switch.c | 2
-rw-r--r--  sys/kern/kern_synch.c | 1
-rw-r--r--  sys/kern/ksched.c | 1
-rw-r--r--  sys/kern/subr_prof.c | 1
-rw-r--r--  sys/kern/subr_smp.c | 2
-rw-r--r--  sys/kern/subr_trap.c | 151
-rw-r--r--  sys/kern/vfs_extattr.c | 2
-rw-r--r--  sys/kern/vfs_syscalls.c | 2
-rw-r--r--  sys/posix4/ksched.c | 1
-rw-r--r--  sys/sys/ktrace.h | 2
-rw-r--r--  sys/sys/proc.h | 13
-rw-r--r--  sys/sys/signalvar.h | 17
-rw-r--r--  sys/sys/sysent.h | 4
60 files changed, 1003 insertions, 1843 deletions
diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c
index 7e9b15e82651..dd69b9a08585 100644
--- a/sys/alpha/alpha/trap.c
+++ b/sys/alpha/alpha/trap.c
@@ -615,7 +615,7 @@ syscall(code, framep)
else
callp = &p->p_sysent->sv_table[code];
- nargs = callp->sy_narg + hidden;
+ nargs = (callp->sy_narg & SYF_ARGMASK) + hidden;
switch (nargs) {
default:
if (nargs > 10) /* XXX */
@@ -639,13 +639,13 @@ syscall(code, framep)
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args + hidden);
+ ktrsyscall(p->p_tracep, code, (callp->sy_narg & SYF_ARGMASK), args + hidden);
#endif
if (error == 0) {
p->p_retval[0] = 0;
p->p_retval[1] = 0;
- STOPEVENT(p, S_SCE, callp->sy_narg);
+ STOPEVENT(p, S_SCE, (callp->sy_narg & SYF_ARGMASK));
error = (*callp->sy_call)(p, args + hidden);
}
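
The masking added in this hunk reflects a re-encoding of sy_narg: the field now carries flag bits alongside the argument count, so every consumer has to mask with SYF_ARGMASK. A minimal C sketch of the idea follows; the bit values and helper names are assumptions for illustration, the real definitions live in sys/sys/sysent.h (listed in the diffstat above).

#define SYF_ARGMASK	0x0000ffff	/* assumed: low bits hold the arg count */
#define SYF_MPSAFE	0x00010000	/* assumed: set when no MP lock is needed */

static int
syscall_nargs(int sy_narg, int hidden)
{
	/* the old code used sy_narg directly, which would now include flag bits */
	return ((sy_narg & SYF_ARGMASK) + hidden);
}

static int
syscall_is_mpsafe(int sy_narg)
{
	return ((sy_narg & SYF_MPSAFE) != 0);
}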
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index ca909d907e69..587d763a4573 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -9,28 +9,17 @@
#include "i386/isa/intr_machdep.h"
-
-#ifdef FAST_SIMPLELOCK
-
-#define GET_FAST_INTR_LOCK \
- pushl $_fast_intr_lock ; /* address of lock */ \
- call _s_lock ; /* MP-safe */ \
- addl $4,%esp
-
-#define REL_FAST_INTR_LOCK \
- movl $0, _fast_intr_lock
-
-#else /* FAST_SIMPLELOCK */
+/*
+ * Interrupts must be enabled while waiting for the MP lock.
+ */
#define GET_FAST_INTR_LOCK \
- call _get_isrlock
+ sti; call _get_mplock; cli
#define REL_FAST_INTR_LOCK \
movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
call _MPrellock_edx
-#endif /* FAST_SIMPLELOCK */
-
/* convert an absolute IRQ# into a bitmask */
#define IRQ_BIT(irq_num) (1 << (irq_num))
@@ -42,10 +31,6 @@
 * Macros for interrupt entry, call to handler, and exit.
*/
-#ifdef FAST_WITHOUTCPL
-
-/*
- */
#define FAST_INTR(irq_num, vec_name) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \
popl %eax ; \
iret
-#else /* FAST_WITHOUTCPL */
-
-#define FAST_INTR(irq_num, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
- pushl %eax ; /* save only call-used registers */ \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %ds ; \
- MAYBE_PUSHL_ES ; \
- pushl %fs ; \
- movl $KDSEL, %eax ; \
- movl %ax, %ds ; \
- MAYBE_MOVW_AX_ES ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
- FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \
- GET_FAST_INTR_LOCK ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \
- addl $4, %esp ; \
- movl $0, lapic_eoi ; \
- lock ; \
- incl _cnt+V_INTR ; /* book-keeping can wait */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- lock ; \
- incl (%eax) ; \
- movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \
- notl %eax ; \
- andl _ipending, %eax ; \
- jne 2f ; /* yes, maybe handle them */ \
-1: ; \
- MEXITCOUNT ; \
- REL_FAST_INTR_LOCK ; \
- popl %fs ; \
- MAYBE_POPL_ES ; \
- popl %ds ; \
- popl %edx ; \
- popl %ecx ; \
- popl %eax ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- cmpb $3, _intr_nesting_level ; /* enough stack? */ \
- jae 1b ; /* no, return */ \
- movl _cpl, %eax ; \
- /* XXX next line is probably unnecessary now. */ \
- movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \
- lock ; \
- incb _intr_nesting_level ; /* ... really limit it ... */ \
- sti ; /* to do this as early as possible */ \
- popl %fs ; /* discard most of thin frame ... */ \
- MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
- popl %ecx ; /* ... original %ds ... */ \
- popl %edx ; \
- xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \
- pushal ; /* build fat frame (grrr) ... */ \
- pushl %ecx ; /* ... actually %ds ... */ \
- pushl %es ; \
- pushl %fs ; \
- movl $KDSEL, %eax ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
- movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \
- movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \
- movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
- subl $4, %esp ; /* junk for unit number */ \
- MEXITCOUNT ; \
- jmp _doreti
-
-#endif /** FAST_WITHOUTCPL */
-
-
/*
*
*/
@@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \
7: ; \
IMASK_UNLOCK
-#ifdef INTR_SIMPLELOCK
-#define ENLOCK
-#define DELOCK
-#define LATELOCK call _get_isrlock
-#else
-#define ENLOCK \
- ISR_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f
-#define DELOCK ISR_RELLOCK
-#define LATELOCK
-#endif
-
#ifdef APIC_INTR_DIAGNOSTIC
#ifdef APIC_INTR_DIAGNOSTIC_IRQ
log_intr_event:
@@ -319,125 +214,6 @@ log_intr_event:
#define APIC_ITRACE(name, irq_num, id)
#endif
-#ifdef CPL_AND_CML
-
-#define INTR(irq_num, vec_name, maybe_extra_ipending) \
- .text ; \
- SUPERALIGN_TEXT ; \
-/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- movl %ax, %ds ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
-; \
- maybe_extra_ipending ; \
-; \
- APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
-; \
- MASK_LEVEL_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
-0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ENLOCK ; \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 2f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
-; \
- incb _intr_nesting_level ; \
-; \
- /* entry point used by doreti_unpend for HWIs. */ \
-__CONCAT(Xresume,irq_num): ; \
- FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- AVCPL_LOCK ; /* MP-safe */ \
- movl _cml, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cml ; \
- AVCPL_UNLOCK ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
- incl _inside_intr ; \
- APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
- sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
- APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
- decl _inside_intr ; \
-; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- lock ; andl $~IRQ_BIT(irq_num), _cil ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
- MEXITCOUNT ; \
- LATELOCK ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl|cml */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- DELOCK ; /* XXX this is going away... */ \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 4f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
- POP_FRAME ; /* and return */ \
- iret
-
-#else /* CPL_AND_CML */
-
-
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \
EOI_IRQ(irq_num) ; \
0: ; \
APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ISR_TRYLOCK ; /* XXX this is going away... */ \
+ MP_TRYLOCK ; /* XXX this is going away... */ \
testl %eax, %eax ; /* did we get it? */ \
jz 3f ; /* no */ \
; \
APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 2f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
; \
incb _intr_nesting_level ; \
; \
@@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \
movl _intr_countp + (irq_num) * 4, %eax ; \
lock ; incl (%eax) ; \
; \
- AVCPL_LOCK ; /* MP-safe */ \
movl _cpl, %eax ; \
pushl %eax ; \
orl _intr_mask + (irq_num) * 4, %eax ; \
movl %eax, _cpl ; \
lock ; \
andl $~IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
; \
pushl _intr_unit + (irq_num) * 4 ; \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
@@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
MASK_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
lock ; \
btsl $(irq_num), iactive ; /* still active */ \
jnc 0b ; /* retry */ \
@@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- ISR_RELLOCK ; /* XXX this is going away... */ \
+ MP_RELLOCK ; \
POP_FRAME ; \
iret ; \
ALIGN_TEXT ; \
3: ; /* other cpu has isr lock */ \
APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 4f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
call forward_irq ; /* forward irq to lock holder */ \
POP_FRAME ; /* and return */ \
iret ; \
ALIGN_TEXT ; \
4: ; /* blocked */ \
APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
POP_FRAME ; /* and return */ \
iret
-#endif /* CPL_AND_CML */
-
-
/*
* Handle "spurious INTerrupts".
* Notes:
@@ -635,11 +398,6 @@ _Xcpucheckstate:
testl $PSL_VM, 24(%esp)
jne 1f
incl %ebx /* system or interrupt */
-#ifdef CPL_AND_CML
- cmpl $0, _inside_intr
- je 1f
- incl %ebx /* interrupt */
-#endif
1:
movl _cpuid, %eax
movl %ebx, _checkstate_cpustate(,%eax,4)
@@ -693,17 +451,11 @@ _Xcpuast:
* Giant locks do not come cheap.
* A lot of cycles are going to be wasted here.
*/
- call _get_isrlock
+ call _get_mplock
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
movl $1, _astpending /* XXX */
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -716,7 +468,7 @@ _Xcpuast:
lock
btrl %eax, CNAME(resched_cpus)
jnc 2f
- movl $1, CNAME(want_resched)
+ orl $AST_RESCHED,_astpending
lock
incl CNAME(want_resched_cnt)
2:
@@ -749,7 +501,7 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- ISR_TRYLOCK
+ MP_TRYLOCK
testl %eax,%eax /* Did we get the lock ? */
jz 1f /* No */
@@ -758,14 +510,8 @@ _Xforward_irq:
cmpb $4, _intr_nesting_level
jae 2f
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -785,7 +531,7 @@ _Xforward_irq:
lock
incl CNAME(forward_irq_toodeepcnt)
3:
- ISR_RELLOCK
+ MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
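
The rewritten GET_FAST_INTR_LOCK above wraps the MP lock acquisition in sti/cli: a CPU that spins for the lock with interrupts disabled could never take the IPIs it may need while blocked. A hedged C rendering of the same pattern; enable_intr(), disable_intr(), get_mplock() and rel_mplock() exist in this tree, while the two wrapper names are invented for the sketch.

static void
fast_intr_lock_sketch(void)
{
	enable_intr();			/* keep IPIs deliverable ... */
	get_mplock();			/* ... while spinning for the giant MP lock */
	disable_intr();			/* back to fast-interrupt conditions */
}

static void
fast_intr_unlock_sketch(void)
{
	rel_mplock();			/* what MPrellock_edx on _mp_lock amounts to */
}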
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index f3982ae688f0..a05d541d1051 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */
.globl _panic
- .globl _want_resched
-_want_resched: .long 0 /* we need to re-run the scheduler */
#if defined(SWTCH_OPTIM_STATS)
.globl _swtch_optim_stats, _tlb_flush_count
_swtch_optim_stats: .long 0 /* number of _swtch_optims */
@@ -129,6 +127,9 @@ _idle:
/*
* XXX callers of cpu_switch() do a bogus splclock(). Locking should
* be left to cpu_switch().
+ *
+ * NOTE: spl*() may only be called while we hold the MP lock (which
+ * we do).
*/
call _spl0
@@ -159,14 +160,14 @@ idle_loop:
testl %eax,%eax
jnz 3f
+ /*
+ * Handle page-zeroing in the idle loop. Called with interrupts
+ * disabled and the MP lock released. Inside vm_page_zero_idle
+ * we enable interrupts and grab the mplock as required.
+ */
cmpl $0,_do_page_zero_idle
je 2f
- /* XXX appears to cause panics */
- /*
- * Inside zero_idle we enable interrupts and grab the mplock
- * as needed. It needs to be careful about entry/exit mutexes.
- */
call _vm_page_zero_idle /* internal locking */
testl %eax, %eax
jnz idle_loop
@@ -178,9 +179,15 @@ idle_loop:
cli
jmp idle_loop
+ /*
+ * Note that interrupts must be enabled while obtaining the MP lock
+ * in order to be able to take IPI's while blocked.
+ */
3:
movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
+ sti
call _get_mplock
+ cli
call _procrunnable
testl %eax,%eax
CROSSJUMP(jnz, sw1a, jz)
@@ -355,8 +362,8 @@ sw1a:
CROSSJUMP(je, _idle, jne) /* if no proc, idle */
movl %eax,%ecx
- movl $0,%eax
- movl %eax,_want_resched
+ xorl %eax,%eax
+ andl $~WANT_RESCHED,_astpending
#ifdef DIAGNOSTIC
cmpl %eax,P_WCHAN(%ecx)
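
The idle-loop comments added above encode a specific ordering: the MP lock may only be acquired with interrupts enabled so that IPIs can still be delivered, and vm_page_zero_idle() runs without the lock and takes it internally only when needed. A rough, approximate C rendering of that flow; procrunnable(), vm_page_zero_idle(), do_page_zero_idle, get_mplock() and rel_mplock() are real, the remaining helpers stand in for assembly labels.

static void
idle_loop_sketch(void)
{
	for (;;) {
		disable_intr();
		if (procrunnable()) {
			enable_intr();		/* IPIs must stay deliverable */
			get_mplock();		/* ... while spinning for Giant */
			disable_intr();
			if (procrunnable())
				idle_switch();	/* sw1a: pick a proc, does not return */
			rel_mplock();
			continue;
		}
		if (do_page_zero_idle && vm_page_zero_idle())
			continue;		/* zeroed a page; recheck runqueue */
		enable_intr();
		halt_until_interrupt();		/* sti; hlt; cli */
	}
}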
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 7042d58f79cb..91c5b8aa6885 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -41,23 +41,11 @@
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
-#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */
+#include <machine/smptests.h> /** various SMP options */
#endif
#include "assym.s"
-#ifndef SMP
-#define ECPL_LOCK /* make these nops */
-#define ECPL_UNLOCK
-#define ICPL_LOCK
-#define ICPL_UNLOCK
-#define FAST_ICPL_UNLOCK
-#define AICPL_LOCK
-#define AICPL_UNLOCK
-#define AVCPL_LOCK
-#define AVCPL_UNLOCK
-#endif /* SMP */
-
#ifdef SMP
#define MOVL_KPSEL_EAX movl $KPSEL,%eax
#else
@@ -71,16 +59,45 @@
/* Trap handling */
/*****************************************************************************/
/*
- * Trap and fault vector routines
+ * Trap and fault vector routines.
+ *
+ * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts. A few of the traps we use are interrupt gates,
+ * SDT_SYS386IGT, which are nearly the same thing except interrupts are
+ * disabled on entry.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process. The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
+ * the interrupt disable state prior to the trap), the code segment register,
+ * and the return instruction pointer are pushed by the cpu. The cpu
+ * will also push an 'error' code for certain traps. We push a dummy
+ * error code for those traps where the cpu doesn't in order to maintain
+ * a consistent frame. We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we
+ * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ *
+ * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means
+ * that we must be careful in regards to accessing global variables. We
+ * save (push) the current cpl (our software interrupt disable mask), call
+ * the trap function, then call _doreti to restore the cpl and deal with
+ * ASTs (software interrupts). _doreti will determine if the restoration
+ * of the cpl unmasked any pending interrupts and will issue those interrupts
+ * synchronously prior to doing the iret.
+ *
+ * At the moment we must own the MP lock to do any cpl manipulation, which
+ * means we must own it prior to calling _doreti. The syscall case attempts
+ * to avoid this by handling a reduced set of cases itself and iret'ing.
*/
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \
.type __CONCAT(_X,name),@function; __CONCAT(_X,name):
#define TRAP(a) pushl $(a) ; jmp _alltraps
-/*
- * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose
- * control. The sti's give the standard losing behaviour for ddb and kgdb.
- */
#ifdef BDE_DEBUGGER
#define BDBTRAP(name) \
ss ; \
@@ -160,16 +177,9 @@ IDTVEC(fpu)
#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- FPU_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%eax
- pushl %eax /* save original cml */
-#else
+ MP_LOCK
movl _cpl,%eax
pushl %eax /* save original cpl */
-#endif /* CPL_AND_CML */
- ECPL_UNLOCK
pushl $0 /* dummy unit to finish intr frame */
#else /* SMP */
movl _cpl,%eax
@@ -190,6 +200,16 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+ /*
+ * _alltraps entry point. Interrupts are enabled if this was a trap
+ * gate (TGT), else disabled if this was an interrupt gate (IGT).
+ * Note that int0x80_syscall is a trap gate. Only page faults
+ * use an interrupt gate.
+ *
+ * Note that all calls to MP_LOCK must occur with interrupts enabled
+ * in order to be able to take IPI's while waiting for the lock.
+ */
+
SUPERALIGN_TEXT
.globl _alltraps
.type _alltraps,@function
@@ -208,14 +228,8 @@ alltraps_with_regs_pushed:
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
MPLOCKED incl _cnt+V_TRAP
- ALIGN_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%ebx /* keep orig. cml here during trap() */
-#else
+ MP_LOCK
movl _cpl,%ebx /* keep orig. cpl here during trap() */
-#endif
- ECPL_UNLOCK
call _trap
/*
@@ -229,17 +243,19 @@ calltrap:
jmp _doreti
/*
- * Call gate entry for syscall.
+ * SYSCALL CALL GATE (old entry point for a.out binaries)
+ *
* The intersegment call has been set up to specify one dummy parameter.
+ *
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
* final spot. It has to be done this way because esp can't be just
* temporarily altered for the pushfl - an interrupt might come in
* and clobber the saved cs/eip.
- */
-/*
- * THis first callgate is used for the old a.out binaries
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(syscall)
@@ -259,20 +275,28 @@ IDTVEC(syscall)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- SYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * Even though the name says 'int0x80', this is actually a TGT (trap gate)
+ * rather than an IGT (interrupt gate). Thus interrupts are enabled on
+ * entry just as they are for a normal syscall.
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
@@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall)
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- ALTSYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
ENTRY(fork_trampoline)
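
Paraphrased in C, the new syscall stubs above behave roughly as follows: syscall2() runs without the MP lock, and the lock is only taken on the way out when an AST has to be processed through _doreti. syscall2(), astpending, get_mplock() and intr_nesting_level are real; treating the doreti paths as callable C functions is purely illustrative.

void
syscall_stub_sketch(struct trapframe frame)
{
	syscall2(frame);		/* may take and release the MP lock itself */
	disable_intr();			/* make the astpending test atomic */
	if (astpending == 0) {
		fast_syscall_return();	/* doreti_syscall_ret: restore regs, iret */
		/* not reached */
	}
	get_mplock();			/* _doreti still manipulates the cpl */
	intr_nesting_level = 1;
	doreti_with_cpl(0);		/* run ASTs/pending ints, then iret */
}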
diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s
index 7042d58f79cb..91c5b8aa6885 100644
--- a/sys/amd64/amd64/exception.s
+++ b/sys/amd64/amd64/exception.s
@@ -41,23 +41,11 @@
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
-#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */
+#include <machine/smptests.h> /** various SMP options */
#endif
#include "assym.s"
-#ifndef SMP
-#define ECPL_LOCK /* make these nops */
-#define ECPL_UNLOCK
-#define ICPL_LOCK
-#define ICPL_UNLOCK
-#define FAST_ICPL_UNLOCK
-#define AICPL_LOCK
-#define AICPL_UNLOCK
-#define AVCPL_LOCK
-#define AVCPL_UNLOCK
-#endif /* SMP */
-
#ifdef SMP
#define MOVL_KPSEL_EAX movl $KPSEL,%eax
#else
@@ -71,16 +59,45 @@
/* Trap handling */
/*****************************************************************************/
/*
- * Trap and fault vector routines
+ * Trap and fault vector routines.
+ *
+ * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts. A few of the traps we use are interrupt gates,
+ * SDT_SYS386IGT, which are nearly the same thing except interrupts are
+ * disabled on entry.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process. The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
+ * the interrupt disable state prior to the trap), the code segment register,
+ * and the return instruction pointer are pushed by the cpu. The cpu
+ * will also push an 'error' code for certain traps. We push a dummy
+ * error code for those traps where the cpu doesn't in order to maintain
+ * a consistent frame. We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we
+ * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ *
+ * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means
+ * that we must be careful in regards to accessing global variables. We
+ * save (push) the current cpl (our software interrupt disable mask), call
+ * the trap function, then call _doreti to restore the cpl and deal with
+ * ASTs (software interrupts). _doreti will determine if the restoration
+ * of the cpl unmasked any pending interrupts and will issue those interrupts
+ * synchronously prior to doing the iret.
+ *
+ * At the moment we must own the MP lock to do any cpl manipulation, which
+ * means we must own it prior to calling _doreti. The syscall case attempts
+ * to avoid this by handling a reduced set of cases itself and iret'ing.
*/
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \
.type __CONCAT(_X,name),@function; __CONCAT(_X,name):
#define TRAP(a) pushl $(a) ; jmp _alltraps
-/*
- * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose
- * control. The sti's give the standard losing behaviour for ddb and kgdb.
- */
#ifdef BDE_DEBUGGER
#define BDBTRAP(name) \
ss ; \
@@ -160,16 +177,9 @@ IDTVEC(fpu)
#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- FPU_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%eax
- pushl %eax /* save original cml */
-#else
+ MP_LOCK
movl _cpl,%eax
pushl %eax /* save original cpl */
-#endif /* CPL_AND_CML */
- ECPL_UNLOCK
pushl $0 /* dummy unit to finish intr frame */
#else /* SMP */
movl _cpl,%eax
@@ -190,6 +200,16 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+ /*
+ * _alltraps entry point. Interrupts are enabled if this was a trap
+ * gate (TGT), else disabled if this was an interrupt gate (IGT).
+ * Note that int0x80_syscall is a trap gate. Only page faults
+ * use an interrupt gate.
+ *
+ * Note that all calls to MP_LOCK must occur with interrupts enabled
+ * in order to be able to take IPI's while waiting for the lock.
+ */
+
SUPERALIGN_TEXT
.globl _alltraps
.type _alltraps,@function
@@ -208,14 +228,8 @@ alltraps_with_regs_pushed:
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
MPLOCKED incl _cnt+V_TRAP
- ALIGN_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%ebx /* keep orig. cml here during trap() */
-#else
+ MP_LOCK
movl _cpl,%ebx /* keep orig. cpl here during trap() */
-#endif
- ECPL_UNLOCK
call _trap
/*
@@ -229,17 +243,19 @@ calltrap:
jmp _doreti
/*
- * Call gate entry for syscall.
+ * SYSCALL CALL GATE (old entry point for a.out binaries)
+ *
* The intersegment call has been set up to specify one dummy parameter.
+ *
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
* final spot. It has to be done this way because esp can't be just
* temporarily altered for the pushfl - an interrupt might come in
* and clobber the saved cs/eip.
- */
-/*
- * THis first callgate is used for the old a.out binaries
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(syscall)
@@ -259,20 +275,28 @@ IDTVEC(syscall)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- SYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * Even though the name says 'int0x80', this is actually a TGT (trap gate)
+ * rather than an IGT (interrupt gate). Thus interrupts are enabled on
+ * entry just as they are for a normal syscall.
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
@@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall)
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- ALTSYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
ENTRY(fork_trampoline)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 23039820715e..5a2377be0085 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -176,6 +176,7 @@ ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
+ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 394848cd047f..1f350f5d1e27 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -848,7 +848,11 @@ ENTRY(i586_copyout)
jmp done_copyout
#endif /* I586_CPU && NNPX > 0 */
-/* copyin(from_user, to_kernel, len) */
+/*
+ * copyin(from_user, to_kernel, len)
+ *
+ * MPSAFE
+ */
ENTRY(copyin)
MEXITCOUNT
jmp *_copyin_vector
@@ -1130,6 +1134,8 @@ fastmove_tail_fault:
/*
* fu{byte,sword,word} : fetch a byte (sword, word) from user memory
+ *
+ * MP SAFE
*/
ENTRY(fuword)
movl _curpcb,%ecx
@@ -1154,6 +1160,9 @@ ENTRY(fuswintr)
movl $-1,%eax
ret
+/*
+ * MP SAFE
+ */
ENTRY(fusword)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
@@ -1166,6 +1175,9 @@ ENTRY(fusword)
movl $0,PCB_ONFAULT(%ecx)
ret
+/*
+ * MP SAFE
+ */
ENTRY(fubyte)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
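
The MPSAFE / MP SAFE annotations above are what allow the MP-safe syscall path introduced later in this diff to touch user memory without Giant. A small usage sketch with a hypothetical syscall:

static int
example_mpsafe_syscall(struct proc *p, void *uaddr)
{
	int value;
	int error;

	error = copyin(uaddr, &value, sizeof(value));	/* MPSAFE */
	if (error)
		return (error);
	p->p_retval[0] = value;		/* per-proc state, no lock needed */
	return (0);
}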
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index 394848cd047f..1f350f5d1e27 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -848,7 +848,11 @@ ENTRY(i586_copyout)
jmp done_copyout
#endif /* I586_CPU && NNPX > 0 */
-/* copyin(from_user, to_kernel, len) */
+/*
+ * copyin(from_user, to_kernel, len)
+ *
+ * MPSAFE
+ */
ENTRY(copyin)
MEXITCOUNT
jmp *_copyin_vector
@@ -1130,6 +1134,8 @@ fastmove_tail_fault:
/*
* fu{byte,sword,word} : fetch a byte (sword, word) from user memory
+ *
+ * MP SAFE
*/
ENTRY(fuword)
movl _curpcb,%ecx
@@ -1154,6 +1160,9 @@ ENTRY(fuswintr)
movl $-1,%eax
ret
+/*
+ * MP SAFE
+ */
ENTRY(fusword)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
@@ -1166,6 +1175,9 @@ ENTRY(fusword)
movl $0,PCB_ONFAULT(%ecx)
ret
+/*
+ * MP SAFE
+ */
ENTRY(fubyte)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index f3982ae688f0..a05d541d1051 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */
.globl _panic
- .globl _want_resched
-_want_resched: .long 0 /* we need to re-run the scheduler */
#if defined(SWTCH_OPTIM_STATS)
.globl _swtch_optim_stats, _tlb_flush_count
_swtch_optim_stats: .long 0 /* number of _swtch_optims */
@@ -129,6 +127,9 @@ _idle:
/*
* XXX callers of cpu_switch() do a bogus splclock(). Locking should
* be left to cpu_switch().
+ *
+ * NOTE: spl*() may only be called while we hold the MP lock (which
+ * we do).
*/
call _spl0
@@ -159,14 +160,14 @@ idle_loop:
testl %eax,%eax
jnz 3f
+ /*
+ * Handle page-zeroing in the idle loop. Called with interrupts
+ * disabled and the MP lock released. Inside vm_page_zero_idle
+ * we enable interrupts and grab the mplock as required.
+ */
cmpl $0,_do_page_zero_idle
je 2f
- /* XXX appears to cause panics */
- /*
- * Inside zero_idle we enable interrupts and grab the mplock
- * as needed. It needs to be careful about entry/exit mutexes.
- */
call _vm_page_zero_idle /* internal locking */
testl %eax, %eax
jnz idle_loop
@@ -178,9 +179,15 @@ idle_loop:
cli
jmp idle_loop
+ /*
+ * Note that interrupts must be enabled while obtaining the MP lock
+ * in order to be able to take IPI's while blocked.
+ */
3:
movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
+ sti
call _get_mplock
+ cli
call _procrunnable
testl %eax,%eax
CROSSJUMP(jnz, sw1a, jz)
@@ -355,8 +362,8 @@ sw1a:
CROSSJUMP(je, _idle, jne) /* if no proc, idle */
movl %eax,%ecx
- movl $0,%eax
- movl %eax,_want_resched
+ xorl %eax,%eax
+ andl $~WANT_RESCHED,_astpending
#ifdef DIAGNOSTIC
cmpl %eax,P_WCHAN(%ecx)
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index d722735f8b4c..081d28c93b2c 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -53,6 +53,7 @@
#include <sys/user.h>
#include <machine/cpu.h>
+#include <machine/ipl.h>
#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
#include <machine/sysarch.h>
#ifdef SMP
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index a8b73cf6a02b..703d48dc84ed 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
-extern void syscall __P((struct trapframe frame));
+extern void syscall2 __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -140,38 +140,32 @@ static char *trap_msg[] = {
"machine check trap", /* 28 T_MCHK */
};
-static __inline void userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks));
+static __inline int userret __P((struct proc *p, struct trapframe *frame,
+ u_quad_t oticks, int have_mplock));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif
-static __inline void
-userret(p, frame, oticks)
+static __inline int
+userret(p, frame, oticks, have_mplock)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
+ int have_mplock;
{
int sig, s;
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
postsig(sig);
-
-#if 0
- if (!want_resched &&
- (p->p_priority <= p->p_usrpri) &&
- (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
- int newpriority;
- p->p_estcpu += 1;
- newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
}
-#endif
-
+
p->p_priority = p->p_usrpri;
- if (want_resched) {
+ if (resched_wanted()) {
/*
* Since we are curproc, clock will normally just change
* our priority without moving us from one queue to another
@@ -180,6 +174,10 @@ userret(p, frame, oticks)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
s = splhigh();
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
@@ -191,11 +189,16 @@ userret(p, frame, oticks)
/*
* Charge system time if profiling.
*/
- if (p->p_flag & P_PROFIL)
+ if (p->p_flag & P_PROFIL) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
-
+ }
curpriority = p->p_priority;
+ return(have_mplock);
}
/*
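
The reworked userret() above establishes a lazy-locking contract: the caller reports whether it already owns the MP lock, userret() grabs the lock only if a pending signal, a reschedule request or profiling actually forces it to, and it hands the (possibly updated) ownership state back. Sketch of the shape of that contract; the two helper names are hypothetical.

static __inline int
userret_contract_sketch(struct proc *p, int have_mplock)
{
	if (end_of_syscall_work_needed(p)) {	/* signals, resched, profiling */
		if (have_mplock == 0) {
			get_mplock();
			have_mplock = 1;
		}
		do_giant_protected_work(p);	/* postsig()/mi_switch()/addupc_task() */
	}
	return (have_mplock);
}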
@@ -604,7 +607,7 @@ kernel_trap:
#endif
out:
- userret(p, &frame, sticks);
+ userret(p, &frame, sticks, 1);
}
#ifdef notyet
@@ -999,11 +1002,18 @@ int trapwrite(addr)
}
/*
- * System call request from POSIX system call gate interface to kernel.
- * Like trap(), argument is call by reference.
+ * syscall2 - MP aware system call request C handler
+ *
+ * A system call is essentially treated as a trap except that the
+ * MP lock is not held on entry or return. We are responsible for
+ * obtaining the MP lock if necessary and for handling ASTs
+ * (e.g. a task switch) prior to return.
+ *
+ * In general, only simple access and manipulation of curproc and
+ * the current stack is allowed without having to hold the MP lock.
*/
void
-syscall(frame)
+syscall2(frame)
struct trapframe frame;
{
caddr_t params;
@@ -1012,22 +1022,42 @@ syscall(frame)
struct proc *p = curproc;
u_quad_t sticks;
int error;
+ int narg;
int args[8];
+ int have_mplock = 0;
u_int code;
#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL)
+ if (ISPL(frame.tf_cs) != SEL_UPL) {
+ get_mplock();
panic("syscall");
+ /* NOT REACHED */
+ }
#endif
- sticks = p->p_sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
p->p_md.md_regs = &frame;
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
+
if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is not MP aware.
+ */
+ get_mplock();
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ rel_mplock();
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
*/
if (code == SYS_syscall) {
/*
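
The double read of p_sticks above is a stability loop: with interrupts enabled and no MP lock held, a clock interrupt can update the field mid-read, so the value is re-read until two successive reads agree. The same idiom as a standalone helper:

static u_quad_t
read_sticks_stable(struct proc *p)
{
	volatile struct proc *vp = (volatile struct proc *)p;
	u_quad_t sticks;

	do {
		sticks = vp->p_sticks;
	} while (sticks != vp->p_sticks);
	return (sticks);
}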
@@ -1053,27 +1083,52 @@ syscall(frame)
else
callp = &p->p_sysent->sv_table[code];
- if (params && (i = callp->sy_narg * sizeof(int)) &&
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin is MP aware, but the tracing code is not
+ */
+ if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
+ get_mplock();
+ have_mplock = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ ktrsyscall(p->p_tracep, code, narg, args);
#endif
goto bad;
}
+
+ /*
+ * Try to run the syscall without the MP lock if the syscall
+ * is MP safe. We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ if (KTRPOINT(p, KTR_SYSCALL)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+ ktrsyscall(p->p_tracep, code, narg, args);
+ }
#endif
p->p_retval[0] = 0;
p->p_retval[1] = frame.tf_edx;
- STOPEVENT(p, S_SCE, callp->sy_narg);
+ STOPEVENT(p, S_SCE, narg); /* MP aware */
error = (*callp->sy_call)(p, args);
+ /*
+ * MP SAFE (we may or may not have the MP lock at this point)
+ */
switch (error) {
-
case 0:
/*
* Reinitialize proc pointer `p' as it may be different
@@ -1109,17 +1164,31 @@ bad:
break;
}
+ /*
+ * Traced syscall. trapsignal() is not MP aware.
+ */
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- /* Traced syscall. */
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
}
- userret(p, &frame, sticks);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ have_mplock = userret(p, &frame, sticks, have_mplock);
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
+ }
#endif
/*
@@ -1129,11 +1198,17 @@ bad:
*/
STOPEVENT(p, S_SCX, code);
+ /*
+ * Release the MP lock if we had to get it
+ */
+ if (have_mplock)
+ rel_mplock();
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode.
+ * directly into user mode. MP lock is held on entry and should be
+ * held on return.
*/
void
fork_return(p, frame)
@@ -1144,7 +1219,7 @@ fork_return(p, frame)
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0);
+ userret(p, &frame, 0, 1);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET))
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
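
Condensed from the trap.c hunks above, the locking flow of syscall2() reduces to: take Giant up front only for syscalls not flagged SYF_MPSAFE (or when ktrace/prepsyscall forces it), let userret() take it lazily if it must, and release it at the end only if anyone took it. A simplified sketch, with error handling and ktrace elided:

static void
syscall2_lock_flow_sketch(struct proc *p, struct sysent *callp,
    struct trapframe *frame, int *args, u_quad_t sticks)
{
	int have_mplock = 0;

	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();		/* non-MPSAFE syscalls still need Giant */
		have_mplock = 1;
	}
	(*callp->sy_call)(p, args);
	have_mplock = userret(p, frame, sticks, have_mplock);
	if (have_mplock)
		rel_mplock();		/* release only what we acquired */
}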
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 9f5d6c046d46..d58e5a6538fb 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -578,13 +578,7 @@ vm_page_zero_idle()
TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
m->queue = PQ_NONE;
splx(s);
-#if 0
- rel_mplock();
-#endif
pmap_zero_page(VM_PAGE_TO_PHYS(m));
-#if 0
- get_mplock();
-#endif
(void)splvm();
vm_page_flag_set(m, PG_ZERO);
m->queue = PQ_FREE + m->pc;
@@ -606,6 +600,12 @@ vm_page_zero_idle()
#ifdef SMP
}
#endif
+ /*
+ * We have to enable interrupts for a moment if the try_mplock fails
+ * in order to potentially take an IPI. XXX this should be in
+ * swtch.s
+ */
+ __asm __volatile("sti; nop; cli" : : : "memory");
return (0);
}
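
The inline assembly added to vm_page_zero_idle() opens a brief window with interrupts enabled so that a pending IPI can be taken when try_mplock fails; without it a CPU could spin here while another CPU waits for it to answer an IPI. The same statement as a standalone helper:

static __inline void
ipi_delivery_window(void)
{
	__asm __volatile("sti; nop; cli" : : : "memory");
}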
diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h
index 18049d0d681b..c6aa46fbe5b2 100644
--- a/sys/amd64/include/cpu.h
+++ b/sys/amd64/include/cpu.h
@@ -82,10 +82,13 @@
/*
* Preempt the current process if in interrupt from user mode,
* or after the current trap/syscall if in system mode.
+ *
+ * XXX: if astpending is later changed to an |= here due to more flags being
+ * added, we will have an atomicity problem. The type of atomicity we need is
+ * a non-locked orl.
*/
-#define need_resched() do { want_resched = 1; aston(); } while (0)
-
-#define resched_wanted() want_resched
+#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define resched_wanted() (astpending & AST_RESCHED)
/*
* Arrange to handle pending profiling ticks before returning to user mode.
@@ -100,10 +103,15 @@
/*
* Notify the current process (p) that it has a signal pending,
* process as soon as possible.
+ *
+ * XXX: aston() really needs to be an atomic (not locked, but an orl),
+ * in case need_resched() is set by an interrupt. But with astpending a
+ * per-cpu variable this is not trivial to do efficiently. For now we blow
+ * it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-#define aston() do { astpending = 1; } while (0)
+#define aston() do { astpending |= AST_PENDING; } while (0)
#define astoff()
/*
@@ -126,11 +134,9 @@
}
#ifdef _KERNEL
-extern int astpending;
extern char btext[];
extern char etext[];
extern u_char intr_nesting_level;
-extern int want_resched; /* resched was called */
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
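
With want_resched folded into astpending, a reschedule request is just another bit in the per-cpu AST word, set and tested through the macros shown above. A usage sketch; the two AST_* values are assumptions, the real constants are defined elsewhere in this commit.

#define AST_PENDING	0x00000001	/* assumed value */
#define AST_RESCHED	0x00000002	/* assumed value */

static void
timeslice_expired_sketch(void)
{
	need_resched();			/* astpending = AST_RESCHED | AST_PENDING */
}

static int
consume_resched_sketch(void)
{
	if (!resched_wanted())		/* (astpending & AST_RESCHED) == 0 */
		return (0);
	astpending &= ~AST_RESCHED;	/* clear the reschedule bit before switching */
	return (1);			/* caller should mi_switch() */
}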
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 28336d708534..58bd9cfe9416 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -66,6 +66,7 @@ struct globaldata {
caddr_t gd_prv_CADDR3;
unsigned *gd_prv_PADDR1;
#endif
+ u_int gd_astpending;
};
#ifdef SMP
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index ca909d907e69..587d763a4573 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -9,28 +9,17 @@
#include "i386/isa/intr_machdep.h"
-
-#ifdef FAST_SIMPLELOCK
-
-#define GET_FAST_INTR_LOCK \
- pushl $_fast_intr_lock ; /* address of lock */ \
- call _s_lock ; /* MP-safe */ \
- addl $4,%esp
-
-#define REL_FAST_INTR_LOCK \
- movl $0, _fast_intr_lock
-
-#else /* FAST_SIMPLELOCK */
+/*
+ * Interrupts must be enabled while waiting for the MP lock.
+ */
#define GET_FAST_INTR_LOCK \
- call _get_isrlock
+ sti; call _get_mplock; cli
#define REL_FAST_INTR_LOCK \
movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
call _MPrellock_edx
-#endif /* FAST_SIMPLELOCK */
-
/* convert an absolute IRQ# into a bitmask */
#define IRQ_BIT(irq_num) (1 << (irq_num))
@@ -42,10 +31,6 @@
 * Macros for interrupt entry, call to handler, and exit.
*/
-#ifdef FAST_WITHOUTCPL
-
-/*
- */
#define FAST_INTR(irq_num, vec_name) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \
popl %eax ; \
iret
-#else /* FAST_WITHOUTCPL */
-
-#define FAST_INTR(irq_num, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
- pushl %eax ; /* save only call-used registers */ \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %ds ; \
- MAYBE_PUSHL_ES ; \
- pushl %fs ; \
- movl $KDSEL, %eax ; \
- movl %ax, %ds ; \
- MAYBE_MOVW_AX_ES ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
- FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \
- GET_FAST_INTR_LOCK ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \
- addl $4, %esp ; \
- movl $0, lapic_eoi ; \
- lock ; \
- incl _cnt+V_INTR ; /* book-keeping can wait */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- lock ; \
- incl (%eax) ; \
- movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \
- notl %eax ; \
- andl _ipending, %eax ; \
- jne 2f ; /* yes, maybe handle them */ \
-1: ; \
- MEXITCOUNT ; \
- REL_FAST_INTR_LOCK ; \
- popl %fs ; \
- MAYBE_POPL_ES ; \
- popl %ds ; \
- popl %edx ; \
- popl %ecx ; \
- popl %eax ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- cmpb $3, _intr_nesting_level ; /* enough stack? */ \
- jae 1b ; /* no, return */ \
- movl _cpl, %eax ; \
- /* XXX next line is probably unnecessary now. */ \
- movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \
- lock ; \
- incb _intr_nesting_level ; /* ... really limit it ... */ \
- sti ; /* to do this as early as possible */ \
- popl %fs ; /* discard most of thin frame ... */ \
- MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
- popl %ecx ; /* ... original %ds ... */ \
- popl %edx ; \
- xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \
- pushal ; /* build fat frame (grrr) ... */ \
- pushl %ecx ; /* ... actually %ds ... */ \
- pushl %es ; \
- pushl %fs ; \
- movl $KDSEL, %eax ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
- movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \
- movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \
- movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
- subl $4, %esp ; /* junk for unit number */ \
- MEXITCOUNT ; \
- jmp _doreti
-
-#endif /** FAST_WITHOUTCPL */
-
-
/*
*
*/
@@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \
7: ; \
IMASK_UNLOCK
-#ifdef INTR_SIMPLELOCK
-#define ENLOCK
-#define DELOCK
-#define LATELOCK call _get_isrlock
-#else
-#define ENLOCK \
- ISR_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f
-#define DELOCK ISR_RELLOCK
-#define LATELOCK
-#endif
-
#ifdef APIC_INTR_DIAGNOSTIC
#ifdef APIC_INTR_DIAGNOSTIC_IRQ
log_intr_event:
@@ -319,125 +214,6 @@ log_intr_event:
#define APIC_ITRACE(name, irq_num, id)
#endif
-#ifdef CPL_AND_CML
-
-#define INTR(irq_num, vec_name, maybe_extra_ipending) \
- .text ; \
- SUPERALIGN_TEXT ; \
-/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- movl %ax, %ds ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
-; \
- maybe_extra_ipending ; \
-; \
- APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
-; \
- MASK_LEVEL_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
-0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ENLOCK ; \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 2f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
-; \
- incb _intr_nesting_level ; \
-; \
- /* entry point used by doreti_unpend for HWIs. */ \
-__CONCAT(Xresume,irq_num): ; \
- FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- AVCPL_LOCK ; /* MP-safe */ \
- movl _cml, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cml ; \
- AVCPL_UNLOCK ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
- incl _inside_intr ; \
- APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
- sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
- APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
- decl _inside_intr ; \
-; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- lock ; andl $~IRQ_BIT(irq_num), _cil ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
- MEXITCOUNT ; \
- LATELOCK ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl|cml */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- DELOCK ; /* XXX this is going away... */ \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 4f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
- POP_FRAME ; /* and return */ \
- iret
-
-#else /* CPL_AND_CML */
-
-
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \
EOI_IRQ(irq_num) ; \
0: ; \
APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ISR_TRYLOCK ; /* XXX this is going away... */ \
+ MP_TRYLOCK ; /* XXX this is going away... */ \
testl %eax, %eax ; /* did we get it? */ \
jz 3f ; /* no */ \
; \
APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 2f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
; \
incb _intr_nesting_level ; \
; \
@@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \
movl _intr_countp + (irq_num) * 4, %eax ; \
lock ; incl (%eax) ; \
; \
- AVCPL_LOCK ; /* MP-safe */ \
movl _cpl, %eax ; \
pushl %eax ; \
orl _intr_mask + (irq_num) * 4, %eax ; \
movl %eax, _cpl ; \
lock ; \
andl $~IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
; \
pushl _intr_unit + (irq_num) * 4 ; \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
@@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
MASK_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
lock ; \
btsl $(irq_num), iactive ; /* still active */ \
jnc 0b ; /* retry */ \
@@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- ISR_RELLOCK ; /* XXX this is going away... */ \
+ MP_RELLOCK ; \
POP_FRAME ; \
iret ; \
ALIGN_TEXT ; \
3: ; /* other cpu has isr lock */ \
APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 4f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
call forward_irq ; /* forward irq to lock holder */ \
POP_FRAME ; /* and return */ \
iret ; \
ALIGN_TEXT ; \
4: ; /* blocked */ \
APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
POP_FRAME ; /* and return */ \
iret
-#endif /* CPL_AND_CML */
-
-
/*
* Handle "spurious INTerrupts".
* Notes:
@@ -635,11 +398,6 @@ _Xcpucheckstate:
testl $PSL_VM, 24(%esp)
jne 1f
incl %ebx /* system or interrupt */
-#ifdef CPL_AND_CML
- cmpl $0, _inside_intr
- je 1f
- incl %ebx /* interrupt */
-#endif
1:
movl _cpuid, %eax
movl %ebx, _checkstate_cpustate(,%eax,4)
@@ -693,17 +451,11 @@ _Xcpuast:
* Giant locks do not come cheap.
* A lot of cycles are going to be wasted here.
*/
- call _get_isrlock
+ call _get_mplock
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
movl $1, _astpending /* XXX */
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -716,7 +468,7 @@ _Xcpuast:
lock
btrl %eax, CNAME(resched_cpus)
jnc 2f
- movl $1, CNAME(want_resched)
+ orl $AST_RESCHED,_astpending
lock
incl CNAME(want_resched_cnt)
2:
@@ -749,7 +501,7 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- ISR_TRYLOCK
+ MP_TRYLOCK
testl %eax,%eax /* Did we get the lock ? */
jz 1f /* No */
@@ -758,14 +510,8 @@ _Xforward_irq:
cmpb $4, _intr_nesting_level
jae 2f
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -785,7 +531,7 @@ _Xforward_irq:
lock
incl CNAME(forward_irq_toodeepcnt)
3:
- ISR_RELLOCK
+ MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index 7042d58f79cb..91c5b8aa6885 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -41,23 +41,11 @@
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
-#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */
+#include <machine/smptests.h> /** various SMP options */
#endif
#include "assym.s"
-#ifndef SMP
-#define ECPL_LOCK /* make these nops */
-#define ECPL_UNLOCK
-#define ICPL_LOCK
-#define ICPL_UNLOCK
-#define FAST_ICPL_UNLOCK
-#define AICPL_LOCK
-#define AICPL_UNLOCK
-#define AVCPL_LOCK
-#define AVCPL_UNLOCK
-#endif /* SMP */
-
#ifdef SMP
#define MOVL_KPSEL_EAX movl $KPSEL,%eax
#else
@@ -71,16 +59,45 @@
/* Trap handling */
/*****************************************************************************/
/*
- * Trap and fault vector routines
+ * Trap and fault vector routines.
+ *
+ * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
+ * the stack that mostly looks like an interrupt, but does not disable
+ * interrupts. A few of the traps we use are interrupt gates,
+ * SDT_SYS386IGT, which are nearly the same thing except interrupts are
+ * disabled on entry.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process. The amount of state depends on the type of trap
+ * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes
+ * the interrupt disable state prior to the trap), the code segment register,
+ * and the return instruction pointer are pushed by the cpu. The cpu
+ * will also push an 'error' code for certain traps. We push a dummy
+ * error code for those traps where the cpu doesn't, in order to maintain
+ * a consistent frame. We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we
+ * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ *
+ * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means
+ * that we must be careful with regard to accessing global variables. We
+ * save (push) the current cpl (our software interrupt disable mask), call
+ * the trap function, then call _doreti to restore the cpl and deal with
+ * ASTs (software interrupts). _doreti will determine if the restoration
+ * of the cpl unmasked any pending interrupts and will issue those interrupts
+ * synchronously prior to doing the iret.
+ *
+ * At the moment we must own the MP lock to do any cpl manipulation, which
+ * means we must own it prior to calling _doreti. The syscall case attempts
+ * to avoid this by handling a reduced set of cases itself and iret'ing.
*/
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \
.type __CONCAT(_X,name),@function; __CONCAT(_X,name):
#define TRAP(a) pushl $(a) ; jmp _alltraps
-/*
- * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose
- * control. The sti's give the standard losing behaviour for ddb and kgdb.
- */
#ifdef BDE_DEBUGGER
#define BDBTRAP(name) \
ss ; \
@@ -160,16 +177,9 @@ IDTVEC(fpu)
#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- FPU_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%eax
- pushl %eax /* save original cml */
-#else
+ MP_LOCK
movl _cpl,%eax
pushl %eax /* save original cpl */
-#endif /* CPL_AND_CML */
- ECPL_UNLOCK
pushl $0 /* dummy unit to finish intr frame */
#else /* SMP */
movl _cpl,%eax
@@ -190,6 +200,16 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+ /*
+ * _alltraps entry point. Interrupts are enabled if this was a trap
+ * gate (TGT), else disabled if this was an interrupt gate (IGT).
+ * Note that int0x80_syscall is a trap gate. Only page faults
+ * use an interrupt gate.
+ *
+ * Note that all calls to MP_LOCK must occur with interrupts enabled
+ * in order to be able to take IPI's while waiting for the lock.
+ */
+
SUPERALIGN_TEXT
.globl _alltraps
.type _alltraps,@function
@@ -208,14 +228,8 @@ alltraps_with_regs_pushed:
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
MPLOCKED incl _cnt+V_TRAP
- ALIGN_LOCK
- ECPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml,%ebx /* keep orig. cml here during trap() */
-#else
+ MP_LOCK
movl _cpl,%ebx /* keep orig. cpl here during trap() */
-#endif
- ECPL_UNLOCK
call _trap
/*
@@ -229,17 +243,19 @@ calltrap:
jmp _doreti
/*
- * Call gate entry for syscall.
+ * SYSCALL CALL GATE (old entry point for a.out binaries)
+ *
* The intersegment call has been set up to specify one dummy parameter.
+ *
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
* final spot. It has to be done this way because esp can't be just
* temporarily altered for the pushfl - an interrupt might come in
* and clobber the saved cs/eip.
- */
-/*
- * THis first callgate is used for the old a.out binaries
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does, it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(syscall)
@@ -259,20 +275,28 @@ IDTVEC(syscall)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- SYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * Even though the name says 'int0x80', this is actually a TGT (trap gate)
+ * rather than an IGT (interrupt gate). Thus interrupts are enabled on
+ * entry just as they are for a normal syscall.
+ *
+ * We do not obtain the MP lock, but the call to syscall2 might. If it
+ * does, it will release the lock prior to returning.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
@@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall)
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
MPLOCKED incl _cnt+V_SYSCALL
- ALTSYSCALL_LOCK
- call _syscall
-
- /*
- * Return via _doreti to handle ASTs.
- */
+ call _syscall2
+ MEXITCOUNT
+ cli /* atomic astpending access */
+ cmpl $0,_astpending
+ je doreti_syscall_ret
+#ifdef SMP
+ MP_LOCK
+#endif
pushl $0 /* cpl to restore */
- subl $4,%esp /* dummy unit to finish intr frame */
+ subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
- MEXITCOUNT
jmp _doreti
ENTRY(fork_trampoline)
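
The exception.s comments above describe the new syscall return path: test astpending with interrupts disabled, shortcut to doreti_syscall_ret when nothing is pending, and take the MP lock only when the slow path through _doreti is needed. The stand-alone C model below is an illustration of that control flow only, not kernel code; astpending and get_mplock() are names from this diff, while every other name is a stand-in invented for the sketch.

#include <stdio.h>

static unsigned int astpending;		/* per-cpu AST flag word (see ipl.h below) */

static void disable_intr(void) { }	/* stands in for "cli" */
static void get_mplock(void)   { printf("MP lock taken\n"); }
static void doreti(unsigned int cpl) { printf("doreti: cpl=%u, run ASTs\n", cpl); }

static void
syscall_epilogue(void)
{
	disable_intr();			/* make the astpending test atomic */
	if (astpending == 0) {
		printf("fast path: iret directly\n");	/* doreti_syscall_ret */
		return;
	}
	get_mplock();			/* AST and cpl work still need the lock */
	doreti(0);			/* restore a cpl of 0, handle pending ASTs */
}

int
main(void)
{
	syscall_epilogue();		/* nothing pending: fast return */
	astpending = 1;			/* e.g. aston() marked an AST */
	syscall_epilogue();		/* slow path through doreti */
	return (0);
}
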
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 23039820715e..5a2377be0085 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -176,6 +176,7 @@ ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
+ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s
index 8a46b522cc7c..31fbfd5e98b1 100644
--- a/sys/i386/i386/globals.s
+++ b/sys/i386/i386/globals.s
@@ -61,9 +61,10 @@ globaldata:
#else
.set globaldata,0
#endif
- .globl gd_curproc, gd_curpcb, gd_npxproc
+ .globl gd_curproc, gd_curpcb, gd_npxproc, gd_astpending
.globl gd_common_tss, gd_switchtime, gd_switchticks
.set gd_curproc,globaldata + GD_CURPROC
+ .set gd_astpending,globaldata + GD_ASTPENDING
.set gd_curpcb,globaldata + GD_CURPCB
.set gd_npxproc,globaldata + GD_NPXPROC
.set gd_common_tss,globaldata + GD_COMMON_TSS
@@ -80,9 +81,10 @@ globaldata:
#endif
#ifndef SMP
- .globl _curproc, _curpcb, _npxproc
+ .globl _curproc, _curpcb, _npxproc, _astpending
.globl _common_tss, _switchtime, _switchticks
.set _curproc,globaldata + GD_CURPROC
+ .set _astpending,globaldata + GD_ASTPENDING
.set _curpcb,globaldata + GD_CURPCB
.set _npxproc,globaldata + GD_NPXPROC
.set _common_tss,globaldata + GD_COMMON_TSS
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s
index a37b14a174f5..858df3310a21 100644
--- a/sys/i386/i386/mplock.s
+++ b/sys/i386/i386/mplock.s
@@ -18,7 +18,6 @@
* The attempt to seize/release the semaphore and the increment/decrement
* is done in one atomic operation. This way we are safe from all kinds
* of weird reentrancy situations.
- *
*/
#include <machine/asmacros.h>
@@ -51,12 +50,8 @@
* Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts.
*/
-/* location of saved TPR on stack */
-#define TPR_TARGET 8(%esp)
-
-/* after 1st acquire of lock we attempt to grab all hardware INTs */
-#define GRAB_HWI movl $ALLHWI_LEVEL, TPR_TARGET
-#define GRAB_HWI_2 movl $ALLHWI_LEVEL, lapic_tpr /* CHEAP_TPR */
+/* after 1st acquire of lock we grab all hardware INTs */
+#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr
/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */
#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */
@@ -64,7 +59,6 @@
#else /* GRAB_LOPRIO */
#define GRAB_HWI /* nop */
-#define GRAB_HWI_2 /* nop */
#define ARB_HWI /* nop */
#endif /* GRAB_LOPRIO */
@@ -75,7 +69,8 @@
* void MPgetlock_edx(unsigned int *lock : %edx)
* ----------------------------------
* Destroys %eax, %ecx. %edx must hold lock argument.
- * Note: TPR_TARGET (relative to the stack) is destroyed in GRAB_HWI
+ *
+ * Grabs hardware interrupts on the first acquisition.
*
* NOTE: Serialization is not required if we already hold the lock, since
* we already hold the lock, nor do we need a locked instruction if we
@@ -131,7 +126,7 @@ NON_GPROF_ENTRY(MPtrylock)
#ifdef GLPROFILE
incl _tryhits2
#endif /* GLPROFILE */
- GRAB_HWI_2 /* 1st acquire, grab hw INTs */
+ GRAB_HWI /* 1st acquire, grab hw INTs */
movl $1, %eax
ret
1:
@@ -197,38 +192,51 @@ NON_GPROF_ENTRY(MPrellock_edx)
*
* Stack (after call to _MPgetlock):
*
- * EFLAGS 4(%esp)
- * local APIC TPR 8(%esp) <-- note, TPR_TARGET
- * edx 12(%esp)
- * ecx 16(%esp)
- * eax 20(%esp)
+ * edx 4(%esp)
+ * ecx 8(%esp)
+ * eax 12(%esp)
+ *
+ * Requirements: Interrupts should be enabled on call so we can take
+ * IPI's and FAST INTs while we are waiting for the lock
+ * (else the system may not be able to halt).
+ *
+ * XXX there are still places where get_mplock() is called
+ * with interrupts disabled, so we have to temporarily reenable
+ * interrupts.
+ *
+ * Side effects: The current cpu will be given ownership of the
+ * hardware interrupts when it first acquires the lock.
+ *
+ * Costs: Initial acquisition requires the use of a costly locked
+ * instruction, but recursive acquisition is cheap. Release
+ * is very cheap.
*/
NON_GPROF_ENTRY(get_mplock)
pushl %eax
pushl %ecx
pushl %edx
-
- /* block all HW INTs via Task Priority Register */
- pushl lapic_tpr /* save current TPR */
- pushfl /* save current EFLAGS */
- testl $(1<<9), (%esp) /* test EI bit */
- jnz 1f /* INTs currently enabled */
- sti /* allow IPI and FAST INTs */
-1:
movl $_mp_lock, %edx
+ pushfl
+ testl $(1<<9), (%esp)
+ jz 2f
call _MPgetlock_edx
-
- popfl /* restore original EFLAGS */
- popl lapic_tpr /* restore TPR */
+ addl $4,%esp
+1:
popl %edx
popl %ecx
popl %eax
ret
+2:
+ sti
+ call _MPgetlock_edx
+ popfl
+ jmp 1b
/*
* Special version of get_mplock that is used during bootstrap when we can't
- * yet enable interrupts of any sort since the APIC isn't online yet.
+ * yet enable interrupts of any sort since the APIC isn't online yet. We
+ * do an end run around MPgetlock_edx to avoid enabling interrupts.
*
* XXX FIXME.. - APIC should be online from the start to simplify IPI's.
*/
@@ -236,20 +244,19 @@ NON_GPROF_ENTRY(boot_get_mplock)
pushl %eax
pushl %ecx
pushl %edx
-
#ifdef GRAB_LOPRIO
- pushl $0 /* dummy TPR (TPR_TARGET) */
pushfl
+ pushl lapic_tpr
+ cli
#endif
movl $_mp_lock, %edx
call _MPgetlock_edx
#ifdef GRAB_LOPRIO
+ popl lapic_tpr
popfl
- addl $4, %esp
#endif
-
popl %edx
popl %ecx
popl %eax
@@ -287,214 +294,6 @@ NON_GPROF_ENTRY(rel_mplock)
ret
/***********************************************************************
- * void get_isrlock()
- * -----------------
- * no registers preserved, assummed the calling ISR does!
- *
- * Stack (after call to _MPgetlock):
- *
- * EFLAGS 4(%esp)
- * local APIC TPR 8(%esp)
- */
-
-NON_GPROF_ENTRY(get_isrlock)
-
- /* block all HW INTs via Task Priority Register */
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl /* save current EFLAGS */
- sti /* allow IPI and FAST INTs */
-
- movl $_mp_lock, %edx
- call _MPgetlock_edx
-
- popfl /* restore original EFLAGS */
- popl lapic_tpr /* restore TPR */
- ret
-
-
-/***********************************************************************
- * void try_isrlock()
- * -----------------
- * no registers preserved, assummed the calling ISR does!
- * reg %eax == 1 if success
- */
-
-NON_GPROF_ENTRY(try_isrlock)
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- ret
-
-
-/***********************************************************************
- * void rel_isrlock()
- * -----------------
- * no registers preserved, assummed the calling ISR does!
- */
-
-NON_GPROF_ENTRY(rel_isrlock)
- movl $_mp_lock,%edx
- jmp _MPrellock_edx
-
-
-/***********************************************************************
- * FPU locks
- */
-
-NON_GPROF_ENTRY(get_fpu_lock)
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl
- sti
- movl $_mp_lock, %edx
- call _MPgetlock_edx
- popfl
- popl lapic_tpr
- ret
-
-#ifdef notneeded
-NON_GPROF_ENTRY(try_fpu_lock)
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- ret
-
-NON_GPROF_ENTRY(rel_fpu_lock)
- movl $_mp_lock,%edx
- jmp _MPrellock_edx
-#endif /* notneeded */
-
-
-/***********************************************************************
- * align locks
- */
-
-NON_GPROF_ENTRY(get_align_lock)
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl
- sti
- movl $_mp_lock, %edx
- call _MPgetlock_edx
- popfl
- popl lapic_tpr
- ret
-
-#ifdef notneeded
-NON_GPROF_ENTRY(try_align_lock)
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- ret
-
-NON_GPROF_ENTRY(rel_align_lock)
- movl $_mp_lock,%edx
- jmp _MPrellock_edx
-#endif /* notneeded */
-
-
-/***********************************************************************
- * syscall locks
- */
-
-NON_GPROF_ENTRY(get_syscall_lock)
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl
- sti
- movl $_mp_lock, %edx
- call _MPgetlock_edx
- popfl
- popl lapic_tpr
- ret
-
-#ifdef notneeded
-NON_GPROF_ENTRY(try_syscall_lock)
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- ret
-#endif /* notneeded */
-
-NON_GPROF_ENTRY(rel_syscall_lock)
- movl $_mp_lock,%edx
- jmp _MPrellock_edx
-
-
-/***********************************************************************
- * altsyscall locks
- */
-
-NON_GPROF_ENTRY(get_altsyscall_lock)
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl
- sti
- movl $_mp_lock, %edx
- call _MPgetlock_edx
- popfl
- popl lapic_tpr
- ret
-
-#ifdef notneeded
-NON_GPROF_ENTRY(try_altsyscall_lock)
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- ret
-
-NON_GPROF_ENTRY(rel_altsyscall_lock)
- movl $_mp_lock,%edx
- jmp _MPrellock_edx
-#endif /* notneeded */
-
-
-#ifdef RECURSIVE_MPINTRLOCK
-/***********************************************************************
- * void get_mpintrlock()
- * -----------------
- * All registers preserved
- */
-
-NON_GPROF_ENTRY(get_mpintrlock)
- pushl %eax
- pushl %ecx
- pushl %edx
-
-#ifdef GRAB_LOPRIO
- pushl lapic_tpr /* save current TPR (TPR_TARGET) */
- pushfl
-#endif
-
- movl $_mpintr_lock, %edx
- call _MPgetlock_edx
-
-#ifdef GRAB_LOPRIO
- popfl
- popl lapic_tpr
-#endif
-
- popl %edx
- popl %ecx
- popl %eax
- ret
-
-/***********************************************************************
- * void rel_mpintrlock()
- * -----------------
- * All registers preserved
- */
-
-NON_GPROF_ENTRY(rel_mpintrlock)
- pushl %ecx
- pushl %edx
-
- movl $_mpintr_lock,%edx
- call _MPrellock_edx
-
- popl %edx
- popl %ecx
- ret
-#endif /* RECURSIVE_MPINTRLOCK */
-
-
-/***********************************************************************
*
*/
.data
@@ -503,15 +302,6 @@ NON_GPROF_ENTRY(rel_mpintrlock)
.globl _mp_lock
_mp_lock: .long 0
- .globl _isr_lock
-_isr_lock: .long 0
-
-#ifdef RECURSIVE_MPINTRLOCK
- .globl _mpintr_lock
-_mpintr_lock: .long 0xffffffff
-#endif /* RECURSIVE_MPINTRLOCK */
-
-
#ifdef GLPROFILE
.globl _gethits
_gethits:
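
The comments above give get_mplock() a specific cost profile: one locked instruction for the initial acquisition, cheap recursion, and a very cheap release. The self-contained C11 model below illustrates that shape only; it is not the encoding the assembly actually uses for _mp_lock (which packs its state into a single 32-bit word), and every name here is local to the sketch.

#include <stdatomic.h>
#include <stdio.h>

#define NOCPU (-1)

static atomic_int giant_owner = ATOMIC_VAR_INIT(NOCPU);	/* holder's cpu id */
static int giant_count;			/* recursion depth, touched only by the owner */

static void
giant_get(int cpuid)
{
	if (atomic_load(&giant_owner) == cpuid) {
		giant_count++;		/* recursive acquire: no atomic needed */
		return;
	}
	for (;;) {			/* first acquire: costly locked cmpxchg */
		int expected = NOCPU;
		if (atomic_compare_exchange_weak(&giant_owner, &expected, cpuid))
			break;
	}
	giant_count = 1;
}

static void
giant_rel(void)
{
	if (--giant_count == 0)
		atomic_store(&giant_owner, NOCPU);	/* very cheap release */
}

int
main(void)
{
	giant_get(0);			/* initial acquisition */
	giant_get(0);			/* recursion is cheap */
	giant_rel();
	giant_rel();			/* lock is free again */
	printf("owner=%d count=%d\n", atomic_load(&giant_owner), giant_count);
	return (0);
}
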
diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/i386/i386/mptable.c
+++ b/sys/i386/i386/mptable.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/i386/i386/simplelock.s b/sys/i386/i386/simplelock.s
index 9abc3b74660a..378cf85389e9 100644
--- a/sys/i386/i386/simplelock.s
+++ b/sys/i386/i386/simplelock.s
@@ -186,8 +186,11 @@ ENTRY(s_unlock)
movl $0, (%eax)
ret
+#if 0
/*
+ * XXX CRUFTY SS_LOCK IMPLEMENTATION REMOVED XXX
+ *
* These versions of simple_lock block interrupts,
* making it suitable for regions accessed by both top and bottom levels.
* This is done by saving the current value of the cpu flags in a per-cpu
@@ -279,6 +282,8 @@ ENTRY(ss_unlock)
ss_unlock2:
ret
+#endif
+
/*
* These versions of simple_lock does not contain calls to profiling code.
* Thus they can be called from the profiling code.
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 394848cd047f..1f350f5d1e27 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -848,7 +848,11 @@ ENTRY(i586_copyout)
jmp done_copyout
#endif /* I586_CPU && NNPX > 0 */
-/* copyin(from_user, to_kernel, len) */
+/*
+ * copyin(from_user, to_kernel, len)
+ *
+ * MPSAFE
+ */
ENTRY(copyin)
MEXITCOUNT
jmp *_copyin_vector
@@ -1130,6 +1134,8 @@ fastmove_tail_fault:
/*
* fu{byte,sword,word} : fetch a byte (sword, word) from user memory
+ *
+ * MP SAFE
*/
ENTRY(fuword)
movl _curpcb,%ecx
@@ -1154,6 +1160,9 @@ ENTRY(fuswintr)
movl $-1,%eax
ret
+/*
+ * MP SAFE
+ */
ENTRY(fusword)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
@@ -1166,6 +1175,9 @@ ENTRY(fusword)
movl $0,PCB_ONFAULT(%ecx)
ret
+/*
+ * MP SAFE
+ */
ENTRY(fubyte)
movl _curpcb,%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index f3982ae688f0..a05d541d1051 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */
.globl _panic
- .globl _want_resched
-_want_resched: .long 0 /* we need to re-run the scheduler */
#if defined(SWTCH_OPTIM_STATS)
.globl _swtch_optim_stats, _tlb_flush_count
_swtch_optim_stats: .long 0 /* number of _swtch_optims */
@@ -129,6 +127,9 @@ _idle:
/*
* XXX callers of cpu_switch() do a bogus splclock(). Locking should
* be left to cpu_switch().
+ *
+ * NOTE: spl*() may only be called while we hold the MP lock (which
+ * we do).
*/
call _spl0
@@ -159,14 +160,14 @@ idle_loop:
testl %eax,%eax
jnz 3f
+ /*
+ * Handle page-zeroing in the idle loop. Called with interrupts
+ * disabled and the MP lock released. Inside vm_page_zero_idle
+ * we enable interrupts and grab the mplock as required.
+ */
cmpl $0,_do_page_zero_idle
je 2f
- /* XXX appears to cause panics */
- /*
- * Inside zero_idle we enable interrupts and grab the mplock
- * as needed. It needs to be careful about entry/exit mutexes.
- */
call _vm_page_zero_idle /* internal locking */
testl %eax, %eax
jnz idle_loop
@@ -178,9 +179,15 @@ idle_loop:
cli
jmp idle_loop
+ /*
+ * Note that interrupts must be enabled while obtaining the MP lock
+ * in order to be able to take IPI's while blocked.
+ */
3:
movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
+ sti
call _get_mplock
+ cli
call _procrunnable
testl %eax,%eax
CROSSJUMP(jnz, sw1a, jz)
@@ -355,8 +362,8 @@ sw1a:
CROSSJUMP(je, _idle, jne) /* if no proc, idle */
movl %eax,%ecx
- movl $0,%eax
- movl %eax,_want_resched
+ xorl %eax,%eax
+ andl $~WANT_RESCHED,_astpending
#ifdef DIAGNOSTIC
cmpl %eax,P_WCHAN(%ecx)
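
The idle loop comments above stress that the MP lock may only be obtained with interrupts enabled, so that IPIs (and FAST interrupts) can still be delivered while the cpu spins. A minimal sketch of that rule follows, assuming the enable_intr()/disable_intr() inlines from machine/cpufunc.h; the wrapper name is hypothetical.

static void
idle_get_mplock(void)
{
	enable_intr();		/* "sti": IPIs can be taken while we spin */
	get_mplock();		/* may block for a while on another cpu */
	disable_intr();		/* "cli": back to the idle loop's assumptions */
}
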
diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c
index d722735f8b4c..081d28c93b2c 100644
--- a/sys/i386/i386/sys_machdep.c
+++ b/sys/i386/i386/sys_machdep.c
@@ -53,6 +53,7 @@
#include <sys/user.h>
#include <machine/cpu.h>
+#include <machine/ipl.h>
#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */
#include <machine/sysarch.h>
#ifdef SMP
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index a8b73cf6a02b..703d48dc84ed 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
-extern void syscall __P((struct trapframe frame));
+extern void syscall2 __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -140,38 +140,32 @@ static char *trap_msg[] = {
"machine check trap", /* 28 T_MCHK */
};
-static __inline void userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks));
+static __inline int userret __P((struct proc *p, struct trapframe *frame,
+ u_quad_t oticks, int have_mplock));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif
-static __inline void
-userret(p, frame, oticks)
+static __inline int
+userret(p, frame, oticks, have_mplock)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
+ int have_mplock;
{
int sig, s;
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
postsig(sig);
-
-#if 0
- if (!want_resched &&
- (p->p_priority <= p->p_usrpri) &&
- (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
- int newpriority;
- p->p_estcpu += 1;
- newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
}
-#endif
-
+
p->p_priority = p->p_usrpri;
- if (want_resched) {
+ if (resched_wanted()) {
/*
* Since we are curproc, clock will normally just change
* our priority without moving us from one queue to another
@@ -180,6 +174,10 @@ userret(p, frame, oticks)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
s = splhigh();
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
@@ -191,11 +189,16 @@ userret(p, frame, oticks)
/*
* Charge system time if profiling.
*/
- if (p->p_flag & P_PROFIL)
+ if (p->p_flag & P_PROFIL) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
-
+ }
curpriority = p->p_priority;
+ return(have_mplock);
}
/*
@@ -604,7 +607,7 @@ kernel_trap:
#endif
out:
- userret(p, &frame, sticks);
+ userret(p, &frame, sticks, 1);
}
#ifdef notyet
@@ -999,11 +1002,18 @@ int trapwrite(addr)
}
/*
- * System call request from POSIX system call gate interface to kernel.
- * Like trap(), argument is call by reference.
+ * syscall2 - MP aware system call request C handler
+ *
+ * A system call is essentially treated as a trap except that the
+ * MP lock is not held on entry or return. We are responsible for
+ * obtaining the MP lock if necessary and for handling ASTs
+ * (e.g. a task switch) prior to return.
+ *
+ * In general, only simple access and manipulation of curproc and
+ * the current stack is allowed without having to hold MP lock.
*/
void
-syscall(frame)
+syscall2(frame)
struct trapframe frame;
{
caddr_t params;
@@ -1012,22 +1022,42 @@ syscall(frame)
struct proc *p = curproc;
u_quad_t sticks;
int error;
+ int narg;
int args[8];
+ int have_mplock = 0;
u_int code;
#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL)
+ if (ISPL(frame.tf_cs) != SEL_UPL) {
+ get_mplock();
panic("syscall");
+ /* NOT REACHED */
+ }
#endif
- sticks = p->p_sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
p->p_md.md_regs = &frame;
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
+
if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is not MP aware.
+ */
+ get_mplock();
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ rel_mplock();
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
*/
if (code == SYS_syscall) {
/*
@@ -1053,27 +1083,52 @@ syscall(frame)
else
callp = &p->p_sysent->sv_table[code];
- if (params && (i = callp->sy_narg * sizeof(int)) &&
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin is MP aware, but the tracing code is not
+ */
+ if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
+ get_mplock();
+ have_mplock = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ ktrsyscall(p->p_tracep, code, narg, args);
#endif
goto bad;
}
+
+ /*
+ * Try to run the syscall without the MP lock if the syscall
+ * is MP safe. We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ if (KTRPOINT(p, KTR_SYSCALL)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+ ktrsyscall(p->p_tracep, code, narg, args);
+ }
#endif
p->p_retval[0] = 0;
p->p_retval[1] = frame.tf_edx;
- STOPEVENT(p, S_SCE, callp->sy_narg);
+ STOPEVENT(p, S_SCE, narg); /* MP aware */
error = (*callp->sy_call)(p, args);
+ /*
+ * MP SAFE (we may or may not have the MP lock at this point)
+ */
switch (error) {
-
case 0:
/*
* Reinitialize proc pointer `p' as it may be different
@@ -1109,17 +1164,31 @@ bad:
break;
}
+ /*
+ * Traced syscall. trapsignal() is not MP aware.
+ */
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- /* Traced syscall. */
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
}
- userret(p, &frame, sticks);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ have_mplock = userret(p, &frame, sticks, have_mplock);
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
+ }
#endif
/*
@@ -1129,11 +1198,17 @@ bad:
*/
STOPEVENT(p, S_SCX, code);
+ /*
+ * Release the MP lock if we had to get it
+ */
+ if (have_mplock)
+ rel_mplock();
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode.
+ * directly into user mode. MP lock is held on entry and should be
+ * held on return.
*/
void
fork_return(p, frame)
@@ -1144,7 +1219,7 @@ fork_return(p, frame)
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0);
+ userret(p, &frame, 0, 1);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET))
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
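
syscall2() above only takes the MP lock when something actually requires it: a syscall not flagged SYF_MPSAFE, ktrace, signal delivery, or a reschedule. The fragment below is a pared-down sketch of that pattern; SYF_MPSAFE, get_mplock() and rel_mplock() are the names used in the diff, while the flag value, the trimmed sysent structure and run_syscall() itself are stand-ins for illustration.

#define SYF_MPSAFE 0x1			/* placeholder bit; the real flag lives in sys/sysent.h */

struct sysent_sketch {
	int	sy_narg;		/* argument count or'd with SYF_* flags */
	int	(*sy_call)(void *p, void *args);
};

static void get_mplock(void) { }	/* stand-ins for the mplock.s entry points */
static void rel_mplock(void) { }

static int
run_syscall(struct sysent_sketch *callp, void *p, void *args)
{
	int have_mplock = 0;
	int error;

	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();		/* syscall is not marked MP safe */
		have_mplock = 1;
	}
	error = callp->sy_call(p, args);
	/* ktrace, signal and reschedule handling may also demand the lock */
	if (have_mplock)
		rel_mplock();
	return (error);
}
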
diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s
index 778148176491..a64c0402c15b 100644
--- a/sys/i386/i386/vm86bios.s
+++ b/sys/i386/i386/vm86bios.s
@@ -64,7 +64,7 @@ ENTRY(vm86_bioscall)
#ifdef SMP
pushl %edx
- ALIGN_LOCK /* Get global lock */
+ MP_LOCK /* Get global lock */
popl %edx
#endif
@@ -136,12 +136,7 @@ ENTRY(vm86_bioscall)
* Return via _doreti
*/
#ifdef SMP
- ECPL_LOCK
-#ifdef CPL_AND_CML
-#error Not ready for CPL_AND_CML
-#endif
pushl _cpl /* cpl to restore */
- ECPL_UNLOCK
#else
pushl _cpl /* cpl to restore */
#endif
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 9f5d6c046d46..d58e5a6538fb 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -578,13 +578,7 @@ vm_page_zero_idle()
TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
m->queue = PQ_NONE;
splx(s);
-#if 0
- rel_mplock();
-#endif
pmap_zero_page(VM_PAGE_TO_PHYS(m));
-#if 0
- get_mplock();
-#endif
(void)splvm();
vm_page_flag_set(m, PG_ZERO);
m->queue = PQ_FREE + m->pc;
@@ -606,6 +600,12 @@ vm_page_zero_idle()
#ifdef SMP
}
#endif
+ /*
+ * We have to enable interrupts for a moment if the try_mplock fails
+ * in order to potentially take an IPI. XXX this should be in
+ * swtch.s
+ */
+ __asm __volatile("sti; nop; cli" : : : "memory");
return (0);
}
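
The comment added to vm_page_zero_idle() explains why interrupts are opened for a moment when the MP lock cannot be obtained: a pending IPI must be given a chance to arrive. A small sketch of that idea follows; the inline asm statement is the one added above, the wrapper function name is hypothetical.

static void
idle_allow_ipi(void)
{
	/* one-instruction window so a pending IPI can be delivered */
	__asm __volatile("sti; nop; cli" : : : "memory");
}
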
diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h
index b9b631d6d314..3ccbee6be344 100644
--- a/sys/i386/include/asnames.h
+++ b/sys/i386/include/asnames.h
@@ -155,7 +155,6 @@
#define _arith_invalid arith_invalid
#define _arith_overflow arith_overflow
#define _arith_underflow arith_underflow
-#define _astpending astpending
#define _bcopy bcopy
#define _bcopy_vector bcopy_vector
#define _bigJump bigJump
@@ -307,7 +306,7 @@
#define _swi_generic swi_generic
#define _swi_null swi_null
#define _swi_vm swi_vm
-#define _syscall syscall
+#define _syscall2 syscall2
#define _szsigcode szsigcode
#define _ticks ticks
#define _time time
@@ -321,7 +320,6 @@
#define _vm86paddr vm86paddr
#define _vm86pcb vm86pcb
#define _vm_page_zero_idle vm_page_zero_idle
-#define _want_resched want_resched
#define _wm_sqrt wm_sqrt
#endif /* __ELF__ */
@@ -339,6 +337,7 @@
#define _cpu_lockid FS(cpu_lockid)
#define _curpcb FS(curpcb)
#define _curproc FS(curproc)
+#define _astpending FS(astpending)
#define _currentldt FS(currentldt)
#define _inside_intr FS(inside_intr)
#define _npxproc FS(npxproc)
diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h
index 18049d0d681b..c6aa46fbe5b2 100644
--- a/sys/i386/include/cpu.h
+++ b/sys/i386/include/cpu.h
@@ -82,10 +82,13 @@
/*
* Preempt the current process if in interrupt from user mode,
* or after the current trap/syscall if in system mode.
+ *
+ * XXX: if astpending is later changed to an |= here due to more flags being
+ * added, we will have an atomicity problem. The type of atomicity we need is
+ * a non-locked orl.
*/
-#define need_resched() do { want_resched = 1; aston(); } while (0)
-
-#define resched_wanted() want_resched
+#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define resched_wanted() (astpending & AST_RESCHED)
/*
* Arrange to handle pending profiling ticks before returning to user mode.
@@ -100,10 +103,15 @@
/*
* Notify the current process (p) that it has a signal pending,
* process as soon as possible.
+ *
+ * XXX: aston() really needs to be an atomic operation (not locked, but an orl),
+ * in case need_resched() is set by an interrupt. But with astpending a
+ * per-cpu variable this is not trivial to do efficiently. For now we blow
+ * it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-#define aston() do { astpending = 1; } while (0)
+#define aston() do { astpending |= AST_PENDING; } while (0)
#define astoff()
/*
@@ -126,11 +134,9 @@
}
#ifdef _KERNEL
-extern int astpending;
extern char btext[];
extern char etext[];
extern u_char intr_nesting_level;
-extern int want_resched; /* resched was called */
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
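
The new need_resched()/resched_wanted()/aston() macros fold the old want_resched and astpending variables into one flag word. The stand-alone demo below reuses the macro bodies and AST_* values from this diff (see machine/ipl.h further down) so their interaction can be followed outside the kernel; main() is only a driver for the illustration.

#include <stdio.h>

#define AST_PENDING	0x00000001	/* values introduced in machine/ipl.h */
#define AST_RESCHED	0x00000002

static unsigned int astpending;		/* per-cpu in the kernel; plain global here */

#define aston()		do { astpending |= AST_PENDING; } while (0)
#define need_resched()	do { astpending = AST_RESCHED | AST_PENDING; } while (0)
#define resched_wanted()	(astpending & AST_RESCHED)

int
main(void)
{
	aston();				/* e.g. signotify(): AST only */
	printf("resched wanted: %d\n", resched_wanted() != 0);	/* prints 0 */
	need_resched();				/* clock asks for a context switch */
	printf("resched wanted: %d\n", resched_wanted() != 0);	/* prints 1 */
	astpending &= ~(AST_RESCHED | AST_PENDING);	/* cleared once the AST runs */
	return (0);
}
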
diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h
index 28336d708534..58bd9cfe9416 100644
--- a/sys/i386/include/globaldata.h
+++ b/sys/i386/include/globaldata.h
@@ -66,6 +66,7 @@ struct globaldata {
caddr_t gd_prv_CADDR3;
unsigned *gd_prv_PADDR1;
#endif
+ u_int gd_astpending;
};
#ifdef SMP
diff --git a/sys/i386/include/globals.h b/sys/i386/include/globals.h
index b5f9514af648..ae05d5644e76 100644
--- a/sys/i386/include/globals.h
+++ b/sys/i386/include/globals.h
@@ -90,6 +90,7 @@
#define common_tssd GLOBAL_LVALUE(common_tssd, struct segment_descriptor)
#define tss_gdt GLOBAL_LVALUE(tss_gdt, struct segment_descriptor *)
+#define astpending GLOBAL_LVALUE(astpending, u_int)
#ifdef USER_LDT
#define currentldt GLOBAL_LVALUE(currentldt, int)
@@ -111,6 +112,7 @@
#endif /*UP kernel*/
GLOBAL_FUNC(curproc)
+GLOBAL_FUNC(astpending)
GLOBAL_FUNC(curpcb)
GLOBAL_FUNC(npxproc)
GLOBAL_FUNC(common_tss)
diff --git a/sys/i386/include/ipl.h b/sys/i386/include/ipl.h
index 28c39aaf94e0..2a73a6acd3fb 100644
--- a/sys/i386/include/ipl.h
+++ b/sys/i386/include/ipl.h
@@ -85,6 +85,12 @@
#define SWI_CLOCK_MASK SWI_CLOCK_PENDING
#define SWI_MASK (~HWI_MASK)
+/*
+ * astpending bits
+ */
+#define AST_PENDING 0x00000001
+#define AST_RESCHED 0x00000002
+
#ifndef LOCORE
/*
diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h
index 25c54e48f8cf..534f77e8d2fb 100644
--- a/sys/i386/include/lock.h
+++ b/sys/i386/include/lock.h
@@ -37,24 +37,17 @@
#define MPLOCKED lock ;
/*
- * Some handy macros to allow logical organization and
- * convenient reassignment of various locks.
+ * Some handy macros to allow logical organization.
*/
-#define FPU_LOCK call _get_fpu_lock
-#define ALIGN_LOCK call _get_align_lock
-#define SYSCALL_LOCK call _get_syscall_lock
-#define ALTSYSCALL_LOCK call _get_altsyscall_lock
+#define MP_LOCK call _get_mplock
-/*
- * Protects INTR() ISRs.
- */
-#define ISR_TRYLOCK \
+#define MP_TRYLOCK \
pushl $_mp_lock ; /* GIANT_LOCK */ \
call _MPtrylock ; /* try to get lock */ \
add $4, %esp
-#define ISR_RELLOCK \
+#define MP_RELLOCK \
movl $_mp_lock,%edx ; /* GIANT_LOCK */ \
call _MPrellock_edx
@@ -69,55 +62,11 @@
#define IMASK_UNLOCK \
movl $0, _imen_lock
-/*
- * Variations of CPL_LOCK protect spl updates as a critical region.
- * Items within this 'region' include:
- * cpl
- * cml
- * cil
- * ipending
- */
-
-/*
- * Bottom half routines, ie. those already protected from INTs.
- *
- * Used in:
- * sys/i386/isa/ipl.s: _doreti
- * sys/i386/isa/apic_vector.s: _Xintr0, ..., _Xintr23
- */
-#define CPL_LOCK \
- pushl $_cpl_lock ; /* address of lock */ \
- call _s_lock ; /* MP-safe */ \
- addl $4, %esp
-
-#define CPL_UNLOCK \
- movl $0, _cpl_lock
-
-/*
- * INT safe version for top half of kernel.
- *
- * Used in:
- * sys/i386/i386/exception.s: _Xfpu, _Xalign, _Xsyscall, _Xint0x80_syscall
- * sys/i386/isa/apic_ipl.s: splz()
- */
-#define SCPL_LOCK \
- pushl $_cpl_lock ; \
- call _ss_lock ; \
- addl $4, %esp
-
-#define SCPL_UNLOCK \
- pushl $_cpl_lock ; \
- call _ss_unlock ; \
- addl $4, %esp
-
#else /* SMP */
#define MPLOCKED /* NOP */
-#define FPU_LOCK /* NOP */
-#define ALIGN_LOCK /* NOP */
-#define SYSCALL_LOCK /* NOP */
-#define ALTSYSCALL_LOCK /* NOP */
+#define MP_LOCK /* NOP */
#endif /* SMP */
@@ -139,18 +88,6 @@
#endif /* USE_MPINTRLOCK */
/*
- * Protects cpl/cml/cil/ipending data as a critical region.
- *
- * Used in:
- * sys/i386/isa/ipl_funcs.c: DO_SETBITS, softclockpending(), GENSPL,
- * spl0(), splx(), splq()
- */
-#define CPL_LOCK() s_lock(&cpl_lock) /* Bottom end */
-#define CPL_UNLOCK() s_unlock(&cpl_lock)
-#define SCPL_LOCK() ss_lock(&cpl_lock) /* INT safe: top end */
-#define SCPL_UNLOCK() ss_unlock(&cpl_lock)
-
-/*
* sio/cy lock.
* XXX should rc (RISCom/8) use this?
*/
@@ -191,11 +128,6 @@
#define MPINTR_LOCK()
#define MPINTR_UNLOCK()
-#define CPL_LOCK()
-#define CPL_UNLOCK()
-#define SCPL_LOCK()
-#define SCPL_UNLOCK()
-
#define COM_LOCK()
#define COM_UNLOCK()
#define CLOCK_LOCK()
diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/i386/include/mptable.h
+++ b/sys/i386/include/mptable.h
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index 28336d708534..58bd9cfe9416 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -66,6 +66,7 @@ struct globaldata {
caddr_t gd_prv_CADDR3;
unsigned *gd_prv_PADDR1;
#endif
+ u_int gd_astpending;
};
#ifdef SMP
diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h
index dd897284e234..f9ac4a36919e 100644
--- a/sys/i386/include/smptests.h
+++ b/sys/i386/include/smptests.h
@@ -63,65 +63,6 @@
#define PUSHDOWN_LEVEL_3_NOT
#define PUSHDOWN_LEVEL_4_NOT
-
-/*
- * XXX some temp debug control of cpl locks
- */
-#ifdef PUSHDOWN_LEVEL_2
-#define REAL_ECPL /* exception.s: SCPL_LOCK/SCPL_UNLOCK */
-#define REAL_ICPL /* ipl.s: CPL_LOCK/CPL_UNLOCK/FAST */
-#define REAL_AICPL /* apic_ipl.s: SCPL_LOCK/SCPL_UNLOCK */
-#define REAL_AVCPL /* apic_vector.s: CPL_LOCK/CPL_UNLOCK */
-#define REAL_IFCPL /* ipl_funcs.c: SCPL_LOCK/SCPL_UNLOCK */
-#endif /* PUSHDOWN_LEVEL_2 */
-
-/*
- * The xCPL_LOCK/xCPL_UNLOCK defines control the spinlocks
- * that protect cpl/cml/cil and the spl functions.
- */
-#ifdef REAL_ECPL
-#define ECPL_LOCK SCPL_LOCK
-#define ECPL_UNLOCK SCPL_UNLOCK
-#else
-#define ECPL_LOCK
-#define ECPL_UNLOCK
-#endif /* REAL_ECPL */
-
-#ifdef REAL_ICPL
-#define ICPL_LOCK CPL_LOCK
-#define ICPL_UNLOCK CPL_UNLOCK
-#define FAST_ICPL_UNLOCK movl $0, _cpl_lock
-#else
-#define ICPL_LOCK
-#define ICPL_UNLOCK
-#define FAST_ICPL_UNLOCK
-#endif /* REAL_ICPL */
-
-#ifdef REAL_AICPL
-#define AICPL_LOCK SCPL_LOCK
-#define AICPL_UNLOCK SCPL_UNLOCK
-#else
-#define AICPL_LOCK
-#define AICPL_UNLOCK
-#endif /* REAL_AICPL */
-
-#ifdef REAL_AVCPL
-#define AVCPL_LOCK CPL_LOCK
-#define AVCPL_UNLOCK CPL_UNLOCK
-#else
-#define AVCPL_LOCK
-#define AVCPL_UNLOCK
-#endif /* REAL_AVCPL */
-
-#ifdef REAL_IFCPL
-#define IFCPL_LOCK() SCPL_LOCK()
-#define IFCPL_UNLOCK() SCPL_UNLOCK()
-#else
-#define IFCPL_LOCK()
-#define IFCPL_UNLOCK()
-#endif /* REAL_IFCPL */
-
-
/*
* Debug version of simple_lock. This will store the CPU id of the
* holding CPU along with the lock. When a CPU fails to get the lock
@@ -160,36 +101,40 @@
/*
- * Regular INTerrupts without the giant lock, NOT READY YET!!!
+ * INTR_SIMPLELOCK has been removed, as the interrupt mechanism will likely
+ * not use this sort of optimization if we move to interrupt threads.
*/
#ifdef PUSHDOWN_LEVEL_4
-#define INTR_SIMPLELOCK
#endif
/*
- * Separate the INTR() portion of cpl into another variable: cml.
+ * CPL_AND_CML has been removed. Interrupt threads will eventually not
+ * use either mechanism so there is no point trying to optimize it.
*/
#ifdef PUSHDOWN_LEVEL_3
-#define CPL_AND_CML
#endif
/*
- * Forces spl functions to spin while waiting for safe time to change cpl.
+ * SPL_DEBUG_POSTCODE/INTR_SPL/SPL_DEBUG - removed
*
-#define SPL_DEBUG_POSTCODE (slows the system down noticably)
+ * These functions were too expensive for the standard case but, more
+ * importantly, we should be able to come up with a much cleaner way
+ * to handle the cpl. Having to do any locking at all is a mistake
+ * for something that is modified as often as cpl is.
*/
-#ifdef PUSHDOWN_LEVEL_3
-#define INTR_SPL
-#define SPL_DEBUG
-#endif
-
/*
+ * FAST_WITHOUTCPL - now made the default (define removed). Text below
+ * contains the current discussion. I am confident we can find a solution
+ * that does not require us to process softints from a hard int, which can
+ * kill serial performance due to the lack of true hardware ipl's.
+ *
+ ****
+ *
* Ignore the ipending bits when exiting FAST_INTR() routines.
*
- ***
* according to Bruce:
*
* setsoft*() may set ipending. setsofttty() is actually used in the
@@ -209,21 +154,17 @@
* I finish making spl/cpl MP-safe.
*/
#ifdef PUSHDOWN_LEVEL_1
-#define FAST_WITHOUTCPL
#endif
/*
- * Use a simplelock to serialize FAST_INTR()s.
- * sio.c, and probably other FAST_INTR() drivers, never expected several CPUs
- * to be inside them at once. Things such as global vars prevent more
- * than 1 thread of execution from existing at once, so we serialize
- * the access of FAST_INTR()s via a simple lock.
- * One optimization on this would be a simple lock per DRIVER, but I'm
- * not sure how to organize that yet...
+ * FAST_SIMPLELOCK no longer exists, because it doesn't help us. The cpu
+ * is likely to already hold the MP lock and recursive MP locks are now
+ * very cheap, so we do not need this optimization. Eventually *ALL*
+ * interrupts will run in their own thread, so there is no sense complicating
+ * matters now.
*/
#ifdef PUSHDOWN_LEVEL_1
-#define FAST_SIMPLELOCK
#endif
diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s
index 855de2651338..94771f3eadb3 100644
--- a/sys/i386/isa/apic_ipl.s
+++ b/sys/i386/isa/apic_ipl.s
@@ -29,15 +29,6 @@
.data
ALIGN_DATA
-/* current INTerrupt level */
- .globl _cil
-_cil: .long 0
-
-/* current INTerrupt level mask */
- .globl _cml
-_cml: .long 0
-
-
/*
* Routines used by splz_unpend to build an interrupt frame from a
* trap frame. The _vec[] routines build the proper frame on the stack,
@@ -78,6 +69,8 @@ _apic_imen:
SUPERALIGN_TEXT
/*
+ * splz() - dispatch pending interrupts after cpl reduced
+ *
* Interrupt priority mechanism
* -- soft splXX masks with group mechanism (cpl)
* -- h/w masks for currently active or unused interrupts (imen)
@@ -87,20 +80,25 @@ _apic_imen:
ENTRY(splz)
/*
* The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
+ * is nonzero. However, since ipending can change at any time
+ * (by an interrupt or, with SMP, by another cpu), we have to
+ * repeat the check. At the moment we must own the MP lock in
+ * the SMP case because the interrupt handlers require it. We
+ * loop until no unmasked pending interrupts remain.
+ *
+ * No new unmasked pending interrupts will be added during the
+ * loop because, being unmasked, the interrupt code will be able
+ * to execute the interrupts.
+ *
+ * Interrupts come in two flavors: Hardware interrupts and software
+ * interrupts. We have to detect the type of interrupt (based on the
+ * position of the interrupt bit) and call the appropriate dispatch
+ * routine.
+ *
+ * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't
+ * rely on the secondary btrl tests.
*/
- AICPL_LOCK
movl _cpl,%eax
-#ifdef CPL_AND_CML
- orl _cml, %eax /* add cml to cpl */
-#endif
splz_next:
/*
* We don't need any locking here. (ipending & ~cpl) cannot grow
@@ -110,7 +108,6 @@ splz_next:
notl %ecx /* set bit = unmasked level */
andl _ipending,%ecx /* set bit = unmasked pending INT */
jne splz_unpend
- AICPL_UNLOCK
ret
ALIGN_TEXT
@@ -131,9 +128,6 @@ splz_unpend:
* The vec[] routines build the proper frame on the stack,
* then call one of _Xintr0 thru _XintrNN.
*/
- pushl %ecx
- AICPL_UNLOCK
- popl %ecx
jmp *_vec(,%ecx,4)
ALIGN_TEXT
@@ -141,11 +135,7 @@ splz_swi:
pushl %eax
orl imasks(,%ecx,4),%eax
movl %eax,_cpl
- pushl %ecx
- AICPL_UNLOCK
- popl %ecx
call *_ihandlers(,%ecx,4)
- AICPL_LOCK
popl %eax
movl %eax,_cpl
jmp splz_next
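
The rewritten splz() comment describes a simple loop: recompute (ipending & ~cpl), pick the lowest set bit, claim it, and dispatch it as a hardware or software interrupt until nothing unmasked remains. The stand-alone model below mirrors that loop in C, with ffs() playing the part of the bsfl instruction; NHWI, the bit layout and the dispatch function are stand-ins chosen for the sketch, and the spl manipulation around software interrupt handlers is omitted.

#include <stdio.h>
#include <strings.h>

#define NHWI 24				/* assumed hardware/software split for this sketch */

static unsigned int cpl;		/* current priority (disable) mask */
static unsigned int ipending;		/* pending, possibly masked, interrupts */

static void
dispatch(int irq)
{
	printf("%s interrupt %d\n", irq < NHWI ? "hardware" : "software", irq);
}

static void
splz_model(void)
{
	unsigned int ready;

	while ((ready = ipending & ~cpl) != 0) {
		int irq = ffs(ready) - 1;	/* lowest unmasked pending bit */
		ipending &= ~(1U << irq);	/* "btrl": claim the interrupt */
		dispatch(irq);			/* _vec[] / _ihandlers[] */
	}
}

int
main(void)
{
	cpl = 1U << 3;			/* IRQ 3 is spl-masked */
	ipending = (1U << 1) | (1U << 3) | (1U << 25);
	splz_model();			/* runs IRQ 1 and SWI 25, leaves IRQ 3 pending */
	return (0);
}
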
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index ca909d907e69..587d763a4573 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -9,28 +9,17 @@
#include "i386/isa/intr_machdep.h"
-
-#ifdef FAST_SIMPLELOCK
-
-#define GET_FAST_INTR_LOCK \
- pushl $_fast_intr_lock ; /* address of lock */ \
- call _s_lock ; /* MP-safe */ \
- addl $4,%esp
-
-#define REL_FAST_INTR_LOCK \
- movl $0, _fast_intr_lock
-
-#else /* FAST_SIMPLELOCK */
+/*
+ * Interrupts must be enabled while waiting for the MP lock.
+ */
#define GET_FAST_INTR_LOCK \
- call _get_isrlock
+ sti; call _get_mplock; cli
#define REL_FAST_INTR_LOCK \
movl $_mp_lock, %edx ; /* GIANT_LOCK */ \
call _MPrellock_edx
-#endif /* FAST_SIMPLELOCK */
-
/* convert an absolute IRQ# into a bitmask */
#define IRQ_BIT(irq_num) (1 << (irq_num))
@@ -42,10 +31,6 @@
* Macros for interrupt interrupt entry, call to handler, and exit.
*/
-#ifdef FAST_WITHOUTCPL
-
-/*
- */
#define FAST_INTR(irq_num, vec_name) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \
popl %eax ; \
iret
-#else /* FAST_WITHOUTCPL */
-
-#define FAST_INTR(irq_num, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name) ; \
- pushl %eax ; /* save only call-used registers */ \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %ds ; \
- MAYBE_PUSHL_ES ; \
- pushl %fs ; \
- movl $KDSEL, %eax ; \
- movl %ax, %ds ; \
- MAYBE_MOVW_AX_ES ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
- FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \
- GET_FAST_INTR_LOCK ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \
- addl $4, %esp ; \
- movl $0, lapic_eoi ; \
- lock ; \
- incl _cnt+V_INTR ; /* book-keeping can wait */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- lock ; \
- incl (%eax) ; \
- movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \
- notl %eax ; \
- andl _ipending, %eax ; \
- jne 2f ; /* yes, maybe handle them */ \
-1: ; \
- MEXITCOUNT ; \
- REL_FAST_INTR_LOCK ; \
- popl %fs ; \
- MAYBE_POPL_ES ; \
- popl %ds ; \
- popl %edx ; \
- popl %ecx ; \
- popl %eax ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- cmpb $3, _intr_nesting_level ; /* enough stack? */ \
- jae 1b ; /* no, return */ \
- movl _cpl, %eax ; \
- /* XXX next line is probably unnecessary now. */ \
- movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \
- lock ; \
- incb _intr_nesting_level ; /* ... really limit it ... */ \
- sti ; /* to do this as early as possible */ \
- popl %fs ; /* discard most of thin frame ... */ \
- MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
- popl %ecx ; /* ... original %ds ... */ \
- popl %edx ; \
- xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \
- pushal ; /* build fat frame (grrr) ... */ \
- pushl %ecx ; /* ... actually %ds ... */ \
- pushl %es ; \
- pushl %fs ;
- movl $KDSEL, %eax ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ;
- movl %ax, %fs ;
- movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \
- movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \
- movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
- subl $4, %esp ; /* junk for unit number */ \
- MEXITCOUNT ; \
- jmp _doreti
-
-#endif /** FAST_WITHOUTCPL */
-
-
/*
*
*/
@@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \
7: ; \
IMASK_UNLOCK
-#ifdef INTR_SIMPLELOCK
-#define ENLOCK
-#define DELOCK
-#define LATELOCK call _get_isrlock
-#else
-#define ENLOCK \
- ISR_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f
-#define DELOCK ISR_RELLOCK
-#define LATELOCK
-#endif
-
#ifdef APIC_INTR_DIAGNOSTIC
#ifdef APIC_INTR_DIAGNOSTIC_IRQ
log_intr_event:
@@ -319,125 +214,6 @@ log_intr_event:
#define APIC_ITRACE(name, irq_num, id)
#endif
-#ifdef CPL_AND_CML
-
-#define INTR(irq_num, vec_name, maybe_extra_ipending) \
- .text ; \
- SUPERALIGN_TEXT ; \
-/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- movl %ax, %ds ; \
- movl %ax, %es ; \
- movl $KPSEL, %eax ; \
- movl %ax, %fs ; \
-; \
- maybe_extra_ipending ; \
-; \
- APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
-; \
- MASK_LEVEL_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
-0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ENLOCK ; \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 2f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
-; \
- incb _intr_nesting_level ; \
-; \
- /* entry point used by doreti_unpend for HWIs. */ \
-__CONCAT(Xresume,irq_num): ; \
- FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- AVCPL_LOCK ; /* MP-safe */ \
- movl _cml, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cml ; \
- AVCPL_UNLOCK ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
- incl _inside_intr ; \
- APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
- sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
- APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
- decl _inside_intr ; \
-; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- lock ; andl $~IRQ_BIT(irq_num), _cil ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
- MEXITCOUNT ; \
- LATELOCK ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; \
-; \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl|cml */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- DELOCK ; /* XXX this is going away... */ \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- testl $IRQ_BIT(irq_num), _cml ; \
- jne 4f ; /* this INT masked */ \
- orl $IRQ_BIT(irq_num), _cil ; \
- AVCPL_UNLOCK ; \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
- POP_FRAME ; /* and return */ \
- iret
-
-#else /* CPL_AND_CML */
-
-
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \
EOI_IRQ(irq_num) ; \
0: ; \
APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- ISR_TRYLOCK ; /* XXX this is going away... */ \
+ MP_TRYLOCK ; /* XXX this is going away... */ \
testl %eax, %eax ; /* did we get it? */ \
jz 3f ; /* no */ \
; \
APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 2f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
; \
incb _intr_nesting_level ; \
; \
@@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \
movl _intr_countp + (irq_num) * 4, %eax ; \
lock ; incl (%eax) ; \
; \
- AVCPL_LOCK ; /* MP-safe */ \
movl _cpl, %eax ; \
pushl %eax ; \
orl _intr_mask + (irq_num) * 4, %eax ; \
movl %eax, _cpl ; \
lock ; \
andl $~IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
; \
pushl _intr_unit + (irq_num) * 4 ; \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
@@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
MASK_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
lock ; \
btsl $(irq_num), iactive ; /* still active */ \
jnc 0b ; /* retry */ \
@@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \
APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
- AVCPL_UNLOCK ; \
- ISR_RELLOCK ; /* XXX this is going away... */ \
+ MP_RELLOCK ; \
POP_FRAME ; \
iret ; \
ALIGN_TEXT ; \
3: ; /* other cpu has isr lock */ \
APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- AVCPL_LOCK ; /* MP-safe */ \
lock ; \
orl $IRQ_BIT(irq_num), _ipending ; \
testl $IRQ_BIT(irq_num), _cpl ; \
jne 4f ; /* this INT masked */ \
- AVCPL_UNLOCK ; \
call forward_irq ; /* forward irq to lock holder */ \
POP_FRAME ; /* and return */ \
iret ; \
ALIGN_TEXT ; \
4: ; /* blocked */ \
APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- AVCPL_UNLOCK ; \
POP_FRAME ; /* and return */ \
iret
-#endif /* CPL_AND_CML */
-
-
/*
* Handle "spurious INTerrupts".
* Notes:
@@ -635,11 +398,6 @@ _Xcpucheckstate:
testl $PSL_VM, 24(%esp)
jne 1f
incl %ebx /* system or interrupt */
-#ifdef CPL_AND_CML
- cmpl $0, _inside_intr
- je 1f
- incl %ebx /* interrupt */
-#endif
1:
movl _cpuid, %eax
movl %ebx, _checkstate_cpustate(,%eax,4)
@@ -693,17 +451,11 @@ _Xcpuast:
* Giant locks do not come cheap.
* A lot of cycles are going to be wasted here.
*/
- call _get_isrlock
+ call _get_mplock
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
movl $1, _astpending /* XXX */
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -716,7 +468,7 @@ _Xcpuast:
lock
btrl %eax, CNAME(resched_cpus)
jnc 2f
- movl $1, CNAME(want_resched)
+ orl $AST_RESCHED,_astpending
lock
incl CNAME(want_resched_cnt)
2:
@@ -749,7 +501,7 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- ISR_TRYLOCK
+ MP_TRYLOCK
testl %eax,%eax /* Did we get the lock ? */
jz 1f /* No */
@@ -758,14 +510,8 @@ _Xforward_irq:
cmpb $4, _intr_nesting_level
jae 2f
- AVCPL_LOCK
-#ifdef CPL_AND_CML
- movl _cml, %eax
-#else
movl _cpl, %eax
-#endif
pushl %eax
- AVCPL_UNLOCK
lock
incb _intr_nesting_level
sti
@@ -785,7 +531,7 @@ _Xforward_irq:
lock
incl CNAME(forward_irq_toodeepcnt)
3:
- ISR_RELLOCK
+ MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s
index 7c1fca1383e7..980257f3f1de 100644
--- a/sys/i386/isa/ipl.s
+++ b/sys/i386/isa/ipl.s
@@ -43,6 +43,10 @@
/*
* AT/386
* Vector interrupt control section
+ *
+ * cpl - Current interrupt disable mask
+ * *_imask - Interrupt masks for various spl*() functions
+ * ipending - Pending interrupts (set when a masked interrupt occurs)
*/
.data
@@ -67,9 +71,6 @@ _softnet_imask: .long SWI_NET_MASK
.globl _softtty_imask
_softtty_imask: .long SWI_TTY_MASK
- .globl _astpending
-_astpending: .long 0
-
/* pending interrupts blocked by splxxx() */
.globl _ipending
_ipending: .long 0
@@ -91,29 +92,12 @@ _netisrs:
.text
-#ifdef SMP
-#ifdef notnow
-#define TEST_CIL \
- cmpl $0x0100, _cil ; \
- jne 1f ; \
- cmpl $0, _inside_intr ; \
- jne 1f ; \
- int $3 ; \
-1:
-#else
-#define TEST_CIL
-#endif
-#endif
-
/*
* Handle return from interrupts, traps and syscalls.
*/
SUPERALIGN_TEXT
.type _doreti,@function
_doreti:
-#ifdef SMP
- TEST_CIL
-#endif
FAKE_MCOUNT(_bintr) /* init "from" _bintr -> _doreti */
addl $4,%esp /* discard unit number */
popl %eax /* cpl or cml to restore */
@@ -128,32 +112,17 @@ doreti_next:
* handlers is limited by the number of bits in cpl).
*/
#ifdef SMP
- TEST_CIL
cli /* early to prevent INT deadlock */
- pushl %eax /* preserve cpl while getting lock */
- ICPL_LOCK
- popl %eax
doreti_next2:
#endif
movl %eax,%ecx
-#ifdef CPL_AND_CML
- orl _cpl, %ecx /* add cpl to cml */
-#endif
notl %ecx /* set bit = unmasked level */
#ifndef SMP
cli
#endif
andl _ipending,%ecx /* set bit = unmasked pending INT */
jne doreti_unpend
-#ifdef SMP
- TEST_CIL
-#endif
-#ifdef CPL_AND_CML
- movl %eax, _cml
-#else
movl %eax,_cpl
-#endif
- FAST_ICPL_UNLOCK /* preserves %eax */
MPLOCKED decb _intr_nesting_level
/* Check for ASTs that can be handled now. */
@@ -166,19 +135,27 @@ doreti_next2:
cmpl $1,_in_vm86call
jne doreti_ast
+ /*
+ * doreti_exit - release MP lock, pop registers, iret.
+ *
+ * Note that the syscall trap shortcuts to doreti_syscall_ret.
+ * The segment register pop is a special case, since it may
+ * fault if (for example) a sigreturn specifies bad segment
+ * registers. The fault is handled in trap.c
+ */
+
doreti_exit:
MEXITCOUNT
#ifdef SMP
-#ifdef INTR_SIMPLELOCK
-#error code needed here to decide which lock to release, INTR or giant
-#endif
/* release the kernel lock */
movl $_mp_lock, %edx /* GIANT_LOCK */
call _MPrellock_edx
#endif /* SMP */
.globl doreti_popl_fs
+ .globl doreti_syscall_ret
+doreti_syscall_ret:
doreti_popl_fs:
popl %fs
.globl doreti_popl_es
@@ -215,22 +192,13 @@ doreti_popl_fs_fault:
doreti_unpend:
/*
* Enabling interrupts is safe because we haven't restored cpl yet.
- * The locking from the "btrl" test is probably no longer necessary.
- * We won't miss any new pending interrupts because we will check
- * for them again.
+ * %ecx contains the next probable ready interrupt (~cpl & ipending)
*/
#ifdef SMP
- TEST_CIL
- /* we enter with cpl locked */
- bsfl %ecx, %ecx /* slow, but not worth optimizing */
+ bsfl %ecx, %ecx /* locate the next dispatchable int */
lock
- btrl %ecx, _ipending
+ btrl %ecx, _ipending /* is it really still pending? */
jnc doreti_next2 /* some intr cleared memory copy */
- cmpl $NHWI, %ecx
- jae 1f
- btsl %ecx, _cil
-1:
- FAST_ICPL_UNLOCK /* preserves %eax */
sti /* late to prevent INT deadlock */
#else
sti
@@ -238,8 +206,9 @@ doreti_unpend:
btrl %ecx,_ipending
jnc doreti_next /* some intr cleared memory copy */
#endif /* SMP */
-
/*
+ * Execute handleable interrupt
+ *
* Set up JUMP to _ihandlers[%ecx] for HWIs.
* Set up CALL of _ihandlers[%ecx] for SWIs.
* This is a bit early for the SMP case - we have to push %ecx and
@@ -247,25 +216,10 @@ doreti_unpend:
*/
movl _ihandlers(,%ecx,4),%edx
cmpl $NHWI,%ecx
- jae doreti_swi
- cli
+ jae doreti_swi /* software interrupt handling */
+ cli /* else hardware int handling */
#ifdef SMP
- pushl %edx /* preserve %edx */
-#ifdef APIC_INTR_DIAGNOSTIC
- pushl %ecx
-#endif
- pushl %eax /* preserve %eax */
- ICPL_LOCK
-#ifdef CPL_AND_CML
- popl _cml
-#else
- popl _cpl
-#endif
- FAST_ICPL_UNLOCK
-#ifdef APIC_INTR_DIAGNOSTIC
- popl %ecx
-#endif
- popl %edx
+ movl %eax,_cpl /* same as non-smp case right now */
#else
movl %eax,_cpl
#endif
@@ -292,9 +246,6 @@ doreti_unpend:
ALIGN_TEXT
doreti_swi:
-#ifdef SMP
- TEST_CIL
-#endif
pushl %eax
/*
* At least the SWI_CLOCK handler has to run at a possibly strictly
@@ -304,29 +255,18 @@ doreti_swi:
* in dying interrupt frames and about 12 HWIs nested in active
* interrupt frames. There are only 4 different SWIs and the HWI
* and SWI masks limit the nesting further.
+ *
+ * The SMP case is currently the same as the non-SMP case.
*/
#ifdef SMP
- orl imasks(,%ecx,4), %eax
- pushl %ecx /* preserve for use by _swi_generic */
- pushl %edx /* save handler entry point */
- cli /* prevent INT deadlock */
- pushl %eax /* save cpl|cml */
- ICPL_LOCK
-#ifdef CPL_AND_CML
- popl _cml /* restore cml */
-#else
- popl _cpl /* restore cpl */
-#endif
- FAST_ICPL_UNLOCK
- sti
- popl %edx /* restore handler entry point */
- popl %ecx
+ orl imasks(,%ecx,4), %eax /* or in imasks */
+ movl %eax,_cpl /* set cpl for call */
#else
orl imasks(,%ecx,4),%eax
movl %eax,_cpl
#endif
call %edx
- popl %eax
+ popl %eax /* cpl to restore */
jmp doreti_next
ALIGN_TEXT
@@ -336,9 +276,6 @@ doreti_ast:
movl $T_ASTFLT,TF_TRAPNO(%esp)
call _trap
subl %eax,%eax /* recover cpl|cml */
-#ifdef CPL_AND_CML
- movl %eax, _cpl
-#endif
movb $1,_intr_nesting_level /* for doreti_next to decrement */
jmp doreti_next
diff --git a/sys/i386/isa/ipl_funcs.c b/sys/i386/isa/ipl_funcs.c
index d7ba1c4b9653..043d6b432cf6 100644
--- a/sys/i386/isa/ipl_funcs.c
+++ b/sys/i386/isa/ipl_funcs.c
@@ -35,8 +35,9 @@
#include <i386/isa/intr_machdep.h>
/*
- * The volatile bitmap variables must be set atomically. This normally
- * involves using a machine-dependent bit-set or `or' instruction.
+ * Bits in the ipending bitmap variable must be set atomically because
+ * ipending may be manipulated by interrupts or other CPUs without holding
+ * any locks.
*
* Note: setbits uses a locked or, making simple cases MP safe.
*/
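The locked `or' mentioned in the note is the x86 lock-prefixed orl instruction.
As a minimal sketch only (assuming GCC-style inline assembly; the actual
setbits definition in this file may differ), such a helper can be written as:

	static __inline void
	setbits(volatile unsigned *addr, unsigned bits)
	{
		__asm __volatile("lock; orl %1,%0"
				 : "+m" (*addr)
				 : "ir" (bits)
				 : "memory");
	}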
@@ -67,6 +68,10 @@ softclockpending(void)
return (ipending & SWI_CLOCK_PENDING);
}
+/*
+ * Support for SPL assertions.
+ */
+
#ifdef INVARIANT_SUPPORT
#define SPLASSERT_IGNORE 0
@@ -112,6 +117,40 @@ NAME##assert(const char *msg) \
#define GENSPLASSERT(NAME, MODIFIER)
#endif
+/************************************************************************
+ * GENERAL SPL CODE *
+ ************************************************************************
+ *
+ * Implement splXXX(), spl0(), splx(), and splq(). splXXX() disables a
+ * set of interrupts (e.g. splbio() disables interrupts relating to
+ * device I/O) and returns the previous interrupt mask. splx() restores
+ * the previous interrupt mask, spl0() is a special case which enables
+ * all interrupts and is typically used inside i386/i386 swtch.s and
+ * fork_trampoline. splq() is a generic version of splXXX().
+ *
+ * The SPL routines mess around with the 'cpl' global, which masks
+ * interrupts. Interrupts are not *actually* masked. What happens is
+ * that if an interrupt masked by the cpl occurs, the appropriate bit
+ * in 'ipending' is set and the interrupt is deferred.  When we clear
+ * bits in the cpl we must check whether any ipending interrupts have
+ * been unmasked and issue them synchronously, which is what the splz()
+ * call does.
+ *
+ * Because the cpl is often saved and restored in a nested fashion, cpl
+ * modifications are only allowed in the SMP case when the MP lock is held
+ * to prevent multiple processes from tripping over each other's masks.
+ * The cpl is saved when you do a context switch (mi_switch()) and restored
+ * when your process gets cpu again.
+ *
+ * An interrupt routine is allowed to modify the cpl as long as it restores
+ * it prior to returning (thus the interrupted mainline code doesn't notice
+ * anything amiss). For the SMP case, the interrupt routine must hold
+ * the MP lock for any cpl manipulation.
+ *
+ * Likewise, due to the deterministic nature of cpl modifications, we do
+ * NOT need to use locked instructions to modify it.
+ */
+
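As an illustration of the save/restore nesting described above (splbio() is
just one of the generated splXXX() routines):

	int s;

	s = splbio();		/* raise cpl: defer disk interrupts */
	/* ... critical section ... */
	splx(s);		/* restore old mask; interrupts that were
				 * deferred into ipending now run via splz() */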
#ifndef SMP
#define GENSPL(NAME, OP, MODIFIER, PC) \
@@ -154,219 +193,65 @@ splq(intrmask_t mask)
#include <machine/smp.h>
#include <machine/smptests.h>
-#ifndef SPL_DEBUG_POSTCODE
-#undef POSTCODE
-#undef POSTCODE_LO
-#undef POSTCODE_HI
-#define POSTCODE(X)
-#define POSTCODE_LO(X)
-#define POSTCODE_HI(X)
-#endif /* SPL_DEBUG_POSTCODE */
-
-
/*
- * This version has to check for bsp_apic_ready,
- * as calling simple_lock() (ie ss_lock) before then deadlocks the system.
- * A sample count of GENSPL calls before bsp_apic_ready was set: 2193
+ * SMP CASE
+ *
+ * Mostly the same as the non-SMP case now, but it was not always
+ * this clean.
*/
-#ifdef INTR_SPL
-
-#ifdef SPL_DEBUG
-#define MAXZ 100000000
-#define SPIN_VAR unsigned z;
-#define SPIN_RESET z = 0;
-#if 0
-#define SPIN_SPL \
- if (++z >= MAXZ) { \
- /* XXX allow lock-free panic */ \
- bsp_apic_ready = 0; \
- panic("\ncil: 0x%08x", cil); \
- }
-#else
-#define SPIN_SPL \
- if (++z >= MAXZ) { \
- /* XXX allow lock-free panic */ \
- bsp_apic_ready = 0; \
- printf("\ncil: 0x%08x", cil); \
- breakpoint(); \
- }
-#endif /* 0/1 */
-#else /* SPL_DEBUG */
-#define SPIN_VAR
-#define SPIN_RESET
-#define SPIN_SPL
-#endif /* SPL_DEBUG */
-
-#endif
-
-#ifdef INTR_SPL
-
-#define GENSPL(NAME, OP, MODIFIER, PC) \
-GENSPLASSERT(NAME, MODIFIER) \
-unsigned NAME(void) \
-{ \
- unsigned x, y; \
- SPIN_VAR; \
- \
- if (!bsp_apic_ready) { \
- x = cpl; \
- cpl OP MODIFIER; \
- return (x); \
- } \
- \
- for (;;) { \
- IFCPL_LOCK(); /* MP-safe */ \
- x = y = cpl; /* current value */ \
- POSTCODE(0x20 | PC); \
- if (inside_intr) \
- break; /* XXX only 1 INT allowed */ \
- y OP MODIFIER; /* desired value */ \
- if (cil & y) { /* not now */ \
- IFCPL_UNLOCK(); /* allow cil to change */ \
- SPIN_RESET; \
- while (cil & y) \
- SPIN_SPL \
- continue; /* try again */ \
- } \
- break; \
- } \
- cpl OP MODIFIER; /* make the change */ \
- IFCPL_UNLOCK(); \
- \
- return (x); \
-}
-
-#else /* INTR_SPL */
-
#define GENSPL(NAME, OP, MODIFIER, PC) \
GENSPLASSERT(NAME, MODIFIER) \
unsigned NAME(void) \
{ \
unsigned x; \
\
- IFCPL_LOCK(); \
x = cpl; \
cpl OP MODIFIER; \
- IFCPL_UNLOCK(); \
\
return (x); \
}
-#endif /* INTR_SPL */
-
-
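For example, an invocation such as GENSPL(splbio, |=, bio_imask, 2) (the
arguments are assumed here for illustration) expands to a splbioassert()
from GENSPLASSERT plus roughly:

	unsigned
	splbio(void)
	{
		unsigned x;

		x = cpl;
		cpl |= bio_imask;
		return (x);
	}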
+/*
+ * spl0() - unmask all interrupts
+ *
+ * The MP lock must be held on entry
+ * This routine may only be called from mainline code.
+ */
void
spl0(void)
{
- int unpend;
-#ifdef INTR_SPL
- SPIN_VAR;
-
- for (;;) {
- IFCPL_LOCK();
- POSTCODE_HI(0xc);
- /*
- * XXX SWI_AST_MASK in ipending has moved to 1 in astpending,
- * so the following code is dead, but just removing it may
- * not be right.
- */
-#if 0
- if (cil & SWI_AST_MASK) { /* not now */
- IFCPL_UNLOCK(); /* allow cil to change */
- SPIN_RESET;
- while (cil & SWI_AST_MASK)
- SPIN_SPL
- continue; /* try again */
- }
-#endif
- break;
- }
-#else /* INTR_SPL */
- IFCPL_LOCK();
-#endif /* INTR_SPL */
-
+ KASSERT(inside_intr == 0, ("spl0: called from interrupt"));
cpl = 0;
- unpend = ipending;
- IFCPL_UNLOCK();
-
- if (unpend && !inside_intr)
+ if (ipending)
splz();
}
+/*
+ * splx() - restore previous interrupt mask
+ *
+ * The MP lock must be held on entry
+ */
+
void
splx(unsigned ipl)
{
- int unpend;
-#ifdef INTR_SPL
- SPIN_VAR;
-
- for (;;) {
- IFCPL_LOCK();
- POSTCODE_HI(0xe);
- if (inside_intr)
- break; /* XXX only 1 INT allowed */
- POSTCODE_HI(0xf);
- if (cil & ipl) { /* not now */
- IFCPL_UNLOCK(); /* allow cil to change */
- SPIN_RESET;
- while (cil & ipl)
- SPIN_SPL
- continue; /* try again */
- }
- break;
- }
-#else /* INTR_SPL */
- IFCPL_LOCK();
-#endif /* INTR_SPL */
-
cpl = ipl;
- unpend = ipending & ~ipl;
- IFCPL_UNLOCK();
-
- if (unpend && !inside_intr)
+ if (inside_intr == 0 && (ipending & ~cpl) != 0)
splz();
}
/*
- * Replaces UP specific inline found in (?) pci/pci_support.c.
+ * splq() - block the specified interrupts
*
- * Stefan said:
- * You know, that splq() is used in the shared interrupt multiplexer, and that
- * the SMP version should not have too much overhead. If it is significantly
- * slower, then moving the splq() out of the loop in intr_mux() and passing in
- * the logical OR of all mask values might be a better solution than the
- * current code. (This logical OR could of course be pre-calculated whenever
- * another shared interrupt is registered ...)
+ * The MP lock must be held on entry
*/
intrmask_t
splq(intrmask_t mask)
{
- intrmask_t tmp;
-#ifdef INTR_SPL
- intrmask_t tmp2;
-
- for (;;) {
- IFCPL_LOCK();
- tmp = tmp2 = cpl;
- tmp2 |= mask;
- if (cil & tmp2) { /* not now */
- IFCPL_UNLOCK(); /* allow cil to change */
- while (cil & tmp2)
- /* spin */ ;
- continue; /* try again */
- }
- break;
- }
- cpl = tmp2;
-#else /* INTR_SPL */
- IFCPL_LOCK();
- tmp = cpl;
+ intrmask_t tmp = cpl;
cpl |= mask;
-#endif /* INTR_SPL */
-
- IFCPL_UNLOCK();
return (tmp);
}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 05de442441e3..e3f976d7ebe4 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -44,7 +44,7 @@ struct sysent sysent[] = {
{ 4, (sy_call_t *)mount }, /* 21 = mount */
{ 2, (sy_call_t *)unmount }, /* 22 = unmount */
{ 1, (sy_call_t *)setuid }, /* 23 = setuid */
- { 0, (sy_call_t *)getuid }, /* 24 = getuid */
+ { SYF_MPSAFE | 0, (sy_call_t *)getuid }, /* 24 = getuid */
{ 0, (sy_call_t *)geteuid }, /* 25 = geteuid */
{ 4, (sy_call_t *)ptrace }, /* 26 = ptrace */
{ 3, (sy_call_t *)recvmsg }, /* 27 = recvmsg */
@@ -67,7 +67,7 @@ struct sysent sysent[] = {
{ 4, (sy_call_t *)profil }, /* 44 = profil */
{ 4, (sy_call_t *)ktrace }, /* 45 = ktrace */
{ compat(3,sigaction) }, /* 46 = old sigaction */
- { 0, (sy_call_t *)getgid }, /* 47 = getgid */
+ { SYF_MPSAFE | 0, (sy_call_t *)getgid }, /* 47 = getgid */
{ compat(2,sigprocmask) }, /* 48 = old sigprocmask */
{ 2, (sy_call_t *)getlogin }, /* 49 = getlogin */
{ 1, (sy_call_t *)setlogin }, /* 50 = setlogin */
@@ -80,7 +80,7 @@ struct sysent sysent[] = {
{ 2, (sy_call_t *)symlink }, /* 57 = symlink */
{ 3, (sy_call_t *)readlink }, /* 58 = readlink */
{ 3, (sy_call_t *)execve }, /* 59 = execve */
- { 1, (sy_call_t *)umask }, /* 60 = umask */
+ { SYF_MPSAFE | 1, (sy_call_t *)umask }, /* 60 = umask */
{ 1, (sy_call_t *)chroot }, /* 61 = chroot */
{ compat(2,fstat) }, /* 62 = old fstat */
{ compat(4,getkerninfo) }, /* 63 = old getkerninfo */
@@ -101,7 +101,7 @@ struct sysent sysent[] = {
{ 3, (sy_call_t *)mincore }, /* 78 = mincore */
{ 2, (sy_call_t *)getgroups }, /* 79 = getgroups */
{ 2, (sy_call_t *)setgroups }, /* 80 = setgroups */
- { 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */
+ { SYF_MPSAFE | 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */
{ 2, (sy_call_t *)setpgid }, /* 82 = setpgid */
{ 3, (sy_call_t *)setitimer }, /* 83 = setitimer */
{ compat(0,wait) }, /* 84 = old wait */
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index 2ac10da6aca8..06bc88931fa4 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -62,6 +62,9 @@ struct getpid_args {
};
#endif
+/*
+ * NOT MP SAFE due to p_pptr access
+ */
/* ARGSUSED */
int
getpid(p, uap)
@@ -92,7 +95,11 @@ getppid(p, uap)
return (0);
}
-/* Get process group ID; note that POSIX getpgrp takes no parameter */
+/*
+ * Get process group ID; note that POSIX getpgrp takes no parameter
+ *
+ * MP SAFE
+ */
#ifndef _SYS_SYSPROTO_H_
struct getpgrp_args {
int dummy;
@@ -168,6 +175,9 @@ struct getuid_args {
};
#endif
+/*
+ * MP SAFE
+ */
/* ARGSUSED */
int
getuid(p, uap)
@@ -205,6 +215,9 @@ struct getgid_args {
};
#endif
+/*
+ * MP SAFE
+ */
/* ARGSUSED */
int
getgid(p, uap)
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index c3bb68f73753..b374fed75506 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -64,6 +64,7 @@
#include <vm/vm_zone.h>
+#include <machine/ipl.h>
#include <machine/cpu.h>
#ifdef SMP
#include <machine/smp.h>
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index a4b05fe76a45..3146f9e856ef 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -140,6 +140,8 @@ remrunqueue(struct proc *p)
* procrunnable() returns a boolean true (non-zero) value if there are
* any runnable processes. This is intended to be called from the idle
* loop to avoid the more expensive (and destructive) chooseproc().
+ *
+ * MP SAFE. CALLED WITHOUT THE MP LOCK
*/
u_int32_t
procrunnable(void)
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index a590506ecd45..d7a66b03ad74 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -57,6 +57,7 @@
#endif
#include <machine/cpu.h>
+#include <machine/ipl.h>
#ifdef SMP
#include <machine/smp.h>
#endif
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index 8f297b21ecab..cce81c3bff6e 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -41,6 +41,7 @@
#include <sys/kernel.h>
#include <sys/resource.h>
#include <machine/cpu.h> /* For need_resched */
+#include <machine/ipl.h> /* For need_resched */
#include <posix4/posix4.h>
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index ed9c0d8b77e3..117f0309981a 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -42,6 +42,7 @@
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
+#include <machine/ipl.h>
#include <machine/cpu.h>
#ifdef GPROF
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 8e349a9800e4..b5bc1fd09022 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -1770,8 +1770,10 @@ init_locks(void)
*/
mp_lock = 0x00000001;
+#if 0
/* ISR uses its own "giant lock" */
isr_lock = FREE_LOCK;
+#endif
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index a8b73cf6a02b..703d48dc84ed 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
-extern void syscall __P((struct trapframe frame));
+extern void syscall2 __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -140,38 +140,32 @@ static char *trap_msg[] = {
"machine check trap", /* 28 T_MCHK */
};
-static __inline void userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks));
+static __inline int userret __P((struct proc *p, struct trapframe *frame,
+ u_quad_t oticks, int have_mplock));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif
-static __inline void
-userret(p, frame, oticks)
+static __inline int
+userret(p, frame, oticks, have_mplock)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
+ int have_mplock;
{
int sig, s;
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
postsig(sig);
-
-#if 0
- if (!want_resched &&
- (p->p_priority <= p->p_usrpri) &&
- (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
- int newpriority;
- p->p_estcpu += 1;
- newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
}
-#endif
-
+
p->p_priority = p->p_usrpri;
- if (want_resched) {
+ if (resched_wanted()) {
/*
* Since we are curproc, clock will normally just change
* our priority without moving us from one queue to another
@@ -180,6 +174,10 @@ userret(p, frame, oticks)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
s = splhigh();
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
@@ -191,11 +189,16 @@ userret(p, frame, oticks)
/*
* Charge system time if profiling.
*/
- if (p->p_flag & P_PROFIL)
+ if (p->p_flag & P_PROFIL) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
-
+ }
curpriority = p->p_priority;
+ return(have_mplock);
}
/*
@@ -604,7 +607,7 @@ kernel_trap:
#endif
out:
- userret(p, &frame, sticks);
+ userret(p, &frame, sticks, 1);
}
#ifdef notyet
@@ -999,11 +1002,18 @@ int trapwrite(addr)
}
/*
- * System call request from POSIX system call gate interface to kernel.
- * Like trap(), argument is call by reference.
+ * syscall2 - MP aware system call request C handler
+ *
+ * A system call is essentially treated as a trap except that the
+ * MP lock is not held on entry or return. We are responsible for
+ * obtaining the MP lock if necessary and for handling ASTs
+ * (e.g. a task switch) prior to return.
+ *
+ * In general, only simple access to and manipulation of curproc and
+ * the current stack are allowed without holding the MP lock.
*/
void
-syscall(frame)
+syscall2(frame)
struct trapframe frame;
{
caddr_t params;
@@ -1012,22 +1022,42 @@ syscall(frame)
struct proc *p = curproc;
u_quad_t sticks;
int error;
+ int narg;
int args[8];
+ int have_mplock = 0;
u_int code;
#ifdef DIAGNOSTIC
- if (ISPL(frame.tf_cs) != SEL_UPL)
+ if (ISPL(frame.tf_cs) != SEL_UPL) {
+ get_mplock();
panic("syscall");
+ /* NOT REACHED */
+ }
#endif
- sticks = p->p_sticks;
+
+ /*
+	 * Handle atomicity by looping, since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
p->p_md.md_regs = &frame;
params = (caddr_t)frame.tf_esp + sizeof(int);
code = frame.tf_eax;
+
if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is not MP aware.
+ */
+ get_mplock();
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ rel_mplock();
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
*/
if (code == SYS_syscall) {
/*
@@ -1053,27 +1083,52 @@ syscall(frame)
else
callp = &p->p_sysent->sv_table[code];
- if (params && (i = callp->sy_narg * sizeof(int)) &&
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin is MP aware, but the tracing code is not
+ */
+ if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
+ get_mplock();
+ have_mplock = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ ktrsyscall(p->p_tracep, code, narg, args);
#endif
goto bad;
}
+
+ /*
+ * Try to run the syscall without the MP lock if the syscall
+ * is MP safe. We have to obtain the MP lock no matter what if
+	 * we are ktracing.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+ if (KTRPOINT(p, KTR_SYSCALL)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
+ ktrsyscall(p->p_tracep, code, narg, args);
+ }
#endif
p->p_retval[0] = 0;
p->p_retval[1] = frame.tf_edx;
- STOPEVENT(p, S_SCE, callp->sy_narg);
+ STOPEVENT(p, S_SCE, narg); /* MP aware */
error = (*callp->sy_call)(p, args);
+ /*
+ * MP SAFE (we may or may not have the MP lock at this point)
+ */
switch (error) {
-
case 0:
/*
* Reinitialize proc pointer `p' as it may be different
@@ -1109,17 +1164,31 @@ bad:
break;
}
+ /*
+ * Traced syscall. trapsignal() is not MP aware.
+ */
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- /* Traced syscall. */
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
}
- userret(p, &frame, sticks);
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ have_mplock = userret(p, &frame, sticks, have_mplock);
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_mplock == 0) {
+ get_mplock();
+ have_mplock = 1;
+ }
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
+ }
#endif
/*
@@ -1129,11 +1198,17 @@ bad:
*/
STOPEVENT(p, S_SCX, code);
+ /*
+ * Release the MP lock if we had to get it
+ */
+ if (have_mplock)
+ rel_mplock();
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode.
+ * directly into user mode. MP lock is held on entry and should be
+ * held on return.
*/
void
fork_return(p, frame)
@@ -1144,7 +1219,7 @@ fork_return(p, frame)
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0);
+ userret(p, &frame, 0, 1);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET))
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 142a2c339422..99084d332ed6 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -3000,6 +3000,8 @@ getdents(p, uap)
/*
* Set the mode mask for creation of filesystem nodes.
+ *
+ * MP SAFE
*/
#ifndef _SYS_SYSPROTO_H_
struct umask_args {
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 142a2c339422..99084d332ed6 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -3000,6 +3000,8 @@ getdents(p, uap)
/*
* Set the mode mask for creation of filesystem nodes.
+ *
+ * MP SAFE
*/
#ifndef _SYS_SYSPROTO_H_
struct umask_args {
diff --git a/sys/posix4/ksched.c b/sys/posix4/ksched.c
index 8f297b21ecab..cce81c3bff6e 100644
--- a/sys/posix4/ksched.c
+++ b/sys/posix4/ksched.c
@@ -41,6 +41,7 @@
#include <sys/kernel.h>
#include <sys/resource.h>
#include <machine/cpu.h> /* For need_resched */
+#include <machine/ipl.h> /* For need_resched */
#include <posix4/posix4.h>
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
index 5383fc409a24..3804937c9f11 100644
--- a/sys/sys/ktrace.h
+++ b/sys/sys/ktrace.h
@@ -62,7 +62,7 @@ struct ktr_header {
};
/*
- * Test for kernel trace point
+ * Test for kernel trace point (MP SAFE)
*/
#define KTRPOINT(p, type) \
(((p)->p_traceflag & ((1<<(type))|KTRFAC_ACTIVE)) == (1<<(type)))
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 4c2eead9f775..ad1ba41547ec 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -338,9 +338,18 @@ MALLOC_DECLARE(M_PARGS);
FREE(s, M_SESSION); \
}
+/*
+ * STOPEVENT is MP SAFE.
+ */
extern void stopevent(struct proc*, unsigned int, unsigned int);
-#define STOPEVENT(p,e,v) do { \
- if ((p)->p_stops & (e)) stopevent(p,e,v); } while (0)
+#define STOPEVENT(p,e,v) \
+ do { \
+ if ((p)->p_stops & (e)) { \
+ get_mplock(); \
+ stopevent(p,e,v); \
+ rel_mplock(); \
+ } \
+ } while (0)
/* hold process U-area in memory, normally for ptrace/procfs work */
#define PHOLD(p) { \
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 56cf3fa26d9d..496d68519505 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -39,6 +39,9 @@
#include <sys/signal.h>
#include <sys/proc.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
/*
* Kernel signal definitions and data structures,
@@ -225,16 +228,24 @@ void sendsig __P((sig_t action, int sig, sigset_t *retmask, u_long code));
* Determine signal that should be delivered to process p, the current
* process, 0 if none. If there is a pending stop signal with default
* action, the process stops in issignal().
+ *
+ * MP SAFE
*/
extern __inline int __cursig(struct proc *p)
{
sigset_t tmpset;
+ int r;
tmpset = p->p_siglist;
SIGSETNAND(tmpset, p->p_sigmask);
- return ((SIGISEMPTY(p->p_siglist) ||
- (!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset)))
- ? 0 : issignal(p));
+ if (SIGISEMPTY(p->p_siglist) ||
+ (!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset))) {
+ return(0);
+ }
+ get_mplock();
+ r = issignal(p);
+ rel_mplock();
+ return(r);
}
#endif /* _KERNEL */
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index 66fe731ae66c..fff1ed055ad9 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -44,6 +44,10 @@ struct sysent { /* system call table */
int sy_narg; /* number of arguments */
sy_call_t *sy_call; /* implementing function */
};
+
+#define SYF_ARGMASK 0x0000FFFF
+#define SYF_MPSAFE 0x00010000
+
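Because the flag bits share sy_narg with the argument count, a table entry
and its decoding (as in the init_sysent.c and subr_trap.c changes above)
look like:

	{ SYF_MPSAFE | 1, (sy_call_t *)umask },		/* MP safe, 1 argument */

	narg = callp->sy_narg & SYF_ARGMASK;		/* true argument count */
	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		get_mplock();				/* not marked MP safe */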
#define SCARG(p,k) ((p)->k) /* get arg from args pointer */
/* placeholder till we integrate rest of lite2 syscallargs changes XXX */