60 files changed, 1003 insertions, 1843 deletions
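The core of this change is the replacement of syscall() with syscall2() in sys/*/trap.c: system calls now enter the kernel without holding the Giant MP lock and take it lazily, only when a non-MPSAFE handler, ktrace, or AST processing actually requires it. A condensed C sketch of that pattern, using the names that appear in the diff below (illustrative only; the real function also handles argument copyin, sv_prepsyscall, tracing, stop events, and error conversion):

/*
 * Sketch of the lazy MP-lock acquisition done by syscall2().
 */
void
syscall2_sketch(struct proc *p, struct sysent *callp, int *args)
{
	int have_mplock = 0;

	/* take the Giant (MP) lock only for syscalls not flagged MP safe */
	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();
		have_mplock = 1;
	}

	(*callp->sy_call)(p, args);	/* run the system call handler */

	if (have_mplock)		/* release only if we had to take it */
		rel_mplock();
}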
diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c index 7e9b15e82651..dd69b9a08585 100644 --- a/sys/alpha/alpha/trap.c +++ b/sys/alpha/alpha/trap.c @@ -615,7 +615,7 @@ syscall(code, framep) else callp = &p->p_sysent->sv_table[code]; - nargs = callp->sy_narg + hidden; + nargs = (callp->sy_narg & SYF_ARGMASK) + hidden; switch (nargs) { default: if (nargs > 10) /* XXX */ @@ -639,13 +639,13 @@ syscall(code, framep) } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args + hidden); + ktrsyscall(p->p_tracep, code, (callp->sy_narg & SYF_ARGMASK), args + hidden); #endif if (error == 0) { p->p_retval[0] = 0; p->p_retval[1] = 0; - STOPEVENT(p, S_SCE, callp->sy_narg); + STOPEVENT(p, S_SCE, (callp->sy_narg & SYF_ARGMASK)); error = (*callp->sy_call)(p, args + hidden); } diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index ca909d907e69..587d763a4573 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -9,28 +9,17 @@ #include "i386/isa/intr_machdep.h" - -#ifdef FAST_SIMPLELOCK - -#define GET_FAST_INTR_LOCK \ - pushl $_fast_intr_lock ; /* address of lock */ \ - call _s_lock ; /* MP-safe */ \ - addl $4,%esp - -#define REL_FAST_INTR_LOCK \ - movl $0, _fast_intr_lock - -#else /* FAST_SIMPLELOCK */ +/* + * Interrupts must be enabled while waiting for the MP lock. + */ #define GET_FAST_INTR_LOCK \ - call _get_isrlock + sti; call _get_mplock; cli #define REL_FAST_INTR_LOCK \ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \ call _MPrellock_edx -#endif /* FAST_SIMPLELOCK */ - /* convert an absolute IRQ# into a bitmask */ #define IRQ_BIT(irq_num) (1 << (irq_num)) @@ -42,10 +31,6 @@ * Macros for interrupt interrupt entry, call to handler, and exit. */ -#ifdef FAST_WITHOUTCPL - -/* - */ #define FAST_INTR(irq_num, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \ popl %eax ; \ iret -#else /* FAST_WITHOUTCPL */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - MAYBE_PUSHL_ES ; \ - pushl %fs ; \ - movl $KDSEL, %eax ; \ - movl %ax, %ds ; \ - MAYBE_MOVW_AX_ES ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ - FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \ - GET_FAST_INTR_LOCK ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl _cnt+V_INTR ; /* book-keeping can wait */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - lock ; \ - incl (%eax) ; \ - movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \ - notl %eax ; \ - andl _ipending, %eax ; \ - jne 2f ; /* yes, maybe handle them */ \ -1: ; \ - MEXITCOUNT ; \ - REL_FAST_INTR_LOCK ; \ - popl %fs ; \ - MAYBE_POPL_ES ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - cmpb $3, _intr_nesting_level ; /* enough stack? */ \ - jae 1b ; /* no, return */ \ - movl _cpl, %eax ; \ - /* XXX next line is probably unnecessary now. */ \ - movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \ - lock ; \ - incb _intr_nesting_level ; /* ... really limit it ... */ \ - sti ; /* to do this as early as possible */ \ - popl %fs ; /* discard most of thin frame ... */ \ - MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ - popl %ecx ; /* ... original %ds ... 
*/ \ - popl %edx ; \ - xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \ - pushal ; /* build fat frame (grrr) ... */ \ - pushl %ecx ; /* ... actually %ds ... */ \ - pushl %es ; \ - pushl %fs ; - movl $KDSEL, %eax ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; - movl %ax, %fs ; - movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \ - movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \ - movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ - subl $4, %esp ; /* junk for unit number */ \ - MEXITCOUNT ; \ - jmp _doreti - -#endif /** FAST_WITHOUTCPL */ - - /* * */ @@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \ 7: ; \ IMASK_UNLOCK -#ifdef INTR_SIMPLELOCK -#define ENLOCK -#define DELOCK -#define LATELOCK call _get_isrlock -#else -#define ENLOCK \ - ISR_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f -#define DELOCK ISR_RELLOCK -#define LATELOCK -#endif - #ifdef APIC_INTR_DIAGNOSTIC #ifdef APIC_INTR_DIAGNOSTIC_IRQ log_intr_event: @@ -319,125 +214,6 @@ log_intr_event: #define APIC_ITRACE(name, irq_num, id) #endif -#ifdef CPL_AND_CML - -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - movl %ax, %ds ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ -; \ - maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ -; \ - MASK_LEVEL_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ENLOCK ; \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 2f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ -; \ - incb _intr_nesting_level ; \ -; \ - /* entry point used by doreti_unpend for HWIs. 
*/ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - AVCPL_LOCK ; /* MP-safe */ \ - movl _cml, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cml ; \ - AVCPL_UNLOCK ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - incl _inside_intr ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ - sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ - decl _inside_intr ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - lock ; andl $~IRQ_BIT(irq_num), _cil ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ - MEXITCOUNT ; \ - LATELOCK ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl|cml */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - DELOCK ; /* XXX this is going away... */ \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 4f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ - POP_FRAME ; /* and return */ \ - iret - -#else /* CPL_AND_CML */ - - #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ISR_TRYLOCK ; /* XXX this is going away... */ \ + MP_TRYLOCK ; /* XXX this is going away... */ \ testl %eax, %eax ; /* did we get it? 
*/ \ jz 3f ; /* no */ \ ; \ APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 2f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ ; \ incb _intr_nesting_level ; \ ; \ @@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \ movl _intr_countp + (irq_num) * 4, %eax ; \ lock ; incl (%eax) ; \ ; \ - AVCPL_LOCK ; /* MP-safe */ \ movl _cpl, %eax ; \ pushl %eax ; \ orl _intr_mask + (irq_num) * 4, %eax ; \ movl %eax, _cpl ; \ lock ; \ andl $~IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ ; \ pushl _intr_unit + (irq_num) * 4 ; \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ @@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ MASK_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ lock ; \ btsl $(irq_num), iactive ; /* still active */ \ jnc 0b ; /* retry */ \ @@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - ISR_RELLOCK ; /* XXX this is going away... */ \ + MP_RELLOCK ; \ POP_FRAME ; \ iret ; \ ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 4f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ POP_FRAME ; /* and return */ \ iret -#endif /* CPL_AND_CML */ - - /* * Handle "spurious INTerrupts". * Notes: @@ -635,11 +398,6 @@ _Xcpucheckstate: testl $PSL_VM, 24(%esp) jne 1f incl %ebx /* system or interrupt */ -#ifdef CPL_AND_CML - cmpl $0, _inside_intr - je 1f - incl %ebx /* interrupt */ -#endif 1: movl _cpuid, %eax movl %ebx, _checkstate_cpustate(,%eax,4) @@ -693,17 +451,11 @@ _Xcpuast: * Giant locks do not come cheap. * A lot of cycles are going to be wasted here. */ - call _get_isrlock + call _get_mplock - AVCPL_LOCK -#ifdef CPL_AND_CML - movl _cml, %eax -#else movl _cpl, %eax -#endif pushl %eax movl $1, _astpending /* XXX */ - AVCPL_UNLOCK lock incb _intr_nesting_level sti @@ -716,7 +468,7 @@ _Xcpuast: lock btrl %eax, CNAME(resched_cpus) jnc 2f - movl $1, CNAME(want_resched) + orl $AST_RESCHED,_astpending lock incl CNAME(want_resched_cnt) 2: @@ -749,7 +501,7 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - ISR_TRYLOCK + MP_TRYLOCK testl %eax,%eax /* Did we get the lock ? 
*/ jz 1f /* No */ @@ -758,14 +510,8 @@ _Xforward_irq: cmpb $4, _intr_nesting_level jae 2f - AVCPL_LOCK -#ifdef CPL_AND_CML - movl _cml, %eax -#else movl _cpl, %eax -#endif pushl %eax - AVCPL_UNLOCK lock incb _intr_nesting_level sti @@ -785,7 +531,7 @@ _Xforward_irq: lock incl CNAME(forward_irq_toodeepcnt) 3: - ISR_RELLOCK + MP_RELLOCK MEXITCOUNT POP_FRAME iret diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index f3982ae688f0..a05d541d1051 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */ .globl _panic - .globl _want_resched -_want_resched: .long 0 /* we need to re-run the scheduler */ #if defined(SWTCH_OPTIM_STATS) .globl _swtch_optim_stats, _tlb_flush_count _swtch_optim_stats: .long 0 /* number of _swtch_optims */ @@ -129,6 +127,9 @@ _idle: /* * XXX callers of cpu_switch() do a bogus splclock(). Locking should * be left to cpu_switch(). + * + * NOTE: spl*() may only be called while we hold the MP lock (which + * we do). */ call _spl0 @@ -159,14 +160,14 @@ idle_loop: testl %eax,%eax jnz 3f + /* + * Handle page-zeroing in the idle loop. Called with interrupts + * disabled and the MP lock released. Inside vm_page_zero_idle + * we enable interrupts and grab the mplock as required. + */ cmpl $0,_do_page_zero_idle je 2f - /* XXX appears to cause panics */ - /* - * Inside zero_idle we enable interrupts and grab the mplock - * as needed. It needs to be careful about entry/exit mutexes. - */ call _vm_page_zero_idle /* internal locking */ testl %eax, %eax jnz idle_loop @@ -178,9 +179,15 @@ idle_loop: cli jmp idle_loop + /* + * Note that interrupts must be enabled while obtaining the MP lock + * in order to be able to take IPI's while blocked. + */ 3: movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ + sti call _get_mplock + cli call _procrunnable testl %eax,%eax CROSSJUMP(jnz, sw1a, jz) @@ -355,8 +362,8 @@ sw1a: CROSSJUMP(je, _idle, jne) /* if no proc, idle */ movl %eax,%ecx - movl $0,%eax - movl %eax,_want_resched + xorl %eax,%eax + andl $~WANT_RESCHED,_astpending #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 7042d58f79cb..91c5b8aa6885 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -41,23 +41,11 @@ #include <machine/psl.h> #include <machine/trap.h> #ifdef SMP -#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */ +#include <machine/smptests.h> /** various SMP options */ #endif #include "assym.s" -#ifndef SMP -#define ECPL_LOCK /* make these nops */ -#define ECPL_UNLOCK -#define ICPL_LOCK -#define ICPL_UNLOCK -#define FAST_ICPL_UNLOCK -#define AICPL_LOCK -#define AICPL_UNLOCK -#define AVCPL_LOCK -#define AVCPL_UNLOCK -#endif /* SMP */ - #ifdef SMP #define MOVL_KPSEL_EAX movl $KPSEL,%eax #else @@ -71,16 +59,45 @@ /* Trap handling */ /*****************************************************************************/ /* - * Trap and fault vector routines + * Trap and fault vector routines. + * + * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on + * the stack that mostly looks like an interrupt, but does not disable + * interrupts. A few of the traps we use are interrupt gates, + * SDT_SYS386IGT, which are nearly the same thing except interrupts are + * disabled on entry. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process.
The amount of state depends on the type of trap + * and whether the trap crossed rings or not. See i386/include/frame.h. + * At the very least the current EFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu. The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + * + * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means + * that we must be careful in regards to accessing global variables. We + * save (push) the current cpl (our software interrupt disable mask), call + * the trap function, then call _doreti to restore the cpl and deal with + * ASTs (software interrupts). _doreti will determine if the restoration + * of the cpl unmasked any pending interrupts and will issue those interrupts + * synchronously prior to doing the iret. + * + * At the moment we must own the MP lock to do any cpl manipulation, which + * means we must own it prior to calling _doreti. The syscall case attempts + * to avoid this by handling a reduced set of cases itself and iret'ing. */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \ .type __CONCAT(_X,name),@function; __CONCAT(_X,name): #define TRAP(a) pushl $(a) ; jmp _alltraps -/* - * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose - * control. The sti's give the standard losing behaviour for ddb and kgdb. - */ #ifdef BDE_DEBUGGER #define BDBTRAP(name) \ ss ; \ @@ -160,16 +177,9 @@ IDTVEC(fpu) #ifdef SMP MPLOCKED incl _cnt+V_TRAP - FPU_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%eax - pushl %eax /* save original cml */ -#else + MP_LOCK movl _cpl,%eax pushl %eax /* save original cpl */ -#endif /* CPL_AND_CML */ - ECPL_UNLOCK pushl $0 /* dummy unit to finish intr frame */ #else /* SMP */ movl _cpl,%eax @@ -190,6 +200,16 @@ IDTVEC(fpu) IDTVEC(align) TRAP(T_ALIGNFLT) + /* + * _alltraps entry point. Interrupts are enabled if this was a trap + * gate (TGT), else disabled if this was an interrupt gate (IGT). + * Note that int0x80_syscall is a trap gate. Only page faults + * use an interrupt gate. + * + * Note that all calls to MP_LOCK must occur with interrupts enabled + * in order to be able to take IPI's while waiting for the lock. + */ + SUPERALIGN_TEXT .globl _alltraps .type _alltraps,@function @@ -208,14 +228,8 @@ alltraps_with_regs_pushed: calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ MPLOCKED incl _cnt+V_TRAP - ALIGN_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%ebx /* keep orig. cml here during trap() */ -#else + MP_LOCK movl _cpl,%ebx /* keep orig. cpl here during trap() */ -#endif - ECPL_UNLOCK call _trap /* @@ -229,17 +243,19 @@ calltrap: jmp _doreti /* - * Call gate entry for syscall. + * SYSCALL CALL GATE (old entry point for a.out binaries) + * * The intersegment call has been set up to specify one dummy parameter. + * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. 
Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. - */ -/* - * THis first callgate is used for the old a.out binaries + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(syscall) @@ -259,20 +275,28 @@ IDTVEC(syscall) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - SYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs. - */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) + * + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather than an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) @@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall) movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - ALTSYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs. - */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti ENTRY(fork_trampoline) diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s index 7042d58f79cb..91c5b8aa6885 100644 --- a/sys/amd64/amd64/exception.s +++ b/sys/amd64/amd64/exception.s @@ -41,23 +41,11 @@ #include <machine/psl.h> #include <machine/trap.h> #ifdef SMP -#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */ +#include <machine/smptests.h> /** various SMP options */ #endif #include "assym.s" -#ifndef SMP -#define ECPL_LOCK /* make these nops */ -#define ECPL_UNLOCK -#define ICPL_LOCK -#define ICPL_UNLOCK -#define FAST_ICPL_UNLOCK -#define AICPL_LOCK -#define AICPL_UNLOCK -#define AVCPL_LOCK -#define AVCPL_UNLOCK -#endif /* SMP */ - #ifdef SMP #define MOVL_KPSEL_EAX movl $KPSEL,%eax #else @@ -71,16 +59,45 @@ /* Trap handling */ /*****************************************************************************/ /* - * Trap and fault vector routines + * Trap and fault vector routines. + * + * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on + * the stack that mostly looks like an interrupt, but does not disable + * interrupts. A few of the traps we use are interrupt gates, + * SDT_SYS386IGT, which are nearly the same thing except interrupts are + * disabled on entry. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process. The amount of state depends on the type of trap + * and whether the trap crossed rings or not. See i386/include/frame.h.
+ * At the very least the current EFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu. The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + * + * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means + * that we must be careful in regards to accessing global variables. We + * save (push) the current cpl (our software interrupt disable mask), call + * the trap function, then call _doreti to restore the cpl and deal with + * ASTs (software interrupts). _doreti will determine if the restoration + * of the cpl unmasked any pending interrupts and will issue those interrupts + * synchronously prior to doing the iret. + * + * At the moment we must own the MP lock to do any cpl manipulation, which + * means we must own it prior to calling _doreti. The syscall case attempts + * to avoid this by handling a reduced set of cases itself and iret'ing. */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \ .type __CONCAT(_X,name),@function; __CONCAT(_X,name): #define TRAP(a) pushl $(a) ; jmp _alltraps -/* - * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose - * control. The sti's give the standard losing behaviour for ddb and kgdb. - */ #ifdef BDE_DEBUGGER #define BDBTRAP(name) \ ss ; \ @@ -160,16 +177,9 @@ IDTVEC(fpu) #ifdef SMP MPLOCKED incl _cnt+V_TRAP - FPU_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%eax - pushl %eax /* save original cml */ -#else + MP_LOCK movl _cpl,%eax pushl %eax /* save original cpl */ -#endif /* CPL_AND_CML */ - ECPL_UNLOCK pushl $0 /* dummy unit to finish intr frame */ #else /* SMP */ movl _cpl,%eax @@ -190,6 +200,16 @@ IDTVEC(fpu) IDTVEC(align) TRAP(T_ALIGNFLT) + /* + * _alltraps entry point. Interrupts are enabled if this was a trap + * gate (TGT), else disabled if this was an interrupt gate (IGT). + * Note that int0x80_syscall is a trap gate. Only page faults + * use an interrupt gate. + * + * Note that all calls to MP_LOCK must occur with interrupts enabled + * in order to be able to take IPI's while waiting for the lock. + */ + SUPERALIGN_TEXT .globl _alltraps .type _alltraps,@function @@ -208,14 +228,8 @@ alltraps_with_regs_pushed: calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ MPLOCKED incl _cnt+V_TRAP - ALIGN_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%ebx /* keep orig. cml here during trap() */ -#else + MP_LOCK movl _cpl,%ebx /* keep orig. cpl here during trap() */ -#endif - ECPL_UNLOCK call _trap /* @@ -229,17 +243,19 @@ calltrap: jmp _doreti /* - * Call gate entry for syscall. + * SYSCALL CALL GATE (old entry point for a.out binaries) + * * The intersegment call has been set up to specify one dummy parameter. + * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. 
It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. - */ -/* - * THis first callgate is used for the old a.out binaries + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(syscall) @@ -259,20 +275,28 @@ IDTVEC(syscall) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - SYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs. - */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) + * + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather than an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) @@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall) movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - ALTSYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs. - */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti ENTRY(fork_trampoline) diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 23039820715e..5a2377be0085 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -176,6 +176,7 @@ ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime)); ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks)); ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt)); +ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending)); #ifdef USER_LDT ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt)); diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index
394848cd047f..1f350f5d1e27 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -848,7 +848,11 @@ ENTRY(i586_copyout) jmp done_copyout #endif /* I586_CPU && NNPX > 0 */ -/* copyin(from_user, to_kernel, len) */ +/* + * copyin(from_user, to_kernel, len) + * + * MPSAFE + */ ENTRY(copyin) MEXITCOUNT jmp *_copyin_vector @@ -1130,6 +1134,8 @@ fastmove_tail_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory + * + * MP SAFE */ ENTRY(fuword) movl _curpcb,%ecx @@ -1154,6 +1160,9 @@ ENTRY(fuswintr) movl $-1,%eax ret +/* + * MP SAFE + */ ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) @@ -1166,6 +1175,9 @@ ENTRY(fusword) movl $0,PCB_ONFAULT(%ecx) ret +/* + * MP SAFE + */ ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index 394848cd047f..1f350f5d1e27 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -848,7 +848,11 @@ ENTRY(i586_copyout) jmp done_copyout #endif /* I586_CPU && NNPX > 0 */ -/* copyin(from_user, to_kernel, len) */ +/* + * copyin(from_user, to_kernel, len) + * + * MPSAFE + */ ENTRY(copyin) MEXITCOUNT jmp *_copyin_vector @@ -1130,6 +1134,8 @@ fastmove_tail_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory + * + * MP SAFE */ ENTRY(fuword) movl _curpcb,%ecx @@ -1154,6 +1160,9 @@ ENTRY(fuswintr) movl $-1,%eax ret +/* + * MP SAFE + */ ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) @@ -1166,6 +1175,9 @@ ENTRY(fusword) movl $0,PCB_ONFAULT(%ecx) ret +/* + * MP SAFE + */ ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index f3982ae688f0..a05d541d1051 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */ .globl _panic - .globl _want_resched -_want_resched: .long 0 /* we need to re-run the scheduler */ #if defined(SWTCH_OPTIM_STATS) .globl _swtch_optim_stats, _tlb_flush_count _swtch_optim_stats: .long 0 /* number of _swtch_optims */ @@ -129,6 +127,9 @@ _idle: /* * XXX callers of cpu_switch() do a bogus splclock(). Locking should * be left to cpu_switch(). + * + * NOTE: spl*() may only be called while we hold the MP lock (which + * we do). */ call _spl0 @@ -159,14 +160,14 @@ idle_loop: testl %eax,%eax jnz 3f + /* + * Handle page-zeroing in the idle loop. Called with interrupts + * disabled and the MP lock released. Inside vm_page_zero_idle + * we enable interrupts and grab the mplock as required. + */ cmpl $0,_do_page_zero_idle je 2f - /* XXX appears to cause panics */ - /* - * Inside zero_idle we enable interrupts and grab the mplock - * as needed. It needs to be careful about entry/exit mutexes. - */ call _vm_page_zero_idle /* internal locking */ testl %eax, %eax jnz idle_loop @@ -178,9 +179,15 @@ idle_loop: cli jmp idle_loop + /* + * Note that interrupts must be enabled while obtaining the MP lock + * in order to be able to take IPI's while blocked. 
+ */ 3: movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ + sti call _get_mplock + cli call _procrunnable testl %eax,%eax CROSSJUMP(jnz, sw1a, jz) @@ -355,8 +362,8 @@ sw1a: CROSSJUMP(je, _idle, jne) /* if no proc, idle */ movl %eax,%ecx - movl $0,%eax - movl %eax,_want_resched + xorl %eax,%eax + andl $~WANT_RESCHED,_astpending #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index d722735f8b4c..081d28c93b2c 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -53,6 +53,7 @@ #include <sys/user.h> #include <machine/cpu.h> +#include <machine/ipl.h> #include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ #include <machine/sysarch.h> #ifdef SMP diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index a8b73cf6a02b..703d48dc84ed 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); -extern void syscall __P((struct trapframe frame)); +extern void syscall2 __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -140,38 +140,32 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; -static __inline void userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks)); +static __inline int userret __P((struct proc *p, struct trapframe *frame, + u_quad_t oticks, int have_mplock)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif -static __inline void -userret(p, frame, oticks) +static __inline int +userret(p, frame, oticks, have_mplock) struct proc *p; struct trapframe *frame; u_quad_t oticks; + int have_mplock; { int sig, s; - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } postsig(sig); - -#if 0 - if (!want_resched && - (p->p_priority <= p->p_usrpri) && - (p->p_rtprio.type == RTP_PRIO_NORMAL)) { - int newpriority; - p->p_estcpu += 1; - newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; - newpriority = min(newpriority, MAXPRI); - p->p_usrpri = newpriority; } -#endif - + p->p_priority = p->p_usrpri; - if (want_resched) { + if (resched_wanted()) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another @@ -180,6 +174,10 @@ userret(p, frame, oticks) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; @@ -191,11 +189,16 @@ userret(p, frame, oticks) /* * Charge system time if profiling. */ - if (p->p_flag & P_PROFIL) + if (p->p_flag & P_PROFIL) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); - + } curpriority = p->p_priority; + return(have_mplock); } /* @@ -604,7 +607,7 @@ kernel_trap: #endif out: - userret(p, &frame, sticks); + userret(p, &frame, sticks, 1); } #ifdef notyet @@ -999,11 +1002,18 @@ int trapwrite(addr) } /* - * System call request from POSIX system call gate interface to kernel. - * Like trap(), argument is call by reference. 
+ * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold MP lock. */ void -syscall(frame) +syscall2(frame) struct trapframe frame; { caddr_t params; @@ -1012,22 +1022,42 @@ struct proc *p = curproc; u_quad_t sticks; int error; + int narg; int args[8]; + int have_mplock = 0; u_int code; #ifdef DIAGNOSTIC - if (ISPL(frame.tf_cs) != SEL_UPL) + if (ISPL(frame.tf_cs) != SEL_UPL) { + get_mplock(); panic("syscall"); + /* NOT REACHED */ + } #endif - sticks = p->p_sticks; + + /* + * handle atomicity by looping since interrupts are enabled and the + * MP lock is not held. + */ + sticks = ((volatile struct proc *)p)->p_sticks; + while (sticks != ((volatile struct proc *)p)->p_sticks) + sticks = ((volatile struct proc *)p)->p_sticks; + p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is not MP aware. + */ + get_mplock(); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params); + rel_mplock(); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. */ if (code == SYS_syscall) { /* @@ -1053,27 +1083,52 @@ else callp = &p->p_sysent->sv_table[code]; - if (params && (i = callp->sy_narg * sizeof(int)) && + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin is MP aware, but the tracing code is not + */ + if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { + get_mplock(); + have_mplock = 1; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + ktrsyscall(p->p_tracep, code, narg, args); #endif goto bad; } + + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe. We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) { + get_mplock(); + have_mplock = 1; + } + #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + if (KTRPOINT(p, KTR_SYSCALL)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } + ktrsyscall(p->p_tracep, code, narg, args); + } #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; - STOPEVENT(p, S_SCE, callp->sy_narg); + STOPEVENT(p, S_SCE, narg); /* MP aware */ error = (*callp->sy_call)(p, args); + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ switch (error) { - case 0: /* * Reinitialize proc pointer `p' as it may be different @@ -1109,17 +1164,31 @@ bad: break; } + /* + * Traced syscall. trapsignal() is not MP aware. + */ if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { - /* Traced syscall.
*/ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } - userret(p, &frame, sticks); + /* + * Handle reschedule and other end-of-syscall issues + */ + have_mplock = userret(p, &frame, sticks, have_mplock); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); + } #endif /* @@ -1129,11 +1198,17 @@ bad: */ STOPEVENT(p, S_SCX, code); + /* + * Release the MP lock if we had to get it + */ + if (have_mplock) + rel_mplock(); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. + * directly into user mode. MP lock is held on entry and should be + * held on return. */ void fork_return(p, frame) @@ -1144,7 +1219,7 @@ frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0); + userret(p, &frame, 0, 1); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 9f5d6c046d46..d58e5a6538fb 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -578,13 +578,7 @@ vm_page_zero_idle() TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq); m->queue = PQ_NONE; splx(s); -#if 0 - rel_mplock(); -#endif pmap_zero_page(VM_PAGE_TO_PHYS(m)); -#if 0 - get_mplock(); -#endif (void)splvm(); vm_page_flag_set(m, PG_ZERO); m->queue = PQ_FREE + m->pc; @@ -606,6 +600,12 @@ vm_page_zero_idle() #ifdef SMP } #endif + /* + * We have to enable interrupts for a moment if the try_mplock fails + * in order to potentially take an IPI. XXX this should be in + * swtch.s + */ + __asm __volatile("sti; nop; cli" : : : "memory"); return (0); } diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h index 18049d0d681b..c6aa46fbe5b2 100644 --- a/sys/amd64/include/cpu.h +++ b/sys/amd64/include/cpu.h @@ -82,10 +82,13 @@ /* * Preempt the current process if in interrupt from user mode, * or after the current trap/syscall if in system mode. + * + * XXX: if astpending is later changed to an |= here due to more flags being + * added, we will have an atomicity problem. The type of atomicity we need is + * a non-locked orl. */ -#define need_resched() do { want_resched = 1; aston(); } while (0) - -#define resched_wanted() want_resched +#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0) +#define resched_wanted() (astpending & AST_RESCHED) /* * Arrange to handle pending profiling ticks before returning to user mode. @@ -100,10 +103,15 @@ /* * Notify the current process (p) that it has a signal pending, * process as soon as possible. + * + * XXX: aston() really needs to be an atomic (not locked, but an orl), + * in case need_resched() is set by an interrupt. But with astpending a + * per-cpu variable this is not trivial to do efficiently. For now we blow + * it off (asynchronous need_resched() conflicts are not critical).
*/ #define signotify(p) aston() -#define aston() do { astpending = 1; } while (0) +#define aston() do { astpending |= AST_PENDING; } while (0) #define astoff() /* @@ -126,11 +134,9 @@ } #ifdef _KERNEL -extern int astpending; extern char btext[]; extern char etext[]; extern u_char intr_nesting_level; -extern int want_resched; /* resched was called */ void fork_trampoline __P((void)); void fork_return __P((struct proc *, struct trapframe)); diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 28336d708534..58bd9cfe9416 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -66,6 +66,7 @@ struct globaldata { caddr_t gd_prv_CADDR3; unsigned *gd_prv_PADDR1; #endif + u_int gd_astpending; }; #ifdef SMP diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index ca909d907e69..587d763a4573 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -9,28 +9,17 @@ #include "i386/isa/intr_machdep.h" - -#ifdef FAST_SIMPLELOCK - -#define GET_FAST_INTR_LOCK \ - pushl $_fast_intr_lock ; /* address of lock */ \ - call _s_lock ; /* MP-safe */ \ - addl $4,%esp - -#define REL_FAST_INTR_LOCK \ - movl $0, _fast_intr_lock - -#else /* FAST_SIMPLELOCK */ +/* + * Interrupts must be enabled while waiting for the MP lock. + */ #define GET_FAST_INTR_LOCK \ - call _get_isrlock + sti; call _get_mplock; cli #define REL_FAST_INTR_LOCK \ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \ call _MPrellock_edx -#endif /* FAST_SIMPLELOCK */ - /* convert an absolute IRQ# into a bitmask */ #define IRQ_BIT(irq_num) (1 << (irq_num)) @@ -42,10 +31,6 @@ * Macros for interrupt interrupt entry, call to handler, and exit. */ -#ifdef FAST_WITHOUTCPL - -/* - */ #define FAST_INTR(irq_num, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \ popl %eax ; \ iret -#else /* FAST_WITHOUTCPL */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - MAYBE_PUSHL_ES ; \ - pushl %fs ; \ - movl $KDSEL, %eax ; \ - movl %ax, %ds ; \ - MAYBE_MOVW_AX_ES ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ - FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \ - GET_FAST_INTR_LOCK ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl _cnt+V_INTR ; /* book-keeping can wait */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - lock ; \ - incl (%eax) ; \ - movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \ - notl %eax ; \ - andl _ipending, %eax ; \ - jne 2f ; /* yes, maybe handle them */ \ -1: ; \ - MEXITCOUNT ; \ - REL_FAST_INTR_LOCK ; \ - popl %fs ; \ - MAYBE_POPL_ES ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - cmpb $3, _intr_nesting_level ; /* enough stack? */ \ - jae 1b ; /* no, return */ \ - movl _cpl, %eax ; \ - /* XXX next line is probably unnecessary now. 
*/ \ - movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \ - lock ; \ - incb _intr_nesting_level ; /* ... really limit it ... */ \ - sti ; /* to do this as early as possible */ \ - popl %fs ; /* discard most of thin frame ... */ \ - MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ - popl %ecx ; /* ... original %ds ... */ \ - popl %edx ; \ - xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \ - pushal ; /* build fat frame (grrr) ... */ \ - pushl %ecx ; /* ... actually %ds ... */ \ - pushl %es ; \ - pushl %fs ; - movl $KDSEL, %eax ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; - movl %ax, %fs ; - movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \ - movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \ - movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ - subl $4, %esp ; /* junk for unit number */ \ - MEXITCOUNT ; \ - jmp _doreti - -#endif /** FAST_WITHOUTCPL */ - - /* * */ @@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \ 7: ; \ IMASK_UNLOCK -#ifdef INTR_SIMPLELOCK -#define ENLOCK -#define DELOCK -#define LATELOCK call _get_isrlock -#else -#define ENLOCK \ - ISR_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f -#define DELOCK ISR_RELLOCK -#define LATELOCK -#endif - #ifdef APIC_INTR_DIAGNOSTIC #ifdef APIC_INTR_DIAGNOSTIC_IRQ log_intr_event: @@ -319,125 +214,6 @@ log_intr_event: #define APIC_ITRACE(name, irq_num, id) #endif -#ifdef CPL_AND_CML - -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. */ \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - movl %ax, %ds ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ -; \ - maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ -; \ - MASK_LEVEL_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ENLOCK ; \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 2f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ -; \ - incb _intr_nesting_level ; \ -; \ - /* entry point used by doreti_unpend for HWIs. 
*/ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - AVCPL_LOCK ; /* MP-safe */ \ - movl _cml, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cml ; \ - AVCPL_UNLOCK ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - incl _inside_intr ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ - sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ - decl _inside_intr ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - lock ; andl $~IRQ_BIT(irq_num), _cil ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ - MEXITCOUNT ; \ - LATELOCK ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl|cml */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - DELOCK ; /* XXX this is going away... */ \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 4f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ - POP_FRAME ; /* and return */ \ - iret - -#else /* CPL_AND_CML */ - - #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ISR_TRYLOCK ; /* XXX this is going away... */ \ + MP_TRYLOCK ; /* XXX this is going away... */ \ testl %eax, %eax ; /* did we get it? 
*/ \ jz 3f ; /* no */ \ ; \ APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 2f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ ; \ incb _intr_nesting_level ; \ ; \ @@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \ movl _intr_countp + (irq_num) * 4, %eax ; \ lock ; incl (%eax) ; \ ; \ - AVCPL_LOCK ; /* MP-safe */ \ movl _cpl, %eax ; \ pushl %eax ; \ orl _intr_mask + (irq_num) * 4, %eax ; \ movl %eax, _cpl ; \ lock ; \ andl $~IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ ; \ pushl _intr_unit + (irq_num) * 4 ; \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ @@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ MASK_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ lock ; \ btsl $(irq_num), iactive ; /* still active */ \ jnc 0b ; /* retry */ \ @@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - ISR_RELLOCK ; /* XXX this is going away... */ \ + MP_RELLOCK ; \ POP_FRAME ; \ iret ; \ ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 4f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ POP_FRAME ; /* and return */ \ iret -#endif /* CPL_AND_CML */ - - /* * Handle "spurious INTerrupts". * Notes: @@ -635,11 +398,6 @@ _Xcpucheckstate: testl $PSL_VM, 24(%esp) jne 1f incl %ebx /* system or interrupt */ -#ifdef CPL_AND_CML - cmpl $0, _inside_intr - je 1f - incl %ebx /* interrupt */ -#endif 1: movl _cpuid, %eax movl %ebx, _checkstate_cpustate(,%eax,4) @@ -693,17 +451,11 @@ _Xcpuast: * Giant locks do not come cheap. * A lot of cycles are going to be wasted here. */ - call _get_isrlock + call _get_mplock - AVCPL_LOCK -#ifdef CPL_AND_CML - movl _cml, %eax -#else movl _cpl, %eax -#endif pushl %eax movl $1, _astpending /* XXX */ - AVCPL_UNLOCK lock incb _intr_nesting_level sti @@ -716,7 +468,7 @@ _Xcpuast: lock btrl %eax, CNAME(resched_cpus) jnc 2f - movl $1, CNAME(want_resched) + orl $AST_RESCHED,_astpending lock incl CNAME(want_resched_cnt) 2: @@ -749,7 +501,7 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - ISR_TRYLOCK + MP_TRYLOCK testl %eax,%eax /* Did we get the lock ? 
*/ jz 1f /* No */ @@ -758,14 +510,8 @@ _Xforward_irq: cmpb $4, _intr_nesting_level jae 2f - AVCPL_LOCK -#ifdef CPL_AND_CML - movl _cml, %eax -#else movl _cpl, %eax -#endif pushl %eax - AVCPL_UNLOCK lock incb _intr_nesting_level sti @@ -785,7 +531,7 @@ _Xforward_irq: lock incl CNAME(forward_irq_toodeepcnt) 3: - ISR_RELLOCK + MP_RELLOCK MEXITCOUNT POP_FRAME iret diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 7042d58f79cb..91c5b8aa6885 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -41,23 +41,11 @@ #include <machine/psl.h> #include <machine/trap.h> #ifdef SMP -#include <machine/smptests.h> /** CPL_AND_CML, REAL_ */ +#include <machine/smptests.h> /** various SMP options */ #endif #include "assym.s" -#ifndef SMP -#define ECPL_LOCK /* make these nops */ -#define ECPL_UNLOCK -#define ICPL_LOCK -#define ICPL_UNLOCK -#define FAST_ICPL_UNLOCK -#define AICPL_LOCK -#define AICPL_UNLOCK -#define AVCPL_LOCK -#define AVCPL_UNLOCK -#endif /* SMP */ - #ifdef SMP #define MOVL_KPSEL_EAX movl $KPSEL,%eax #else @@ -71,16 +59,45 @@ /* Trap handling */ /*****************************************************************************/ /* - * Trap and fault vector routines + * Trap and fault vector routines. + * + * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on + * the stack that mostly looks like an interrupt, but does not disable + * interrupts. A few of the traps we use are interrupt gates, + * SDT_SYS386IGT, which are nearly the same thing except interrupts are + * disabled on entry. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process. The amount of state depends on the type of trap + * and whether the trap crossed rings or not. See i386/include/frame.h. + * At the very least the current EFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu. The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + * + * On entry to a trap or interrupt WE DO NOT OWN THE MP LOCK. This means + * that we must be careful in regards to accessing global variables. We + * save (push) the current cpl (our software interrupt disable mask), call + * the trap function, then call _doreti to restore the cpl and deal with + * ASTs (software interrupts). _doreti will determine if the restoration + * of the cpl unmasked any pending interrupts and will issue those interrupts + * synchronously prior to doing the iret. + * + * At the moment we must own the MP lock to do any cpl manipulation, which + * means we must own it prior to calling _doreti. The syscall case attempts + * to avoid this by handling a reduced set of cases itself and iret'ing. */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(_X,name); \ .type __CONCAT(_X,name),@function; __CONCAT(_X,name): #define TRAP(a) pushl $(a) ; jmp _alltraps -/* - * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose - * control.
The sti's give the standard losing behaviour for ddb and kgdb. - */ #ifdef BDE_DEBUGGER #define BDBTRAP(name) \ ss ; \ @@ -160,16 +177,9 @@ IDTVEC(fpu) #ifdef SMP MPLOCKED incl _cnt+V_TRAP - FPU_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%eax - pushl %eax /* save original cml */ -#else + MP_LOCK movl _cpl,%eax pushl %eax /* save original cpl */ -#endif /* CPL_AND_CML */ - ECPL_UNLOCK pushl $0 /* dummy unit to finish intr frame */ #else /* SMP */ movl _cpl,%eax @@ -190,6 +200,16 @@ IDTVEC(fpu) IDTVEC(align) TRAP(T_ALIGNFLT) + /* + * _alltraps entry point. Interrupts are enabled if this was a trap + * gate (TGT), else disabled if this was an interrupt gate (IGT). + * Note that int0x80_syscall is a trap gate. Only page faults + * use an interrupt gate. + * + * Note that all calls to MP_LOCK must occur with interrupts enabled + * in order to be able to take IPI's while waiting for the lock. + */ + SUPERALIGN_TEXT .globl _alltraps .type _alltraps,@function @@ -208,14 +228,8 @@ alltraps_with_regs_pushed: calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ MPLOCKED incl _cnt+V_TRAP - ALIGN_LOCK - ECPL_LOCK -#ifdef CPL_AND_CML - movl _cml,%ebx /* keep orig. cml here during trap() */ -#else + MP_LOCK movl _cpl,%ebx /* keep orig. cpl here during trap() */ -#endif - ECPL_UNLOCK call _trap /* @@ -229,17 +243,19 @@ calltrap: jmp _doreti /* - * Call gate entry for syscall. + * SYSCALL CALL GATE (old entry point for a.out binaries) + * * The intersegment call has been set up to specify one dummy parameter. + * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. - */ -/* - * THis first callgate is used for the old a.out binaries + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(syscall) @@ -259,20 +275,28 @@ IDTVEC(syscall) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - SYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs. - */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) + * + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather than an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. + * + * We do not obtain the MP lock, but the call to syscall2 might. If it + * does it will release the lock prior to returning. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) @@ -289,16 +313,17 @@ IDTVEC(int0x80_syscall) movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) MPLOCKED incl _cnt+V_SYSCALL - ALTSYSCALL_LOCK - call _syscall - - /* - * Return via _doreti to handle ASTs.
- */ + call _syscall2 + MEXITCOUNT + cli /* atomic astpending access */ + cmpl $0,_astpending + je doreti_syscall_ret +#ifdef SMP + MP_LOCK +#endif pushl $0 /* cpl to restore */ - subl $4,%esp /* dummy unit to finish intr frame */ + subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level - MEXITCOUNT jmp _doreti ENTRY(fork_trampoline) diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 23039820715e..5a2377be0085 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -176,6 +176,7 @@ ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime)); ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks)); ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt)); +ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending)); #ifdef USER_LDT ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt)); diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s index 8a46b522cc7c..31fbfd5e98b1 100644 --- a/sys/i386/i386/globals.s +++ b/sys/i386/i386/globals.s @@ -61,9 +61,10 @@ globaldata: #else .set globaldata,0 #endif - .globl gd_curproc, gd_curpcb, gd_npxproc + .globl gd_curproc, gd_curpcb, gd_npxproc, gd_astpending .globl gd_common_tss, gd_switchtime, gd_switchticks .set gd_curproc,globaldata + GD_CURPROC + .set gd_astpending,globaldata + GD_ASTPENDING .set gd_curpcb,globaldata + GD_CURPCB .set gd_npxproc,globaldata + GD_NPXPROC .set gd_common_tss,globaldata + GD_COMMON_TSS @@ -80,9 +81,10 @@ globaldata: #endif #ifndef SMP - .globl _curproc, _curpcb, _npxproc + .globl _curproc, _curpcb, _npxproc, _astpending .globl _common_tss, _switchtime, _switchticks .set _curproc,globaldata + GD_CURPROC + .set _astpending,globaldata + GD_ASTPENDING .set _curpcb,globaldata + GD_CURPCB .set _npxproc,globaldata + GD_NPXPROC .set _common_tss,globaldata + GD_COMMON_TSS diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s index a37b14a174f5..858df3310a21 100644 --- a/sys/i386/i386/mplock.s +++ b/sys/i386/i386/mplock.s @@ -18,7 +18,6 @@ * The attempt to seize/release the semaphore and the increment/decrement * is done in one atomic operation. This way we are safe from all kinds * of weird reentrancy situations. - * */ #include <machine/asmacros.h> @@ -51,12 +50,8 @@ * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts. 
 */
 
-/* location of saved TPR on stack */
-#define TPR_TARGET	8(%esp)
-
-/* after 1st acquire of lock we attempt to grab all hardware INTs */
-#define GRAB_HWI	movl	$ALLHWI_LEVEL, TPR_TARGET
-#define GRAB_HWI_2	movl	$ALLHWI_LEVEL, lapic_tpr /* CHEAP_TPR */
+/* after 1st acquire of lock we grab all hardware INTs */
+#define GRAB_HWI	movl	$ALLHWI_LEVEL, lapic_tpr
 
 /* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */
 #define ARB_HWI		movl	$LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */
 
@@ -64,7 +59,6 @@
 #else /* GRAB_LOPRIO */
 
 #define GRAB_HWI	/* nop */
-#define GRAB_HWI_2	/* nop */
 #define ARB_HWI		/* nop */
 
 #endif /* GRAB_LOPRIO */
 
@@ -75,7 +69,8 @@
  * void MPgetlock_edx(unsigned int *lock : %edx)
  * ----------------------------------
  * Destroys %eax, %ecx.  %edx must hold lock argument.
- * Note: TPR_TARGET (relative to the stack) is destroyed in GRAB_HWI
+ *
+ * Grabs hardware interrupts on first acquire.
  *
  * NOTE: Serialization is not required if we already hold the lock, since
  * we already hold the lock, nor do we need a locked instruction if we
@@ -131,7 +126,7 @@ NON_GPROF_ENTRY(MPtrylock)
 #ifdef GLPROFILE
 	incl	_tryhits2
 #endif /* GLPROFILE */
-	GRAB_HWI_2			/* 1st acquire, grab hw INTs */
+	GRAB_HWI			/* 1st acquire, grab hw INTs */
 	movl	$1, %eax
 	ret
 1:
@@ -197,38 +192,51 @@ NON_GPROF_ENTRY(MPrellock_edx)
  *
  * Stack (after call to _MPgetlock):
  *
- *	EFLAGS		 4(%esp)
- *	local APIC TPR	 8(%esp)	<-- note, TPR_TARGET
- *	edx		12(%esp)
- *	ecx		16(%esp)
- *	eax		20(%esp)
+ *	edx		 4(%esp)
+ *	ecx		 8(%esp)
+ *	eax		12(%esp)
+ *
+ * Requirements:  Interrupts should be enabled on call so we can take
+ *		  IPI's and FAST INTs while we are waiting for the lock
+ *		  (else the system may not be able to halt).
+ *
+ *		  XXX there are still places where get_mplock() is called
+ *		  with interrupts disabled, so we have to temporarily reenable
+ *		  interrupts.
+ *
+ * Side effects:  The current cpu will be given ownership of the
+ *		  hardware interrupts when it first acquires the lock.
+ *
+ * Costs:	  Initial acquisition requires the use of a costly locked
+ *		  instruction, but recursive acquisition is cheap.  Release
+ *		  is very cheap.
 */
 
NON_GPROF_ENTRY(get_mplock)
 	pushl	%eax
 	pushl	%ecx
 	pushl	%edx
-
-	/* block all HW INTs via Task Priority Register */
-	pushl	lapic_tpr		/* save current TPR */
-	pushfl				/* save current EFLAGS */
-	testl	$(1<<9), (%esp)		/* test EI bit */
-	jnz	1f			/* INTs currently enabled */
-	sti				/* allow IPI and FAST INTs */
-1:
 	movl	$_mp_lock, %edx
+	pushfl
+	testl	$(1<<9), (%esp)
+	jz	2f
 	call	_MPgetlock_edx
-
-	popfl				/* restore original EFLAGS */
-	popl	lapic_tpr		/* restore TPR */
+	addl	$4,%esp
+1:
 	popl	%edx
 	popl	%ecx
 	popl	%eax
 	ret
+2:
+	sti
+	call	_MPgetlock_edx
+	popfl
+	jmp	1b
 
 /*
  * Special version of get_mplock that is used during bootstrap when we can't
- * yet enable interrupts of any sort since the APIC isn't online yet.
+ * yet enable interrupts of any sort since the APIC isn't online yet.  We
+ * do an end run around MPgetlock_edx to avoid enabling interrupts.
 *
 * XXX FIXME.. - APIC should be online from the start to simplify IPI's.
*/ @@ -236,20 +244,19 @@ NON_GPROF_ENTRY(boot_get_mplock) pushl %eax pushl %ecx pushl %edx - #ifdef GRAB_LOPRIO - pushl $0 /* dummy TPR (TPR_TARGET) */ pushfl + pushl lapic_tpr + cli #endif movl $_mp_lock, %edx call _MPgetlock_edx #ifdef GRAB_LOPRIO + popl lapic_tpr popfl - addl $4, %esp #endif - popl %edx popl %ecx popl %eax @@ -287,214 +294,6 @@ NON_GPROF_ENTRY(rel_mplock) ret /*********************************************************************** - * void get_isrlock() - * ----------------- - * no registers preserved, assummed the calling ISR does! - * - * Stack (after call to _MPgetlock): - * - * EFLAGS 4(%esp) - * local APIC TPR 8(%esp) - */ - -NON_GPROF_ENTRY(get_isrlock) - - /* block all HW INTs via Task Priority Register */ - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl /* save current EFLAGS */ - sti /* allow IPI and FAST INTs */ - - movl $_mp_lock, %edx - call _MPgetlock_edx - - popfl /* restore original EFLAGS */ - popl lapic_tpr /* restore TPR */ - ret - - -/*********************************************************************** - * void try_isrlock() - * ----------------- - * no registers preserved, assummed the calling ISR does! - * reg %eax == 1 if success - */ - -NON_GPROF_ENTRY(try_isrlock) - pushl $_mp_lock - call _MPtrylock - add $4, %esp - ret - - -/*********************************************************************** - * void rel_isrlock() - * ----------------- - * no registers preserved, assummed the calling ISR does! - */ - -NON_GPROF_ENTRY(rel_isrlock) - movl $_mp_lock,%edx - jmp _MPrellock_edx - - -/*********************************************************************** - * FPU locks - */ - -NON_GPROF_ENTRY(get_fpu_lock) - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl - sti - movl $_mp_lock, %edx - call _MPgetlock_edx - popfl - popl lapic_tpr - ret - -#ifdef notneeded -NON_GPROF_ENTRY(try_fpu_lock) - pushl $_mp_lock - call _MPtrylock - add $4, %esp - ret - -NON_GPROF_ENTRY(rel_fpu_lock) - movl $_mp_lock,%edx - jmp _MPrellock_edx -#endif /* notneeded */ - - -/*********************************************************************** - * align locks - */ - -NON_GPROF_ENTRY(get_align_lock) - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl - sti - movl $_mp_lock, %edx - call _MPgetlock_edx - popfl - popl lapic_tpr - ret - -#ifdef notneeded -NON_GPROF_ENTRY(try_align_lock) - pushl $_mp_lock - call _MPtrylock - add $4, %esp - ret - -NON_GPROF_ENTRY(rel_align_lock) - movl $_mp_lock,%edx - jmp _MPrellock_edx -#endif /* notneeded */ - - -/*********************************************************************** - * syscall locks - */ - -NON_GPROF_ENTRY(get_syscall_lock) - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl - sti - movl $_mp_lock, %edx - call _MPgetlock_edx - popfl - popl lapic_tpr - ret - -#ifdef notneeded -NON_GPROF_ENTRY(try_syscall_lock) - pushl $_mp_lock - call _MPtrylock - add $4, %esp - ret -#endif /* notneeded */ - -NON_GPROF_ENTRY(rel_syscall_lock) - movl $_mp_lock,%edx - jmp _MPrellock_edx - - -/*********************************************************************** - * altsyscall locks - */ - -NON_GPROF_ENTRY(get_altsyscall_lock) - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl - sti - movl $_mp_lock, %edx - call _MPgetlock_edx - popfl - popl lapic_tpr - ret - -#ifdef notneeded -NON_GPROF_ENTRY(try_altsyscall_lock) - pushl $_mp_lock - call _MPtrylock - add $4, %esp - ret - -NON_GPROF_ENTRY(rel_altsyscall_lock) - movl $_mp_lock,%edx - jmp _MPrellock_edx -#endif /* notneeded */ - - 
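All of the special-purpose lock wrappers deleted here collapse into the one recursive mp_lock.  A rough C rendering of the protocol follows, assuming a cmpxchg-style primitive; the owner-in-high-bits encoding and all names are illustrative, see i386/i386/mplock.s for the authoritative assembly:

#define MP_FREE	0xffffffff		/* illustrative "free" value */

static volatile unsigned mp_lock_sketch = MP_FREE;

static __inline int
cmpset_sketch(volatile unsigned *dst, unsigned old, unsigned newv)
{
	unsigned char ok;

	/* lock cmpxchg: if (*dst == %eax) *dst = newv; ZF set on success */
	__asm __volatile("lock; cmpxchgl %3,%1; sete %0"
	    : "=q" (ok), "+m" (*dst), "+a" (old) : "r" (newv));
	return (ok);
}

static void
get_mplock_sketch(unsigned cpuid)
{
	unsigned v;

	for (;;) {
		v = mp_lock_sketch;
		if ((v >> 24) == cpuid) {	/* recursive: cheap bump */
			if (cmpset_sketch(&mp_lock_sketch, v, v + 1))
				return;
		} else if (v == MP_FREE &&	/* first acquisition */
		    cmpset_sketch(&mp_lock_sketch, v, (cpuid << 24) | 1))
			return;
		/* spin with interrupts enabled so IPIs can be serviced */
	}
}

Release is the mirror image: decrement the count and store the free value when it reaches zero, which is why recursive acquisition and release are described above as cheap.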
-#ifdef RECURSIVE_MPINTRLOCK -/*********************************************************************** - * void get_mpintrlock() - * ----------------- - * All registers preserved - */ - -NON_GPROF_ENTRY(get_mpintrlock) - pushl %eax - pushl %ecx - pushl %edx - -#ifdef GRAB_LOPRIO - pushl lapic_tpr /* save current TPR (TPR_TARGET) */ - pushfl -#endif - - movl $_mpintr_lock, %edx - call _MPgetlock_edx - -#ifdef GRAB_LOPRIO - popfl - popl lapic_tpr -#endif - - popl %edx - popl %ecx - popl %eax - ret - -/*********************************************************************** - * void rel_mpintrlock() - * ----------------- - * All registers preserved - */ - -NON_GPROF_ENTRY(rel_mpintrlock) - pushl %ecx - pushl %edx - - movl $_mpintr_lock,%edx - call _MPrellock_edx - - popl %edx - popl %ecx - ret -#endif /* RECURSIVE_MPINTRLOCK */ - - -/*********************************************************************** * */ .data @@ -503,15 +302,6 @@ NON_GPROF_ENTRY(rel_mpintrlock) .globl _mp_lock _mp_lock: .long 0 - .globl _isr_lock -_isr_lock: .long 0 - -#ifdef RECURSIVE_MPINTRLOCK - .globl _mpintr_lock -_mpintr_lock: .long 0xffffffff -#endif /* RECURSIVE_MPINTRLOCK */ - - #ifdef GLPROFILE .globl _gethits _gethits: diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/i386/i386/simplelock.s b/sys/i386/i386/simplelock.s index 9abc3b74660a..378cf85389e9 100644 --- a/sys/i386/i386/simplelock.s +++ b/sys/i386/i386/simplelock.s @@ -186,8 +186,11 @@ ENTRY(s_unlock) movl $0, (%eax) ret +#if 0 /* + * XXX CRUFTY SS_LOCK IMPLEMENTATION REMOVED XXX + * * These versions of simple_lock block interrupts, * making it suitable for regions accessed by both top and bottom levels. * This is done by saving the current value of the cpu flags in a per-cpu @@ -279,6 +282,8 @@ ENTRY(ss_unlock) ss_unlock2: ret +#endif + /* * These versions of simple_lock does not contain calls to profiling code. * Thus they can be called from the profiling code. 
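For contrast with the mp_lock, the simple_lock primitives kept above are plain test-and-set spinlocks.  A hedged C sketch of what s_lock()/s_unlock() provide (xchgl is implicitly locked on x86; the real code is the assembly in this file):

struct slock_sketch {
	volatile int	lock_data;	/* 0 = free, 1 = held */
};

static __inline int
xchg_sketch(volatile int *p, int v)
{
	__asm __volatile("xchgl %0,%1" : "+r" (v), "+m" (*p));
	return (v);			/* previous value */
}

static void
s_lock_sketch(struct slock_sketch *sl)
{
	while (xchg_sketch(&sl->lock_data, 1) != 0)
		;			/* spin until the holder stores 0 */
}

static void
s_unlock_sketch(struct slock_sketch *sl)
{
	sl->lock_data = 0;		/* plain store suffices on x86 */
}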
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 394848cd047f..1f350f5d1e27 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -848,7 +848,11 @@ ENTRY(i586_copyout) jmp done_copyout #endif /* I586_CPU && NNPX > 0 */ -/* copyin(from_user, to_kernel, len) */ +/* + * copyin(from_user, to_kernel, len) + * + * MPSAFE + */ ENTRY(copyin) MEXITCOUNT jmp *_copyin_vector @@ -1130,6 +1134,8 @@ fastmove_tail_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory + * + * MP SAFE */ ENTRY(fuword) movl _curpcb,%ecx @@ -1154,6 +1160,9 @@ ENTRY(fuswintr) movl $-1,%eax ret +/* + * MP SAFE + */ ENTRY(fusword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) @@ -1166,6 +1175,9 @@ ENTRY(fusword) movl $0,PCB_ONFAULT(%ecx) ret +/* + * MP SAFE + */ ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index f3982ae688f0..a05d541d1051 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -65,8 +65,6 @@ _hlt_vector: .long _default_halt /* pointer to halt routine */ .globl _panic - .globl _want_resched -_want_resched: .long 0 /* we need to re-run the scheduler */ #if defined(SWTCH_OPTIM_STATS) .globl _swtch_optim_stats, _tlb_flush_count _swtch_optim_stats: .long 0 /* number of _swtch_optims */ @@ -129,6 +127,9 @@ _idle: /* * XXX callers of cpu_switch() do a bogus splclock(). Locking should * be left to cpu_switch(). + * + * NOTE: spl*() may only be called while we hold the MP lock (which + * we do). */ call _spl0 @@ -159,14 +160,14 @@ idle_loop: testl %eax,%eax jnz 3f + /* + * Handle page-zeroing in the idle loop. Called with interrupts + * disabled and the MP lock released. Inside vm_page_zero_idle + * we enable interrupts and grab the mplock as required. + */ cmpl $0,_do_page_zero_idle je 2f - /* XXX appears to cause panics */ - /* - * Inside zero_idle we enable interrupts and grab the mplock - * as needed. It needs to be careful about entry/exit mutexes. - */ call _vm_page_zero_idle /* internal locking */ testl %eax, %eax jnz idle_loop @@ -178,9 +179,15 @@ idle_loop: cli jmp idle_loop + /* + * Note that interrupts must be enabled while obtaining the MP lock + * in order to be able to take IPI's while blocked. 
+ */ 3: movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ + sti call _get_mplock + cli call _procrunnable testl %eax,%eax CROSSJUMP(jnz, sw1a, jz) @@ -355,8 +362,8 @@ sw1a: CROSSJUMP(je, _idle, jne) /* if no proc, idle */ movl %eax,%ecx - movl $0,%eax - movl %eax,_want_resched + xorl %eax,%eax + andl $~WANT_RESCHED,_astpending #ifdef DIAGNOSTIC cmpl %eax,P_WCHAN(%ecx) diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c index d722735f8b4c..081d28c93b2c 100644 --- a/sys/i386/i386/sys_machdep.c +++ b/sys/i386/i386/sys_machdep.c @@ -53,6 +53,7 @@ #include <sys/user.h> #include <machine/cpu.h> +#include <machine/ipl.h> #include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ #include <machine/sysarch.h> #ifdef SMP diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index a8b73cf6a02b..703d48dc84ed 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); -extern void syscall __P((struct trapframe frame)); +extern void syscall2 __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -140,38 +140,32 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; -static __inline void userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks)); +static __inline int userret __P((struct proc *p, struct trapframe *frame, + u_quad_t oticks, int have_mplock)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif -static __inline void -userret(p, frame, oticks) +static __inline int +userret(p, frame, oticks, have_mplock) struct proc *p; struct trapframe *frame; u_quad_t oticks; + int have_mplock; { int sig, s; - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } postsig(sig); - -#if 0 - if (!want_resched && - (p->p_priority <= p->p_usrpri) && - (p->p_rtprio.type == RTP_PRIO_NORMAL)) { - int newpriority; - p->p_estcpu += 1; - newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; - newpriority = min(newpriority, MAXPRI); - p->p_usrpri = newpriority; } -#endif - + p->p_priority = p->p_usrpri; - if (want_resched) { + if (resched_wanted()) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another @@ -180,6 +174,10 @@ userret(p, frame, oticks) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; @@ -191,11 +189,16 @@ userret(p, frame, oticks) /* * Charge system time if profiling. */ - if (p->p_flag & P_PROFIL) + if (p->p_flag & P_PROFIL) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); - + } curpriority = p->p_priority; + return(have_mplock); } /* @@ -604,7 +607,7 @@ kernel_trap: #endif out: - userret(p, &frame, sticks); + userret(p, &frame, sticks, 1); } #ifdef notyet @@ -999,11 +1002,18 @@ int trapwrite(addr) } /* - * System call request from POSIX system call gate interface to kernel. - * Like trap(), argument is call by reference. 
+ * syscall2 -	MP aware system call request C handler
+ *
+ *	A system call is essentially treated as a trap except that the
+ *	MP lock is not held on entry or return.  We are responsible for
+ *	obtaining the MP lock if necessary and for handling ASTs
+ *	(e.g. a task switch) prior to return.
+ *
+ *	In general, only simple access and manipulation of curproc and
+ *	the current stack is allowed without having to hold the MP lock.
  */
 void
-syscall(frame)
+syscall2(frame)
 	struct trapframe frame;
 {
 	caddr_t params;
@@ -1012,22 +1022,42 @@
 	struct proc *p = curproc;
 	u_quad_t sticks;
 	int error;
+	int narg;
 	int args[8];
+	int have_mplock = 0;
 	u_int code;
 
 #ifdef DIAGNOSTIC
-	if (ISPL(frame.tf_cs) != SEL_UPL)
+	if (ISPL(frame.tf_cs) != SEL_UPL) {
+		get_mplock();
 		panic("syscall");
+		/* NOT REACHED */
+	}
 #endif
-	sticks = p->p_sticks;
+
+	/*
+	 * handle atomicity by looping since interrupts are enabled and the
+	 * MP lock is not held.
+	 */
+	sticks = ((volatile struct proc *)p)->p_sticks;
+	while (sticks != ((volatile struct proc *)p)->p_sticks)
+		sticks = ((volatile struct proc *)p)->p_sticks;
+
 	p->p_md.md_regs = &frame;
 	params = (caddr_t)frame.tf_esp + sizeof(int);
 	code = frame.tf_eax;
+
 	if (p->p_sysent->sv_prepsyscall) {
+		/*
+		 * The prep code is not MP aware.
+		 */
+		get_mplock();
 		(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+		rel_mplock();
 	} else {
 		/*
 		 * Need to check if this is a 32 bit or 64 bit syscall.
+		 * fuword is MP aware.
 		 */
 		if (code == SYS_syscall) {
 			/*
@@ -1053,27 +1083,52 @@
 	else
 		callp = &p->p_sysent->sv_table[code];
 
-	if (params && (i = callp->sy_narg * sizeof(int)) &&
+	narg = callp->sy_narg & SYF_ARGMASK;
+
+	/*
+	 * copyin is MP aware, but the tracing code is not
+	 */
+	if (params && (i = narg * sizeof(int)) &&
 	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
+		get_mplock();
+		have_mplock = 1;
 #ifdef KTRACE
 		if (KTRPOINT(p, KTR_SYSCALL))
-			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+			ktrsyscall(p->p_tracep, code, narg, args);
 #endif
 		goto bad;
 	}
+
+	/*
+	 * Try to run the syscall without the MP lock if the syscall
+	 * is MP safe.  We have to obtain the MP lock no matter what if
+	 * we are ktracing.
+	 */
+	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+		get_mplock();
+		have_mplock = 1;
+	}
+
 #ifdef KTRACE
-	if (KTRPOINT(p, KTR_SYSCALL))
-		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
+	if (KTRPOINT(p, KTR_SYSCALL)) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		ktrsyscall(p->p_tracep, code, narg, args);
+	}
 #endif
 	p->p_retval[0] = 0;
 	p->p_retval[1] = frame.tf_edx;
 
-	STOPEVENT(p, S_SCE, callp->sy_narg);
+	STOPEVENT(p, S_SCE, narg);	/* MP aware */
 
 	error = (*callp->sy_call)(p, args);
 
+	/*
+	 * MP SAFE (we may or may not have the MP lock at this point)
+	 */
 	switch (error) {
-
 	case 0:
 		/*
 		 * Reinitialize proc pointer `p' as it may be different
@@ -1109,17 +1164,31 @@ bad:
 		break;
 	}
 
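The SYF_MPSAFE/SYF_ARGMASK split of sy_narg is what drives the lazy locking above.  A compressed sketch of the idea; the flag values are illustrative and this is not the dispatcher itself (EINVAL comes from <sys/errno.h>):

#define SYF_ARGMASK	0x0000ffff	/* illustrative values only */
#define SYF_MPSAFE	0x00010000

struct proc;

struct sysent_sketch {
	int	sy_narg;		/* flag bits | argument count */
	int	(*sy_call)(struct proc *, int *);
};

extern void get_mplock(void);		/* the real mplock.s entry points */
extern void rel_mplock(void);

static int
dispatch_sketch(struct proc *p, struct sysent_sketch *callp, int *args)
{
	int narg = callp->sy_narg & SYF_ARGMASK;
	int have_mplock = 0;
	int error;

	if (narg > 8)			/* syscall2()'s args[] is 8 ints */
		return (EINVAL);
	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();		/* not flagged MP safe: take Giant */
		have_mplock = 1;
	}
	error = callp->sy_call(p, args);
	if (have_mplock)
		rel_mplock();
	return (error);
}

+	/*
+	 * Traced syscall.  trapsignal() is not MP aware.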
*/ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } - userret(p, &frame, sticks); + /* + * Handle reschedule and other end-of-syscall issues + */ + have_mplock = userret(p, &frame, sticks, have_mplock); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); + } #endif /* @@ -1129,11 +1198,17 @@ bad: */ STOPEVENT(p, S_SCX, code); + /* + * Release the MP lock if we had to get it + */ + if (have_mplock) + rel_mplock(); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. + * directly into user mode. MP lock is held on entry and should be + * held on return. */ void fork_return(p, frame) @@ -1144,7 +1219,7 @@ fork_return(p, frame) frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0); + userret(p, &frame, 0, 1); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s index 778148176491..a64c0402c15b 100644 --- a/sys/i386/i386/vm86bios.s +++ b/sys/i386/i386/vm86bios.s @@ -64,7 +64,7 @@ ENTRY(vm86_bioscall) #ifdef SMP pushl %edx - ALIGN_LOCK /* Get global lock */ + MP_LOCK /* Get global lock */ popl %edx #endif @@ -136,12 +136,7 @@ ENTRY(vm86_bioscall) * Return via _doreti */ #ifdef SMP - ECPL_LOCK -#ifdef CPL_AND_CML -#error Not ready for CPL_AND_CML -#endif pushl _cpl /* cpl to restore */ - ECPL_UNLOCK #else pushl _cpl /* cpl to restore */ #endif diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 9f5d6c046d46..d58e5a6538fb 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -578,13 +578,7 @@ vm_page_zero_idle() TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq); m->queue = PQ_NONE; splx(s); -#if 0 - rel_mplock(); -#endif pmap_zero_page(VM_PAGE_TO_PHYS(m)); -#if 0 - get_mplock(); -#endif (void)splvm(); vm_page_flag_set(m, PG_ZERO); m->queue = PQ_FREE + m->pc; @@ -606,6 +600,12 @@ vm_page_zero_idle() #ifdef SMP } #endif + /* + * We have to enable interrupts for a moment if the try_mplock fails + * in order to potentially take an IPI. 
XXX this should be in
+	 * swtch.s
+	 */
+	__asm __volatile("sti; nop; cli" : : : "memory");
 	return (0);
 }
diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h
index b9b631d6d314..3ccbee6be344 100644
--- a/sys/i386/include/asnames.h
+++ b/sys/i386/include/asnames.h
@@ -155,7 +155,6 @@
 #define _arith_invalid			arith_invalid
 #define _arith_overflow			arith_overflow
 #define _arith_underflow		arith_underflow
-#define _astpending			astpending
 #define _bcopy				bcopy
 #define _bcopy_vector			bcopy_vector
 #define _bigJump			bigJump
@@ -307,7 +306,7 @@
 #define _swi_generic			swi_generic
 #define _swi_null			swi_null
 #define _swi_vm				swi_vm
-#define _syscall			syscall
+#define _syscall2			syscall2
 #define _szsigcode			szsigcode
 #define _ticks				ticks
 #define _time				time
@@ -321,7 +320,6 @@
 #define _vm86paddr			vm86paddr
 #define _vm86pcb			vm86pcb
 #define _vm_page_zero_idle		vm_page_zero_idle
-#define _want_resched			want_resched
 #define _wm_sqrt			wm_sqrt
 
 #endif /* __ELF__ */
@@ -339,6 +337,7 @@
 #define _cpu_lockid			FS(cpu_lockid)
 #define _curpcb				FS(curpcb)
 #define _curproc			FS(curproc)
+#define _astpending			FS(astpending)
 #define _currentldt			FS(currentldt)
 #define _inside_intr			FS(inside_intr)
 #define _npxproc			FS(npxproc)
diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h
index 18049d0d681b..c6aa46fbe5b2 100644
--- a/sys/i386/include/cpu.h
+++ b/sys/i386/include/cpu.h
@@ -82,10 +82,13 @@
 /*
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
+ *
+ * XXX: if astpending is later changed to an |= here due to more flags being
+ * added, we will have an atomicity problem.  The type of atomicity we need
+ * is a non-locked orl.
  */
-#define	need_resched()	do { want_resched = 1; aston(); } while (0)
-
-#define	resched_wanted()	want_resched
+#define	need_resched()	do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define	resched_wanted()	(astpending & AST_RESCHED)
 
 /*
  * Arrange to handle pending profiling ticks before returning to user mode.
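The atomicity worry in the comment above is a classic lost-update race.  Sketched in C, with the flag values this patch adds to machine/ipl.h; everything here is illustrative:

#define AST_PENDING	0x00000001	/* as added to machine/ipl.h */
#define AST_RESCHED	0x00000002

volatile unsigned astpending_sketch;	/* per-cpu in the real kernel */

void
aston_sketch(void)
{
	/*
	 * "astpending |= AST_PENDING" compiles to load, orl, store.  An
	 * interrupt running need_resched() between the load and the store
	 * would have its AST_RESCHED bit overwritten by the store.
	 */
	astpending_sketch |= AST_PENDING;
}

void
need_resched_sketch(void)
{
	/*
	 * Storing both bits, rather than or'ing one in, is what keeps
	 * need_resched() itself safe today, per the comment above.
	 */
	astpending_sketch = AST_RESCHED | AST_PENDING;
}

@@ -100,10 +103,15 @@
 /*
  * Notify the current process (p) that it has a signal pending,
  * process as soon as possible.
+ *
+ * XXX: aston() really needs to be atomic (not locked, but an orl),
+ * in case need_resched() is set by an interrupt.  But with astpending a
+ * per-cpu variable this is not trivial to do efficiently.  For now we blow
+ * it off (asynchronous need_resched() conflicts are not critical).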
*/ #define signotify(p) aston() -#define aston() do { astpending = 1; } while (0) +#define aston() do { astpending |= AST_PENDING; } while (0) #define astoff() /* @@ -126,11 +134,9 @@ } #ifdef _KERNEL -extern int astpending; extern char btext[]; extern char etext[]; extern u_char intr_nesting_level; -extern int want_resched; /* resched was called */ void fork_trampoline __P((void)); void fork_return __P((struct proc *, struct trapframe)); diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h index 28336d708534..58bd9cfe9416 100644 --- a/sys/i386/include/globaldata.h +++ b/sys/i386/include/globaldata.h @@ -66,6 +66,7 @@ struct globaldata { caddr_t gd_prv_CADDR3; unsigned *gd_prv_PADDR1; #endif + u_int gd_astpending; }; #ifdef SMP diff --git a/sys/i386/include/globals.h b/sys/i386/include/globals.h index b5f9514af648..ae05d5644e76 100644 --- a/sys/i386/include/globals.h +++ b/sys/i386/include/globals.h @@ -90,6 +90,7 @@ #define common_tssd GLOBAL_LVALUE(common_tssd, struct segment_descriptor) #define tss_gdt GLOBAL_LVALUE(tss_gdt, struct segment_descriptor *) +#define astpending GLOBAL_LVALUE(astpending, u_int) #ifdef USER_LDT #define currentldt GLOBAL_LVALUE(currentldt, int) @@ -111,6 +112,7 @@ #endif /*UP kernel*/ GLOBAL_FUNC(curproc) +GLOBAL_FUNC(astpending) GLOBAL_FUNC(curpcb) GLOBAL_FUNC(npxproc) GLOBAL_FUNC(common_tss) diff --git a/sys/i386/include/ipl.h b/sys/i386/include/ipl.h index 28c39aaf94e0..2a73a6acd3fb 100644 --- a/sys/i386/include/ipl.h +++ b/sys/i386/include/ipl.h @@ -85,6 +85,12 @@ #define SWI_CLOCK_MASK SWI_CLOCK_PENDING #define SWI_MASK (~HWI_MASK) +/* + * astpending bits + */ +#define AST_PENDING 0x00000001 +#define AST_RESCHED 0x00000002 + #ifndef LOCORE /* diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h index 25c54e48f8cf..534f77e8d2fb 100644 --- a/sys/i386/include/lock.h +++ b/sys/i386/include/lock.h @@ -37,24 +37,17 @@ #define MPLOCKED lock ; /* - * Some handy macros to allow logical organization and - * convenient reassignment of various locks. + * Some handy macros to allow logical organization. */ -#define FPU_LOCK call _get_fpu_lock -#define ALIGN_LOCK call _get_align_lock -#define SYSCALL_LOCK call _get_syscall_lock -#define ALTSYSCALL_LOCK call _get_altsyscall_lock +#define MP_LOCK call _get_mplock -/* - * Protects INTR() ISRs. - */ -#define ISR_TRYLOCK \ +#define MP_TRYLOCK \ pushl $_mp_lock ; /* GIANT_LOCK */ \ call _MPtrylock ; /* try to get lock */ \ add $4, %esp -#define ISR_RELLOCK \ +#define MP_RELLOCK \ movl $_mp_lock,%edx ; /* GIANT_LOCK */ \ call _MPrellock_edx @@ -69,55 +62,11 @@ #define IMASK_UNLOCK \ movl $0, _imen_lock -/* - * Variations of CPL_LOCK protect spl updates as a critical region. - * Items within this 'region' include: - * cpl - * cml - * cil - * ipending - */ - -/* - * Bottom half routines, ie. those already protected from INTs. - * - * Used in: - * sys/i386/isa/ipl.s: _doreti - * sys/i386/isa/apic_vector.s: _Xintr0, ..., _Xintr23 - */ -#define CPL_LOCK \ - pushl $_cpl_lock ; /* address of lock */ \ - call _s_lock ; /* MP-safe */ \ - addl $4, %esp - -#define CPL_UNLOCK \ - movl $0, _cpl_lock - -/* - * INT safe version for top half of kernel. 
- * - * Used in: - * sys/i386/i386/exception.s: _Xfpu, _Xalign, _Xsyscall, _Xint0x80_syscall - * sys/i386/isa/apic_ipl.s: splz() - */ -#define SCPL_LOCK \ - pushl $_cpl_lock ; \ - call _ss_lock ; \ - addl $4, %esp - -#define SCPL_UNLOCK \ - pushl $_cpl_lock ; \ - call _ss_unlock ; \ - addl $4, %esp - #else /* SMP */ #define MPLOCKED /* NOP */ -#define FPU_LOCK /* NOP */ -#define ALIGN_LOCK /* NOP */ -#define SYSCALL_LOCK /* NOP */ -#define ALTSYSCALL_LOCK /* NOP */ +#define MP_LOCK /* NOP */ #endif /* SMP */ @@ -139,18 +88,6 @@ #endif /* USE_MPINTRLOCK */ /* - * Protects cpl/cml/cil/ipending data as a critical region. - * - * Used in: - * sys/i386/isa/ipl_funcs.c: DO_SETBITS, softclockpending(), GENSPL, - * spl0(), splx(), splq() - */ -#define CPL_LOCK() s_lock(&cpl_lock) /* Bottom end */ -#define CPL_UNLOCK() s_unlock(&cpl_lock) -#define SCPL_LOCK() ss_lock(&cpl_lock) /* INT safe: top end */ -#define SCPL_UNLOCK() ss_unlock(&cpl_lock) - -/* * sio/cy lock. * XXX should rc (RISCom/8) use this? */ @@ -191,11 +128,6 @@ #define MPINTR_LOCK() #define MPINTR_UNLOCK() -#define CPL_LOCK() -#define CPL_UNLOCK() -#define SCPL_LOCK() -#define SCPL_UNLOCK() - #define COM_LOCK() #define COM_UNLOCK() #define CLOCK_LOCK() diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 28336d708534..58bd9cfe9416 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -66,6 +66,7 @@ struct globaldata { caddr_t gd_prv_CADDR3; unsigned *gd_prv_PADDR1; #endif + u_int gd_astpending; }; #ifdef SMP diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index dd897284e234..f9ac4a36919e 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -63,65 +63,6 @@ #define PUSHDOWN_LEVEL_3_NOT #define PUSHDOWN_LEVEL_4_NOT - -/* - * XXX some temp debug control of cpl locks - */ -#ifdef PUSHDOWN_LEVEL_2 -#define REAL_ECPL /* exception.s: SCPL_LOCK/SCPL_UNLOCK */ -#define REAL_ICPL /* ipl.s: CPL_LOCK/CPL_UNLOCK/FAST */ -#define REAL_AICPL /* apic_ipl.s: SCPL_LOCK/SCPL_UNLOCK */ -#define REAL_AVCPL /* apic_vector.s: CPL_LOCK/CPL_UNLOCK */ -#define REAL_IFCPL /* ipl_funcs.c: SCPL_LOCK/SCPL_UNLOCK */ -#endif /* PUSHDOWN_LEVEL_2 */ - -/* - * The xCPL_LOCK/xCPL_UNLOCK defines control the spinlocks - * that protect cpl/cml/cil and the spl functions. 
- */ -#ifdef REAL_ECPL -#define ECPL_LOCK SCPL_LOCK -#define ECPL_UNLOCK SCPL_UNLOCK -#else -#define ECPL_LOCK -#define ECPL_UNLOCK -#endif /* REAL_ECPL */ - -#ifdef REAL_ICPL -#define ICPL_LOCK CPL_LOCK -#define ICPL_UNLOCK CPL_UNLOCK -#define FAST_ICPL_UNLOCK movl $0, _cpl_lock -#else -#define ICPL_LOCK -#define ICPL_UNLOCK -#define FAST_ICPL_UNLOCK -#endif /* REAL_ICPL */ - -#ifdef REAL_AICPL -#define AICPL_LOCK SCPL_LOCK -#define AICPL_UNLOCK SCPL_UNLOCK -#else -#define AICPL_LOCK -#define AICPL_UNLOCK -#endif /* REAL_AICPL */ - -#ifdef REAL_AVCPL -#define AVCPL_LOCK CPL_LOCK -#define AVCPL_UNLOCK CPL_UNLOCK -#else -#define AVCPL_LOCK -#define AVCPL_UNLOCK -#endif /* REAL_AVCPL */ - -#ifdef REAL_IFCPL -#define IFCPL_LOCK() SCPL_LOCK() -#define IFCPL_UNLOCK() SCPL_UNLOCK() -#else -#define IFCPL_LOCK() -#define IFCPL_UNLOCK() -#endif /* REAL_IFCPL */ - - /* * Debug version of simple_lock. This will store the CPU id of the * holding CPU along with the lock. When a CPU fails to get the lock @@ -160,36 +101,40 @@ /* - * Regular INTerrupts without the giant lock, NOT READY YET!!! + * INTR_SIMPLELOCK has been removed, as the interrupt mechanism will likely + * not use this sort of optimization if we move to interrupt threads. */ #ifdef PUSHDOWN_LEVEL_4 -#define INTR_SIMPLELOCK #endif /* - * Separate the INTR() portion of cpl into another variable: cml. + * CPL_AND_CML has been removed. Interrupt threads will eventually not + * use either mechanism so there is no point trying to optimize it. */ #ifdef PUSHDOWN_LEVEL_3 -#define CPL_AND_CML #endif /* - * Forces spl functions to spin while waiting for safe time to change cpl. + * SPL_DEBUG_POSTCODE/INTR_SPL/SPL_DEBUG - removed * -#define SPL_DEBUG_POSTCODE (slows the system down noticably) + * These functions were too expensive for the standard case but, more + * importantly, we should be able to come up with a much cleaner way + * to handle the cpl. Having to do any locking at all is a mistake + * for something that is modified as often as cpl is. */ -#ifdef PUSHDOWN_LEVEL_3 -#define INTR_SPL -#define SPL_DEBUG -#endif - /* + * FAST_WITHOUTCPL - now made the default (define removed). Text below + * contains the current discussion. I am confident we can find a solution + * that does not require us to process softints from a hard int, which can + * kill serial performance due to the lack of true hardware ipl's. + * + **** + * * Ignore the ipending bits when exiting FAST_INTR() routines. * - *** * according to Bruce: * * setsoft*() may set ipending. setsofttty() is actually used in the @@ -209,21 +154,17 @@ * I finish making spl/cpl MP-safe. */ #ifdef PUSHDOWN_LEVEL_1 -#define FAST_WITHOUTCPL #endif /* - * Use a simplelock to serialize FAST_INTR()s. - * sio.c, and probably other FAST_INTR() drivers, never expected several CPUs - * to be inside them at once. Things such as global vars prevent more - * than 1 thread of execution from existing at once, so we serialize - * the access of FAST_INTR()s via a simple lock. - * One optimization on this would be a simple lock per DRIVER, but I'm - * not sure how to organize that yet... + * FAST_SIMPLELOCK no longer exists, because it doesn't help us. The cpu + * is likely to already hold the MP lock and recursive MP locks are now + * very cheap, so we do not need this optimization. Eventually *ALL* + * interrupts will run in their own thread, so there is no sense complicating + * matters now. 
 */
 #ifdef PUSHDOWN_LEVEL_1
-#define FAST_SIMPLELOCK
 #endif
diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s
index 855de2651338..94771f3eadb3 100644
--- a/sys/i386/isa/apic_ipl.s
+++ b/sys/i386/isa/apic_ipl.s
@@ -29,15 +29,6 @@
 	.data
 	ALIGN_DATA
 
-/* current INTerrupt level */
-	.globl	_cil
-_cil:	.long	0
-
-/* current INTerrupt level mask */
-	.globl	_cml
-_cml:	.long	0
-
-
 /*
  * Routines used by splz_unpend to build an interrupt frame from a
  * trap frame.  The _vec[] routines build the proper frame on the stack,
@@ -78,6 +69,8 @@ _apic_imen:
 	SUPERALIGN_TEXT
 
 /*
+ * splz() -	dispatch pending interrupts after cpl reduced
+ *
  * Interrupt priority mechanism
  *	-- soft splXX masks with group mechanism (cpl)
  *	-- h/w masks for currently active or unused interrupts (imen)
@@ -87,20 +80,25 @@ _apic_imen:
 ENTRY(splz)
 	/*
 	 * The caller has restored cpl and checked that (ipending & ~cpl)
-	 * is nonzero.  We have to repeat the check since if there is an
-	 * interrupt while we're looking, _doreti processing for the
-	 * interrupt will handle all the unmasked pending interrupts
-	 * because we restored early.  We're repeating the calculation
-	 * of (ipending & ~cpl) anyway so that the caller doesn't have
-	 * to pass it, so this only costs one "jne".  "bsfl %ecx,%ecx"
-	 * is undefined when %ecx is 0 so we can't rely on the secondary
-	 * btrl tests.
+	 * is nonzero.  However, since ipending can change at any time
+	 * (by an interrupt or, with SMP, by another cpu), we have to
+	 * repeat the check.  At the moment we must own the MP lock in
+	 * the SMP case because the interrupt handlers require it.  We
+	 * loop until no unmasked pending interrupts remain.
+	 *
+	 * No new unmasked pending interrupts will be added during the
+	 * loop because, being unmasked, the interrupt code will be able
+	 * to execute the interrupts.
+	 *
+	 * Interrupts come in two flavors:  Hardware interrupts and software
+	 * interrupts.  We have to detect the type of interrupt (based on the
+	 * position of the interrupt bit) and call the appropriate dispatch
+	 * routine.
+	 *
+	 * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't
+	 * rely on the secondary btrl tests.
 	 */
-	AICPL_LOCK
 	movl	_cpl,%eax
-#ifdef CPL_AND_CML
-	orl	_cml, %eax	/* add cml to cpl */
-#endif
 splz_next:
 	/*
 	 * We don't need any locking here.  (ipending & ~cpl) cannot grow
@@ -110,7 +108,6 @@ splz_next:
 	notl	%ecx		/* set bit = unmasked level */
 	andl	_ipending,%ecx	/* set bit = unmasked pending INT */
 	jne	splz_unpend
-	AICPL_UNLOCK
 	ret
 
 	ALIGN_TEXT
@@ -131,9 +128,6 @@ splz_unpend:
 	 * The vec[] routines build the proper frame on the stack,
 	 * then call one of _Xintr0 thru _XintrNN.
 	 */
-	pushl	%ecx
-	AICPL_UNLOCK
-	popl	%ecx
 	jmp	*_vec(,%ecx,4)
 
 	ALIGN_TEXT
 splz_swi:
 	pushl	%eax
 	orl	imasks(,%ecx,4),%eax
 	movl	%eax,_cpl
-	pushl	%ecx
-	AICPL_UNLOCK
-	popl	%ecx
 	call	*_ihandlers(,%ecx,4)
-	AICPL_LOCK
 	popl	%eax
 	movl	%eax,_cpl
 	jmp	splz_next
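In C, the loop ENTRY(splz) implements above looks roughly like the following sketch; ffs() stands in for bsfl, and the handler table and bit-clear helper are illustrative names, not kernel interfaces:

extern volatile unsigned ipending;	/* pending, possibly masked, INTs */
extern unsigned cpl;			/* current software disable mask */
extern void (*handler_sketch[32])(void); /* stands in for _vec/_ihandlers */

static __inline int
btr_sketch(volatile unsigned *p, int bit)
{
	unsigned char was_set;

	/* lock btrl: atomically test-and-clear one bit */
	__asm __volatile("lock; btrl %2,%1; setc %0"
	    : "=q" (was_set), "+m" (*p) : "Ir" (bit));
	return (was_set);
}

void
splz_sketch(void)
{
	unsigned ready;
	int irq;

	/* re-test (ipending & ~cpl) each pass: ipending can change anytime */
	while ((ready = (ipending & ~cpl)) != 0) {
		irq = ffs((int)ready) - 1;	/* bsfl: lowest set bit */
		if (btr_sketch(&ipending, irq))	/* really still pending? */
			handler_sketch[irq]();	/* HWI jump or SWI call */
	}
}

diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index ca909d907e69..587d763a4573 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -9,28 +9,17 @@
 
 #include "i386/isa/intr_machdep.h"
 
-
-#ifdef FAST_SIMPLELOCK
-
-#define GET_FAST_INTR_LOCK \
-	pushl	$_fast_intr_lock ;		/* address of lock */	\
-	call	_s_lock ;			/* MP-safe */		\
-	addl	$4,%esp
-
-#define REL_FAST_INTR_LOCK \
-	movl	$0, _fast_intr_lock
-
-#else /* FAST_SIMPLELOCK */
+/*
+ * Interrupts must be enabled while waiting for the MP lock.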
+ */ #define GET_FAST_INTR_LOCK \ - call _get_isrlock + sti; call _get_mplock; cli #define REL_FAST_INTR_LOCK \ movl $_mp_lock, %edx ; /* GIANT_LOCK */ \ call _MPrellock_edx -#endif /* FAST_SIMPLELOCK */ - /* convert an absolute IRQ# into a bitmask */ #define IRQ_BIT(irq_num) (1 << (irq_num)) @@ -42,10 +31,6 @@ * Macros for interrupt interrupt entry, call to handler, and exit. */ -#ifdef FAST_WITHOUTCPL - -/* - */ #define FAST_INTR(irq_num, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -82,83 +67,6 @@ IDTVEC(vec_name) ; \ popl %eax ; \ iret -#else /* FAST_WITHOUTCPL */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl %eax ; /* save only call-used registers */ \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ds ; \ - MAYBE_PUSHL_ES ; \ - pushl %fs ; \ - movl $KDSEL, %eax ; \ - movl %ax, %ds ; \ - MAYBE_MOVW_AX_ES ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ - FAKE_MCOUNT((5+ACTUALLY_PUSHED)*4(%esp)) ; \ - GET_FAST_INTR_LOCK ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - call *_intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic_eoi ; \ - lock ; \ - incl _cnt+V_INTR ; /* book-keeping can wait */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - lock ; \ - incl (%eax) ; \ - movl _cpl, %eax ; /* unmasking pending HWIs or SWIs? */ \ - notl %eax ; \ - andl _ipending, %eax ; \ - jne 2f ; /* yes, maybe handle them */ \ -1: ; \ - MEXITCOUNT ; \ - REL_FAST_INTR_LOCK ; \ - popl %fs ; \ - MAYBE_POPL_ES ; \ - popl %ds ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - cmpb $3, _intr_nesting_level ; /* enough stack? */ \ - jae 1b ; /* no, return */ \ - movl _cpl, %eax ; \ - /* XXX next line is probably unnecessary now. */ \ - movl $HWI_MASK|SWI_MASK, _cpl ; /* limit nesting ... */ \ - lock ; \ - incb _intr_nesting_level ; /* ... really limit it ... */ \ - sti ; /* to do this as early as possible */ \ - popl %fs ; /* discard most of thin frame ... */ \ - MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ - popl %ecx ; /* ... original %ds ... */ \ - popl %edx ; \ - xchgl %eax, 4(%esp) ; /* orig %eax; save cpl */ \ - pushal ; /* build fat frame (grrr) ... */ \ - pushl %ecx ; /* ... actually %ds ... */ \ - pushl %es ; \ - pushl %fs ; - movl $KDSEL, %eax ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; - movl %ax, %fs ; - movl (3+8+0)*4(%esp), %ecx ; /* %ecx from thin frame ... */ \ - movl %ecx, (3+6)*4(%esp) ; /* ... to fat frame ... */ \ - movl (3+8+1)*4(%esp), %eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ - subl $4, %esp ; /* junk for unit number */ \ - MEXITCOUNT ; \ - jmp _doreti - -#endif /** FAST_WITHOUTCPL */ - - /* * */ @@ -242,19 +150,6 @@ IDTVEC(vec_name) ; \ 7: ; \ IMASK_UNLOCK -#ifdef INTR_SIMPLELOCK -#define ENLOCK -#define DELOCK -#define LATELOCK call _get_isrlock -#else -#define ENLOCK \ - ISR_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f -#define DELOCK ISR_RELLOCK -#define LATELOCK -#endif - #ifdef APIC_INTR_DIAGNOSTIC #ifdef APIC_INTR_DIAGNOSTIC_IRQ log_intr_event: @@ -319,125 +214,6 @@ log_intr_event: #define APIC_ITRACE(name, irq_num, id) #endif -#ifdef CPL_AND_CML - -#define INTR(irq_num, vec_name, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -/* _XintrNN: entry point used by IDT/HWIs & splz_unpend via _vec[]. 
*/ \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ - movl %ax, %ds ; \ - movl %ax, %es ; \ - movl $KPSEL, %eax ; \ - movl %ax, %fs ; \ -; \ - maybe_extra_ipending ; \ -; \ - APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ -; \ - MASK_LEVEL_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ -0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ENLOCK ; \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 2f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ -; \ - incb _intr_nesting_level ; \ -; \ - /* entry point used by doreti_unpend for HWIs. */ \ -__CONCAT(Xresume,irq_num): ; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - AVCPL_LOCK ; /* MP-safe */ \ - movl _cml, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cml ; \ - AVCPL_UNLOCK ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ - incl _inside_intr ; \ - APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ - sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ - APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ - decl _inside_intr ; \ -; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - lock ; andl $~IRQ_BIT(irq_num), _cil ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ - MEXITCOUNT ; \ - LATELOCK ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; \ -; \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl|cml */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - DELOCK ; /* XXX this is going away... */ \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - testl $IRQ_BIT(irq_num), _cml ; \ - jne 4f ; /* this INT masked */ \ - orl $IRQ_BIT(irq_num), _cil ; \ - AVCPL_UNLOCK ; \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ - POP_FRAME ; /* and return */ \ - iret - -#else /* CPL_AND_CML */ - - #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -461,15 +237,13 @@ IDTVEC(vec_name) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - ISR_TRYLOCK ; /* XXX this is going away... */ \ + MP_TRYLOCK ; /* XXX this is going away... 
*/ \ testl %eax, %eax ; /* did we get it? */ \ jz 3f ; /* no */ \ ; \ APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 2f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ ; \ incb _intr_nesting_level ; \ ; \ @@ -480,14 +254,12 @@ __CONCAT(Xresume,irq_num): ; \ movl _intr_countp + (irq_num) * 4, %eax ; \ lock ; incl (%eax) ; \ ; \ - AVCPL_LOCK ; /* MP-safe */ \ movl _cpl, %eax ; \ pushl %eax ; \ orl _intr_mask + (irq_num) * 4, %eax ; \ movl %eax, _cpl ; \ lock ; \ andl $~IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ ; \ pushl _intr_unit + (irq_num) * 4 ; \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ @@ -508,10 +280,8 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ MASK_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ lock ; \ btsl $(irq_num), iactive ; /* still active */ \ jnc 0b ; /* retry */ \ @@ -522,32 +292,25 @@ __CONCAT(Xresume,irq_num): ; \ APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ - AVCPL_UNLOCK ; \ - ISR_RELLOCK ; /* XXX this is going away... */ \ + MP_RELLOCK ; \ POP_FRAME ; \ iret ; \ ALIGN_TEXT ; \ 3: ; /* other cpu has isr lock */ \ APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - AVCPL_LOCK ; /* MP-safe */ \ lock ; \ orl $IRQ_BIT(irq_num), _ipending ; \ testl $IRQ_BIT(irq_num), _cpl ; \ jne 4f ; /* this INT masked */ \ - AVCPL_UNLOCK ; \ call forward_irq ; /* forward irq to lock holder */ \ POP_FRAME ; /* and return */ \ iret ; \ ALIGN_TEXT ; \ 4: ; /* blocked */ \ APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - AVCPL_UNLOCK ; \ POP_FRAME ; /* and return */ \ iret -#endif /* CPL_AND_CML */ - - /* * Handle "spurious INTerrupts". * Notes: @@ -635,11 +398,6 @@ _Xcpucheckstate: testl $PSL_VM, 24(%esp) jne 1f incl %ebx /* system or interrupt */ -#ifdef CPL_AND_CML - cmpl $0, _inside_intr - je 1f - incl %ebx /* interrupt */ -#endif 1: movl _cpuid, %eax movl %ebx, _checkstate_cpustate(,%eax,4) @@ -693,17 +451,11 @@ _Xcpuast: * Giant locks do not come cheap. * A lot of cycles are going to be wasted here. */ - call _get_isrlock + call _get_mplock - AVCPL_LOCK -#ifdef CPL_AND_CML - movl _cml, %eax -#else movl _cpl, %eax -#endif pushl %eax movl $1, _astpending /* XXX */ - AVCPL_UNLOCK lock incb _intr_nesting_level sti @@ -716,7 +468,7 @@ _Xcpuast: lock btrl %eax, CNAME(resched_cpus) jnc 2f - movl $1, CNAME(want_resched) + orl $AST_RESCHED,_astpending lock incl CNAME(want_resched_cnt) 2: @@ -749,7 +501,7 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - ISR_TRYLOCK + MP_TRYLOCK testl %eax,%eax /* Did we get the lock ? 
 */
 	jz	1f			/* No */
@@ -758,14 +510,8 @@ _Xforward_irq:
 	cmpb	$4, _intr_nesting_level
 	jae	2f
 
-	AVCPL_LOCK
-#ifdef CPL_AND_CML
-	movl	_cml, %eax
-#else
 	movl	_cpl, %eax
-#endif
 	pushl	%eax
-	AVCPL_UNLOCK
 	lock
 	incb	_intr_nesting_level
 	sti
@@ -785,7 +531,7 @@ _Xforward_irq:
 	lock
 	incl	CNAME(forward_irq_toodeepcnt)
 3:
-	ISR_RELLOCK
+	MP_RELLOCK
 	MEXITCOUNT
 	POP_FRAME
 	iret
diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s
index 7c1fca1383e7..980257f3f1de 100644
--- a/sys/i386/isa/ipl.s
+++ b/sys/i386/isa/ipl.s
@@ -43,6 +43,10 @@
 /*
  * AT/386
  * Vector interrupt control section
+ *
+ *  cpl		- Current interrupt disable mask
+ *  *_imask	- Interrupt masks for various spl*() functions
+ *  ipending	- Pending interrupts (set when a masked interrupt occurs)
  */
 
 	.data
@@ -67,9 +71,6 @@ _softnet_imask:	.long	SWI_NET_MASK
 	.globl	_softtty_imask
 _softtty_imask:	.long	SWI_TTY_MASK
 
-	.globl	_astpending
-_astpending:	.long	0
-
 /* pending interrupts blocked by splxxx() */
 	.globl	_ipending
 _ipending:	.long	0
@@ -91,29 +92,12 @@ _netisrs:
 
 	.text
 
-#ifdef SMP
-#ifdef notnow
-#define TEST_CIL \
-	cmpl	$0x0100, _cil ; \
-	jne	1f ; \
-	cmpl	$0, _inside_intr ; \
-	jne	1f ; \
-	int	$3 ; \
-1:
-#else
-#define TEST_CIL
-#endif
-#endif
-
 /*
  * Handle return from interrupts, traps and syscalls.
  */
 	SUPERALIGN_TEXT
 	.type	_doreti,@function
 _doreti:
-#ifdef SMP
-	TEST_CIL
-#endif
 	FAKE_MCOUNT(_bintr)		/* init "from" _bintr -> _doreti */
 	addl	$4,%esp			/* discard unit number */
 	popl	%eax			/* cpl or cml to restore */
@@ -128,32 +112,17 @@ doreti_next:
 	 * handlers is limited by the number of bits in cpl).
 	 */
 #ifdef SMP
-	TEST_CIL
 	cli				/* early to prevent INT deadlock */
-	pushl	%eax			/* preserve cpl while getting lock */
-	ICPL_LOCK
-	popl	%eax
 doreti_next2:
 #endif
 	movl	%eax,%ecx
-#ifdef CPL_AND_CML
-	orl	_cpl, %ecx		/* add cpl to cml */
-#endif
 	notl	%ecx			/* set bit = unmasked level */
 #ifndef SMP
 	cli
 #endif
 	andl	_ipending,%ecx		/* set bit = unmasked pending INT */
 	jne	doreti_unpend
-#ifdef SMP
-	TEST_CIL
-#endif
-#ifdef CPL_AND_CML
-	movl	%eax, _cml
-#else
 	movl	%eax,_cpl
-#endif
-	FAST_ICPL_UNLOCK		/* preserves %eax */
 	MPLOCKED decb _intr_nesting_level
 
 	/* Check for ASTs that can be handled now. */
@@ -166,19 +135,27 @@ doreti_next2:
 	cmpl	$1,_in_vm86call
 	jne	doreti_ast
 
+	/*
+	 * doreti_exit -	release MP lock, pop registers, iret.
+	 *
+	 * Note that the syscall trap shortcuts to doreti_syscall_ret.
+	 * The segment register pop is a special case, since it may
+	 * fault if (for example) a sigreturn specifies bad segment
+	 * registers.  The fault is handled in trap.c
+	 */
+
 doreti_exit:
 	MEXITCOUNT
 
 #ifdef SMP
-#ifdef INTR_SIMPLELOCK
-#error code needed here to decide which lock to release, INTR or giant
-#endif
 	/* release the kernel lock */
 	movl	$_mp_lock, %edx		/* GIANT_LOCK */
 	call	_MPrellock_edx
 #endif /* SMP */
 
 	.globl	doreti_popl_fs
+	.globl	doreti_syscall_ret
+doreti_syscall_ret:
 doreti_popl_fs:
 	popl	%fs
 	.globl	doreti_popl_es
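The new doreti_syscall_ret label gives syscalls a fast exit.  A pseudo-C sketch of the decision being wired up here; all of the helper names are illustrative, not kernel functions:

extern volatile unsigned astpending;

extern void cli_sketch(void);		/* stands in for "cli" */
extern void pop_frame_and_iret_sketch(void);
extern void get_mplock(void);
extern void push_cpl_sketch(unsigned);
extern void doreti_sketch(void);

void
syscall_exit_sketch(void)
{
	cli_sketch();			/* make the astpending test atomic */
	if (astpending == 0) {
		/*
		 * Fast path: jump to doreti_syscall_ret and pop the frame
		 * directly; no cpl work and no MP lock required.
		 */
		pop_frame_and_iret_sketch();
		/* NOTREACHED */
	}
	get_mplock();			/* _doreti manipulates cpl */
	push_cpl_sketch(0);		/* the "cpl to restore" */
	doreti_sketch();		/* runs ASTs, then iret */
}

@@ -215,22 +192,13 @@ doreti_popl_fs_fault:
 doreti_unpend:
 	/*
 	 * Enabling interrupts is safe because we haven't restored cpl yet.
-	 * The locking from the "btrl" test is probably no longer necessary.
-	 * We won't miss any new pending interrupts because we will check
-	 * for them again.
+	 * %ecx contains the next probable ready interrupt (~cpl & ipending)
 	 */
 #ifdef SMP
-	TEST_CIL
-	/* we enter with cpl locked */
-	bsfl	%ecx, %ecx		/* slow, but not worth optimizing */
+	bsfl	%ecx, %ecx		/* locate the next dispatchable int */
 	lock
-	btrl	%ecx, _ipending
+	btrl	%ecx, _ipending		/* is it really still pending ?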
*/ jnc doreti_next2 /* some intr cleared memory copy */ - cmpl $NHWI, %ecx - jae 1f - btsl %ecx, _cil -1: - FAST_ICPL_UNLOCK /* preserves %eax */ sti /* late to prevent INT deadlock */ #else sti @@ -238,8 +206,9 @@ doreti_unpend: btrl %ecx,_ipending jnc doreti_next /* some intr cleared memory copy */ #endif /* SMP */ - /* + * Execute handleable interrupt + * * Set up JUMP to _ihandlers[%ecx] for HWIs. * Set up CALL of _ihandlers[%ecx] for SWIs. * This is a bit early for the SMP case - we have to push %ecx and @@ -247,25 +216,10 @@ doreti_unpend: */ movl _ihandlers(,%ecx,4),%edx cmpl $NHWI,%ecx - jae doreti_swi - cli + jae doreti_swi /* software interrupt handling */ + cli /* else hardware int handling */ #ifdef SMP - pushl %edx /* preserve %edx */ -#ifdef APIC_INTR_DIAGNOSTIC - pushl %ecx -#endif - pushl %eax /* preserve %eax */ - ICPL_LOCK -#ifdef CPL_AND_CML - popl _cml -#else - popl _cpl -#endif - FAST_ICPL_UNLOCK -#ifdef APIC_INTR_DIAGNOSTIC - popl %ecx -#endif - popl %edx + movl %eax,_cpl /* same as non-smp case right now */ #else movl %eax,_cpl #endif @@ -292,9 +246,6 @@ doreti_unpend: ALIGN_TEXT doreti_swi: -#ifdef SMP - TEST_CIL -#endif pushl %eax /* * At least the SWI_CLOCK handler has to run at a possibly strictly @@ -304,29 +255,18 @@ doreti_swi: * in dying interrupt frames and about 12 HWIs nested in active * interrupt frames. There are only 4 different SWIs and the HWI * and SWI masks limit the nesting further. + * + * The SMP case is currently the same as the non-SMP case. */ #ifdef SMP - orl imasks(,%ecx,4), %eax - pushl %ecx /* preserve for use by _swi_generic */ - pushl %edx /* save handler entry point */ - cli /* prevent INT deadlock */ - pushl %eax /* save cpl|cml */ - ICPL_LOCK -#ifdef CPL_AND_CML - popl _cml /* restore cml */ -#else - popl _cpl /* restore cpl */ -#endif - FAST_ICPL_UNLOCK - sti - popl %edx /* restore handler entry point */ - popl %ecx + orl imasks(,%ecx,4), %eax /* or in imasks */ + movl %eax,_cpl /* set cpl for call */ #else orl imasks(,%ecx,4),%eax movl %eax,_cpl #endif call %edx - popl %eax + popl %eax /* cpl to restore */ jmp doreti_next ALIGN_TEXT @@ -336,9 +276,6 @@ doreti_ast: movl $T_ASTFLT,TF_TRAPNO(%esp) call _trap subl %eax,%eax /* recover cpl|cml */ -#ifdef CPL_AND_CML - movl %eax, _cpl -#endif movb $1,_intr_nesting_level /* for doreti_next to decrement */ jmp doreti_next diff --git a/sys/i386/isa/ipl_funcs.c b/sys/i386/isa/ipl_funcs.c index d7ba1c4b9653..043d6b432cf6 100644 --- a/sys/i386/isa/ipl_funcs.c +++ b/sys/i386/isa/ipl_funcs.c @@ -35,8 +35,9 @@ #include <i386/isa/intr_machdep.h> /* - * The volatile bitmap variables must be set atomically. This normally - * involves using a machine-dependent bit-set or `or' instruction. + * Bits in the ipending bitmap variable must be set atomically because + * ipending may be manipulated by interrupts or other cpu's without holding + * any locks. * * Note: setbits uses a locked or, making simple cases MP safe. */ @@ -67,6 +68,10 @@ softclockpending(void) return (ipending & SWI_CLOCK_PENDING); } +/* + * Support for SPL assertions. + */ + #ifdef INVARIANT_SUPPORT #define SPLASSERT_IGNORE 0 @@ -112,6 +117,40 @@ NAME##assert(const char *msg) \ #define GENSPLASSERT(NAME, MODIFIER) #endif +/************************************************************************ + * GENERAL SPL CODE * + ************************************************************************ + * + * Implement splXXX(), spl0(), splx(), and splq(). splXXX() disables a + * set of interrupts (e.g. 
splbio() disables interrupts relating to
+ *  device I/O) and returns the previous interrupt mask.  splx() restores
+ *  the previous interrupt mask, spl0() is a special case which enables
+ *  all interrupts and is typically used inside i386/i386 swtch.s and
+ *  fork_trampoline.  splq() is a generic version of splXXX().
+ *
+ *  The SPL routines mess around with the 'cpl' global, which masks
+ *  interrupts.  Interrupts are not *actually* masked.  What happens is
+ *  that if an interrupt masked by the cpl occurs, the appropriate bit
+ *  in 'ipending' is set and the interrupt is deferred.  When we clear
+ *  bits in the cpl we must check to see if any ipending interrupts have
+ *  been unmasked and issue them synchronously, which is what the splz()
+ *  call does.
+ *
+ *  Because the cpl is often saved and restored in a nested fashion, cpl
+ *  modifications are only allowed in the SMP case when the MP lock is held
+ *  to prevent multiple processes from tripping over each other's masks.
+ *  The cpl is saved when you do a context switch (mi_switch()) and restored
+ *  when your process gets cpu again.
+ *
+ *  An interrupt routine is allowed to modify the cpl as long as it restores
+ *  it prior to returning (thus the interrupted mainline code doesn't notice
+ *  anything amiss).  For the SMP case, the interrupt routine must hold
+ *  the MP lock for any cpl manipulation.
+ *
+ *  Likewise, due to the deterministic nature of cpl modifications, we do
+ *  NOT need to use locked instructions to modify it.
+ */
+
 #ifndef SMP
 
 #define	GENSPL(NAME, OP, MODIFIER, PC)	\
@@ -154,219 +193,65 @@ splq(intrmask_t mask)
 #include <machine/smp.h>
 #include <machine/smptests.h>
 
-#ifndef SPL_DEBUG_POSTCODE
-#undef POSTCODE
-#undef POSTCODE_LO
-#undef POSTCODE_HI
-#define POSTCODE(X)
-#define POSTCODE_LO(X)
-#define POSTCODE_HI(X)
-#endif /* SPL_DEBUG_POSTCODE */
-
-
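Usage-wise, the contract described above is the classic nested bracket.  A short sketch using the real splbio()/splx() entry points (the driver body is of course illustrative):

extern unsigned splbio(void);		/* real spl entry points */
extern void splx(unsigned);

void
driver_sketch(void)
{
	unsigned s;

	s = splbio();		/* raise cpl: mask disk interrupts */
	/* ... touch structures shared with the bio interrupt handler ... */
	splx(s);		/* restore cpl; any bits now unmasked that
				 * were deferred into ipending are issued
				 * synchronously here via splz() */
}

 /*
- * This version has to check for bsp_apic_ready,
- * as calling simple_lock() (ie ss_lock) before then deadlocks the system.
- * A sample count of GENSPL calls before bsp_apic_ready was set: 2193
+ * SMP CASE
+ *
+ * Mostly the same as the non-SMP case now, but it didn't use to be
+ * this clean.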
*/ -#ifdef INTR_SPL - -#ifdef SPL_DEBUG -#define MAXZ 100000000 -#define SPIN_VAR unsigned z; -#define SPIN_RESET z = 0; -#if 0 -#define SPIN_SPL \ - if (++z >= MAXZ) { \ - /* XXX allow lock-free panic */ \ - bsp_apic_ready = 0; \ - panic("\ncil: 0x%08x", cil); \ - } -#else -#define SPIN_SPL \ - if (++z >= MAXZ) { \ - /* XXX allow lock-free panic */ \ - bsp_apic_ready = 0; \ - printf("\ncil: 0x%08x", cil); \ - breakpoint(); \ - } -#endif /* 0/1 */ -#else /* SPL_DEBUG */ -#define SPIN_VAR -#define SPIN_RESET -#define SPIN_SPL -#endif /* SPL_DEBUG */ - -#endif - -#ifdef INTR_SPL - -#define GENSPL(NAME, OP, MODIFIER, PC) \ -GENSPLASSERT(NAME, MODIFIER) \ -unsigned NAME(void) \ -{ \ - unsigned x, y; \ - SPIN_VAR; \ - \ - if (!bsp_apic_ready) { \ - x = cpl; \ - cpl OP MODIFIER; \ - return (x); \ - } \ - \ - for (;;) { \ - IFCPL_LOCK(); /* MP-safe */ \ - x = y = cpl; /* current value */ \ - POSTCODE(0x20 | PC); \ - if (inside_intr) \ - break; /* XXX only 1 INT allowed */ \ - y OP MODIFIER; /* desired value */ \ - if (cil & y) { /* not now */ \ - IFCPL_UNLOCK(); /* allow cil to change */ \ - SPIN_RESET; \ - while (cil & y) \ - SPIN_SPL \ - continue; /* try again */ \ - } \ - break; \ - } \ - cpl OP MODIFIER; /* make the change */ \ - IFCPL_UNLOCK(); \ - \ - return (x); \ -} - -#else /* INTR_SPL */ - #define GENSPL(NAME, OP, MODIFIER, PC) \ GENSPLASSERT(NAME, MODIFIER) \ unsigned NAME(void) \ { \ unsigned x; \ \ - IFCPL_LOCK(); \ x = cpl; \ cpl OP MODIFIER; \ - IFCPL_UNLOCK(); \ \ return (x); \ } -#endif /* INTR_SPL */ - - +/* + * spl0() - unmask all interrupts + * + * The MP lock must be held on entry + * This routine may only be called from mainline code. + */ void spl0(void) { - int unpend; -#ifdef INTR_SPL - SPIN_VAR; - - for (;;) { - IFCPL_LOCK(); - POSTCODE_HI(0xc); - /* - * XXX SWI_AST_MASK in ipending has moved to 1 in astpending, - * so the following code is dead, but just removing it may - * not be right. - */ -#if 0 - if (cil & SWI_AST_MASK) { /* not now */ - IFCPL_UNLOCK(); /* allow cil to change */ - SPIN_RESET; - while (cil & SWI_AST_MASK) - SPIN_SPL - continue; /* try again */ - } -#endif - break; - } -#else /* INTR_SPL */ - IFCPL_LOCK(); -#endif /* INTR_SPL */ - + KASSERT(inside_intr == 0, ("spl0: called from interrupt")); cpl = 0; - unpend = ipending; - IFCPL_UNLOCK(); - - if (unpend && !inside_intr) + if (ipending) splz(); } +/* + * splx() - restore previous interrupt mask + * + * The MP lock must be held on entry + */ + void splx(unsigned ipl) { - int unpend; -#ifdef INTR_SPL - SPIN_VAR; - - for (;;) { - IFCPL_LOCK(); - POSTCODE_HI(0xe); - if (inside_intr) - break; /* XXX only 1 INT allowed */ - POSTCODE_HI(0xf); - if (cil & ipl) { /* not now */ - IFCPL_UNLOCK(); /* allow cil to change */ - SPIN_RESET; - while (cil & ipl) - SPIN_SPL - continue; /* try again */ - } - break; - } -#else /* INTR_SPL */ - IFCPL_LOCK(); -#endif /* INTR_SPL */ - cpl = ipl; - unpend = ipending & ~ipl; - IFCPL_UNLOCK(); - - if (unpend && !inside_intr) + if (inside_intr == 0 && (ipending & ~cpl) != 0) splz(); } /* - * Replaces UP specific inline found in (?) pci/pci_support.c. + * splq() - blocks specified interrupts * - * Stefan said: - * You know, that splq() is used in the shared interrupt multiplexer, and that - * the SMP version should not have too much overhead. If it is significantly - * slower, then moving the splq() out of the loop in intr_mux() and passing in - * the logical OR of all mask values might be a better solution than the - * current code. 
(This logical OR could of course be pre-calculated whenever - * another shared interrupt is registered ...) + * The MP lock must be held on entry */ intrmask_t splq(intrmask_t mask) { - intrmask_t tmp; -#ifdef INTR_SPL - intrmask_t tmp2; - - for (;;) { - IFCPL_LOCK(); - tmp = tmp2 = cpl; - tmp2 |= mask; - if (cil & tmp2) { /* not now */ - IFCPL_UNLOCK(); /* allow cil to change */ - while (cil & tmp2) - /* spin */ ; - continue; /* try again */ - } - break; - } - cpl = tmp2; -#else /* INTR_SPL */ - IFCPL_LOCK(); - tmp = cpl; + intrmask_t tmp = cpl; cpl |= mask; -#endif /* INTR_SPL */ - - IFCPL_UNLOCK(); return (tmp); } diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 05de442441e3..e3f976d7ebe4 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -44,7 +44,7 @@ struct sysent sysent[] = { { 4, (sy_call_t *)mount }, /* 21 = mount */ { 2, (sy_call_t *)unmount }, /* 22 = unmount */ { 1, (sy_call_t *)setuid }, /* 23 = setuid */ - { 0, (sy_call_t *)getuid }, /* 24 = getuid */ + { SYF_MPSAFE | 0, (sy_call_t *)getuid }, /* 24 = getuid */ { 0, (sy_call_t *)geteuid }, /* 25 = geteuid */ { 4, (sy_call_t *)ptrace }, /* 26 = ptrace */ { 3, (sy_call_t *)recvmsg }, /* 27 = recvmsg */ @@ -67,7 +67,7 @@ struct sysent sysent[] = { { 4, (sy_call_t *)profil }, /* 44 = profil */ { 4, (sy_call_t *)ktrace }, /* 45 = ktrace */ { compat(3,sigaction) }, /* 46 = old sigaction */ - { 0, (sy_call_t *)getgid }, /* 47 = getgid */ + { SYF_MPSAFE | 0, (sy_call_t *)getgid }, /* 47 = getgid */ { compat(2,sigprocmask) }, /* 48 = old sigprocmask */ { 2, (sy_call_t *)getlogin }, /* 49 = getlogin */ { 1, (sy_call_t *)setlogin }, /* 50 = setlogin */ @@ -80,7 +80,7 @@ struct sysent sysent[] = { { 2, (sy_call_t *)symlink }, /* 57 = symlink */ { 3, (sy_call_t *)readlink }, /* 58 = readlink */ { 3, (sy_call_t *)execve }, /* 59 = execve */ - { 1, (sy_call_t *)umask }, /* 60 = umask */ + { SYF_MPSAFE | 1, (sy_call_t *)umask }, /* 60 = umask */ { 1, (sy_call_t *)chroot }, /* 61 = chroot */ { compat(2,fstat) }, /* 62 = old fstat */ { compat(4,getkerninfo) }, /* 63 = old getkerninfo */ @@ -101,7 +101,7 @@ struct sysent sysent[] = { { 3, (sy_call_t *)mincore }, /* 78 = mincore */ { 2, (sy_call_t *)getgroups }, /* 79 = getgroups */ { 2, (sy_call_t *)setgroups }, /* 80 = setgroups */ - { 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */ + { SYF_MPSAFE | 0, (sy_call_t *)getpgrp }, /* 81 = getpgrp */ { 2, (sy_call_t *)setpgid }, /* 82 = setpgid */ { 3, (sy_call_t *)setitimer }, /* 83 = setitimer */ { compat(0,wait) }, /* 84 = old wait */ diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 2ac10da6aca8..06bc88931fa4 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -62,6 +62,9 @@ struct getpid_args { }; #endif +/* + * NOT MP SAFE due to p_pptr access + */ /* ARGSUSED */ int getpid(p, uap) @@ -92,7 +95,11 @@ getppid(p, uap) return (0); } -/* Get process group ID; note that POSIX getpgrp takes no parameter */ +/* + * Get process group ID; note that POSIX getpgrp takes no parameter + * + * MP SAFE + */ #ifndef _SYS_SYSPROTO_H_ struct getpgrp_args { int dummy; @@ -168,6 +175,9 @@ struct getuid_args { }; #endif +/* + * MP SAFE + */ /* ARGSUSED */ int getuid(p, uap) @@ -205,6 +215,9 @@ struct getgid_args { }; #endif +/* + * MP SAFE + */ /* ARGSUSED */ int getgid(p, uap) diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index c3bb68f73753..b374fed75506 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -64,6 +64,7 @@ #include <vm/vm_zone.h> +#include <machine/ipl.h> #include 
<machine/cpu.h> #ifdef SMP #include <machine/smp.h> diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index a4b05fe76a45..3146f9e856ef 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -140,6 +140,8 @@ remrunqueue(struct proc *p) * procrunnable() returns a boolean true (non-zero) value if there are * any runnable processes. This is intended to be called from the idle * loop to avoid the more expensive (and destructive) chooseproc(). + * + * MP SAFE. CALLED WITHOUT THE MP LOCK */ u_int32_t procrunnable(void) diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index a590506ecd45..d7a66b03ad74 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -57,6 +57,7 @@ #endif #include <machine/cpu.h> +#include <machine/ipl.h> #ifdef SMP #include <machine/smp.h> #endif diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 8f297b21ecab..cce81c3bff6e 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -41,6 +41,7 @@ #include <sys/kernel.h> #include <sys/resource.h> #include <machine/cpu.h> /* For need_resched */ +#include <machine/ipl.h> /* For need_resched */ #include <posix4/posix4.h> diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index ed9c0d8b77e3..117f0309981a 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -42,6 +42,7 @@ #include <sys/resourcevar.h> #include <sys/sysctl.h> +#include <machine/ipl.h> #include <machine/cpu.h> #ifdef GPROF diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 8e349a9800e4..b5bc1fd09022 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -1770,8 +1770,10 @@ init_locks(void) */ mp_lock = 0x00000001; +#if 0 /* ISR uses its own "giant lock" */ isr_lock = FREE_LOCK; +#endif #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index a8b73cf6a02b..703d48dc84ed 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -99,7 +99,7 @@ int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); -extern void syscall __P((struct trapframe frame)); +extern void syscall2 __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -140,38 +140,32 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; -static __inline void userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks)); +static __inline int userret __P((struct proc *p, struct trapframe *frame, + u_quad_t oticks, int have_mplock)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif -static __inline void -userret(p, frame, oticks) +static __inline int +userret(p, frame, oticks, have_mplock) struct proc *p; struct trapframe *frame; u_quad_t oticks; + int have_mplock; { int sig, s; - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } postsig(sig); - -#if 0 - if (!want_resched && - (p->p_priority <= p->p_usrpri) && - (p->p_rtprio.type == RTP_PRIO_NORMAL)) { - int newpriority; - p->p_estcpu += 1; - newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; - newpriority = min(newpriority, MAXPRI); - p->p_usrpri = newpriority; } -#endif - + p->p_priority = p->p_usrpri; - if (want_resched) { + if (resched_wanted()) { /* * Since we are curproc, clock will normally just change * our 
priority without moving us from one queue to another @@ -180,6 +174,10 @@ userret(p, frame, oticks) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; @@ -191,11 +189,16 @@ userret(p, frame, oticks) /* * Charge system time if profiling. */ - if (p->p_flag & P_PROFIL) + if (p->p_flag & P_PROFIL) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); - + } curpriority = p->p_priority; + return(have_mplock); } /* @@ -604,7 +607,7 @@ kernel_trap: #endif out: - userret(p, &frame, sticks); + userret(p, &frame, sticks, 1); } #ifdef notyet @@ -999,11 +1002,18 @@ int trapwrite(addr) } /* - * System call request from POSIX system call gate interface to kernel. - * Like trap(), argument is call by reference. + * syscall2 - MP aware system call request C handler + * + * A system call is essentially treated as a trap except that the + * MP lock is not held on entry or return. We are responsible for + * obtaining the MP lock if necessary and for handling ASTs + * (e.g. a task switch) prior to return. + * + * In general, only simple access and manipulation of curproc and + * the current stack is allowed without having to hold the MP lock. */ void -syscall(frame) +syscall2(frame) struct trapframe frame; { caddr_t params; @@ -1012,22 +1022,42 @@ syscall(frame) struct proc *p = curproc; u_quad_t sticks; int error; + int narg; int args[8]; + int have_mplock = 0; u_int code; #ifdef DIAGNOSTIC - if (ISPL(frame.tf_cs) != SEL_UPL) + if (ISPL(frame.tf_cs) != SEL_UPL) { + get_mplock(); panic("syscall"); + /* NOT REACHED */ + } #endif - sticks = p->p_sticks; + + /* + * handle atomicity by looping since interrupts are enabled and the + * MP lock is not held. + */ + sticks = ((volatile struct proc *)p)->p_sticks; + while (sticks != ((volatile struct proc *)p)->p_sticks) + sticks = ((volatile struct proc *)p)->p_sticks; + p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is not MP aware. + */ + get_mplock(); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params); + rel_mplock(); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. + * fuword is MP aware. */ if (code == SYS_syscall) { /* @@ -1053,27 +1083,52 @@ syscall(frame) else callp = &p->p_sysent->sv_table[code]; - if (params && (i = callp->sy_narg * sizeof(int)) && + narg = callp->sy_narg & SYF_ARGMASK; + + /* + * copyin is MP aware, but the tracing code is not + */ + if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { + get_mplock(); + have_mplock = 1; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + ktrsyscall(p->p_tracep, code, narg, args); #endif goto bad; } + + /* + * Try to run the syscall without the MP lock if the syscall + * is MP safe.
We have to obtain the MP lock no matter what if + * we are ktracing + */ + if ((callp->sy_narg & SYF_MPSAFE) == 0) { + get_mplock(); + have_mplock = 1; + } + #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p->p_tracep, code, callp->sy_narg, args); + if (KTRPOINT(p, KTR_SYSCALL)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } + ktrsyscall(p->p_tracep, code, narg, args); + } #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; - STOPEVENT(p, S_SCE, callp->sy_narg); + STOPEVENT(p, S_SCE, narg); /* MP aware */ error = (*callp->sy_call)(p, args); + /* + * MP SAFE (we may or may not have the MP lock at this point) + */ switch (error) { - case 0: /* * Reinitialize proc pointer `p' as it may be different @@ -1109,17 +1164,31 @@ bad: break; } + /* + * Traced syscall. trapsignal() is not MP aware. + */ if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { - /* Traced syscall. */ + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } - userret(p, &frame, sticks); + /* + * Handle reschedule and other end-of-syscall issues + */ + have_mplock = userret(p, &frame, sticks, have_mplock); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_mplock == 0) { + get_mplock(); + have_mplock = 1; + } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); + } #endif /* @@ -1129,11 +1198,17 @@ bad: */ STOPEVENT(p, S_SCX, code); + /* + * Release the MP lock if we had to get it + */ + if (have_mplock) + rel_mplock(); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. + * directly into user mode. MP lock is held on entry and should be + * held on return. */ void fork_return(p, frame) @@ -1144,7 +1219,7 @@ fork_return(p, frame) frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0); + userret(p, &frame, 0, 1); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 142a2c339422..99084d332ed6 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -3000,6 +3000,8 @@ getdents(p, uap) /* * Set the mode mask for creation of filesystem nodes. + * + * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 142a2c339422..99084d332ed6 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3000,6 +3000,8 @@ getdents(p, uap) /* * Set the mode mask for creation of filesystem nodes. 
+ * + * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { diff --git a/sys/posix4/ksched.c b/sys/posix4/ksched.c index 8f297b21ecab..cce81c3bff6e 100644 --- a/sys/posix4/ksched.c +++ b/sys/posix4/ksched.c @@ -41,6 +41,7 @@ #include <sys/kernel.h> #include <sys/resource.h> #include <machine/cpu.h> /* For need_resched */ +#include <machine/ipl.h> /* For need_resched */ #include <posix4/posix4.h> diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index 5383fc409a24..3804937c9f11 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -62,7 +62,7 @@ struct ktr_header { }; /* - * Test for kernel trace point + * Test for kernel trace point (MP SAFE) */ #define KTRPOINT(p, type) \ (((p)->p_traceflag & ((1<<(type))|KTRFAC_ACTIVE)) == (1<<(type))) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 4c2eead9f775..ad1ba41547ec 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -338,9 +338,18 @@ MALLOC_DECLARE(M_PARGS); FREE(s, M_SESSION); \ } +/* + * STOPEVENT is MP SAFE. + */ extern void stopevent(struct proc*, unsigned int, unsigned int); -#define STOPEVENT(p,e,v) do { \ - if ((p)->p_stops & (e)) stopevent(p,e,v); } while (0) +#define STOPEVENT(p,e,v) \ + do { \ + if ((p)->p_stops & (e)) { \ + get_mplock(); \ + stopevent(p,e,v); \ + rel_mplock(); \ + } \ + } while (0) /* hold process U-area in memory, normally for ptrace/procfs work */ #define PHOLD(p) { \ diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 56cf3fa26d9d..496d68519505 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -39,6 +39,9 @@ #include <sys/signal.h> #include <sys/proc.h> +#ifdef SMP +#include <machine/smp.h> +#endif /* * Kernel signal definitions and data structures, @@ -225,16 +228,24 @@ void sendsig __P((sig_t action, int sig, sigset_t *retmask, u_long code)); * Determine signal that should be delivered to process p, the current * process, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). + * + * MP SAFE */ extern __inline int __cursig(struct proc *p) { sigset_t tmpset; + int r; tmpset = p->p_siglist; SIGSETNAND(tmpset, p->p_sigmask); - return ((SIGISEMPTY(p->p_siglist) || - (!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset))) - ? 0 : issignal(p)); + if (SIGISEMPTY(p->p_siglist) || + (!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset))) { + return(0); + } + get_mplock(); + r = issignal(p); + rel_mplock(); + return(r); } #endif /* _KERNEL */ diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 66fe731ae66c..fff1ed055ad9 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -44,6 +44,10 @@ struct sysent { /* system call table */ int sy_narg; /* number of arguments */ sy_call_t *sy_call; /* implementing function */ }; + +#define SYF_ARGMASK 0x0000FFFF +#define SYF_MPSAFE 0x00010000 + #define SCARG(p,k) ((p)->k) /* get arg from args pointer */ /* placeholder till we integrate rest of lite2 syscallargs changes XXX */
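
The ipl_funcs.c comment earlier in this diff is the heart of the change and is worth restating with code. An interrupt blocked by cpl is not masked in hardware: its bit is latched in ipending, and it runs later, synchronously, once splz() notices the mask no longer covers it. Below is a minimal single-CPU model of that deferral scheme, not kernel code: cpl, ipending, splz, and splx are the commit's names, while interrupt() and the handler table are invented for illustration.

typedef unsigned intrmask_t;

static volatile intrmask_t cpl;		/* current interrupt mask */
static volatile intrmask_t ipending;	/* latched, deferred interrupts */
static void (*handlers[32])(void);	/* hypothetical handler table */

/* Entry point: a masked interrupt is deferred rather than dropped. */
static void
interrupt(int irq)
{
	if (cpl & (1 << irq))
		ipending |= 1 << irq;	/* defer until the mask drops */
	else if (handlers[irq] != 0)
		handlers[irq]();	/* unmasked: dispatch now */
}

/* splz(): synchronously run every pending interrupt cpl no longer masks. */
static void
splz(void)
{
	intrmask_t run;
	int irq;

	while ((run = (ipending & ~cpl)) != 0) {
		for (irq = 0; (run & (1 << irq)) == 0; irq++)
			;			/* lowest pending irq */
		ipending &= ~(1 << irq);
		if (handlers[irq] != 0)
			handlers[irq]();
	}
}

/* splx(): restore a saved mask, then replay whatever it just unmasked. */
static void
splx(intrmask_t ipl)
{
	cpl = ipl;
	if ((ipending & ~cpl) != 0)
		splz();
}

A splbio()-style routine then reduces to exactly the GENSPL shape this commit keeps: save cpl, OR in the mask, return the old value, and count on a later splx() to replay anything deferred in the meantime.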
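The sysent.h hunk above packs two things into sy_narg: the low 16 bits (SYF_ARGMASK) keep the argument count, and SYF_MPSAFE marks handlers that may run without the MP lock, which is why trap.c and subr_trap.c now mask sy_narg everywhere. The sketch below reduces the syscall2() decode and lock-on-demand idiom to a buildable fragment; SYF_ARGMASK, SYF_MPSAFE, sy_narg, get_mplock(), and rel_mplock() are the commit's names, everything else (the stand-in types, the toy lock, trace_syscall(), dispatch()) is invented so it compiles outside the kernel.

#define SYF_ARGMASK	0x0000FFFF	/* low bits: argument count */
#define SYF_MPSAFE	0x00010000	/* handler needs no MP lock */

struct proc;				/* stand-in declarations */
typedef int sy_call_t(struct proc *, void *);
struct sysent {
	int	sy_narg;		/* arg count | SYF_* flags */
	sy_call_t *sy_call;
};

static int giant;			/* toy model of the MP lock */
static void get_mplock(void) { giant = 1; }
static void rel_mplock(void) { giant = 0; }

static void				/* ktrsyscall() stand-in */
trace_syscall(int code, int narg, int *args)
{
	(void)code; (void)narg; (void)args;
}

static int
dispatch(struct sysent *callp, struct proc *p, int code, int *args,
    int tracing)
{
	int narg = callp->sy_narg & SYF_ARGMASK;	/* true arg count */
	int have_mplock = 0;
	int error;

	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
		get_mplock();		/* unsafe handler: lock up front */
		have_mplock = 1;
	}
	if (tracing) {
		if (have_mplock == 0) {	/* tracing is not MP aware, */
			get_mplock();	/* so take the lock lazily */
			have_mplock = 1;
		}
		trace_syscall(code, narg, args);
	}

	error = (*callp->sy_call)(p, args);

	if (have_mplock)		/* release only what we took */
		rel_mplock();
	return (error);
}

With getuid, getgid, umask, and getpgrp flagged SYF_MPSAFE in init_sysent.c, those calls can complete without ever touching the lock on this path.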
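One subtle consequence of entering syscall2() without the MP lock shows up in the p_sticks read: u_quad_t is 64 bits, a 32-bit CPU cannot load it in one instruction, and an interrupt may update it mid-read, so the commit loops until two consecutive volatile reads agree. The same loop as a stand-alone helper, assuming only that a writer cannot publish the same torn value twice in a row; the function name and parameter are illustrative:

typedef unsigned long long u_quad_t;

/*
 * Lock-free read of a 64-bit counter that interrupts may update
 * underneath us: re-read through a volatile pointer until the value
 * is stable, mirroring the p_sticks loop in syscall2().
 */
static u_quad_t
stable_read(volatile const u_quad_t *counterp)
{
	u_quad_t v;

	v = *counterp;
	while (v != *counterp)
		v = *counterp;
	return (v);
}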