author | Jason Evans <jasone@FreeBSD.org> | 2000-09-07 01:33:02 +0000
---|---|---
committer | Jason Evans <jasone@FreeBSD.org> | 2000-09-07 01:33:02 +0000
commit | 0384fff8c5b098545c3db311b0e0aa1ec4c9ae7e |
tree | bc6e36e781569f3efe04995c0b0befebb9154ef5 /sys/i386 |
parent | 62ae6c89ad2b03770097d05590093f93b9d94e08 |
Major update to the way synchronization is done in the kernel. Highlights
include:
* Mutual exclusion is used instead of spl*(). See mutex(9). (Note: The
alpha port is still in transition and currently uses both.)
* Per-CPU idle processes.
* Interrupts are run in their own separate kernel threads and can be
preempted (i386 only).
Partially contributed by: BSDi (BSD/OS)
Submissions by (at least): cp, dfr, dillon, grog, jake, jhb, sheldonh
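For readers used to the spl*() style, the first bullet above is the heart of the change. Below is a minimal sketch of the new locking idiom, using only the mtx_init()/mtx_enter()/mtx_exit() calls and the MTX_DEF flag that appear in the diff further down; the foo_* names and the exact header list are assumptions made for illustration, not code from this commit.

```c
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/mutex.h>		/* mutex API as of this commit */

static mtx_t	foo_mtx;		/* hypothetical per-driver lock */
static int	foo_count;		/* shared state it protects */

void
foo_init(void)
{
	/* Replace "raise spl around the data" with a named lock. */
	mtx_init(&foo_mtx, "foo driver", MTX_DEF);
}

void
foo_bump(void)
{
	/*
	 * Old style (removed by this commit):
	 *	s = splhigh(); foo_count++; splx(s);
	 * New style: acquire the mutex instead of blocking interrupts.
	 */
	mtx_enter(&foo_mtx, MTX_DEF);
	foo_count++;
	mtx_exit(&foo_mtx, MTX_DEF);
}
```

Spin mutexes follow the same pattern with MTX_SPIN; for example, machdep.c below initializes the scheduler lock with mtx_init(&sched_lock, "sched lock", MTX_SPIN).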
Notes:
svn path=/head/; revision=65557
Diffstat (limited to 'sys/i386')
55 files changed, 3418 insertions, 2819 deletions
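The largest mechanical change in the diff that follows is in apic_vector.s: the INTR() macro no longer runs the handler at interrupt time; it masks and EOIs the source, pushes the IRQ number, and calls _sched_ithd to wake the corresponding interrupt thread. The fragment below is only an editor's reading of that assembly rendered as C, with assumed prototypes (sched_ithd is the only name taken from the diff).

```c
/* Assumed prototypes for the sketch; not declarations from this commit. */
void	mask_and_eoi_level_irq(int irq);	/* MASK_LEVEL_IRQ + EOI_IRQ */
void	sched_ithd(int irq);			/* wake the IRQ's kernel thread */

/* What the rewritten INTR(irq_num, ...) stub boils down to. */
void
intr_stub(int irq)
{
	mask_and_eoi_level_irq(irq);	/* quiet the source */
	sched_ithd(irq);		/* schedule, rather than run, the handler */
	/*
	 * Return via doreti_next.  The driver's handler now runs later in
	 * its own kernel thread, where it can block on mutexes and, on
	 * i386, be preempted.
	 */
}
```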
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 2a7559df7f97..54bf00366c81 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -17,7 +17,7 @@ /* - * Macros for interrupt interrupt entry, call to handler, and exit. + * Macros for interrupt entry, call to handler, and exit. */ #define FAST_INTR(irq_num, vec_name) \ @@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \ /* - * Test to see if the source is currntly masked, clear if so. + * Test to see if the source is currently masked, clear if so. */ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ @@ -200,7 +200,16 @@ log_intr_event: #else #define APIC_ITRACE(name, irq_num, id) #endif - + +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \ maybe_extra_ipending ; \ ; \ APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ -; \ incb _intr_nesting_level ; \ ; \ /* entry point used by doreti_unpend for HWIs. 
*/ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl _cpl, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cpl ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), _ipending ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ + pushl $irq_num; /* pass the IRQ */ \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ ; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ MEXITCOUNT ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - POP_FRAME ; /* and return */ \ - iret + jmp doreti_next /* * Handle "spurious INTerrupts". @@ -434,20 +380,10 @@ _Xcpuast: FAKE_MCOUNT(13*4(%esp)) - /* - * Giant locks do not come cheap. - * A lot of cycles are going to be wasted here. - */ - call _get_mplock - - movl _cpl, %eax - pushl %eax orl $AST_PENDING, _astpending /* XXX */ incb _intr_nesting_level sti - pushl $0 - movl _cpuid, %eax lock btrl %eax, _checkstate_pending_ast @@ -461,7 +397,7 @@ _Xcpuast: lock incl CNAME(cpuast_cnt) MEXITCOUNT - jmp _doreti + jmp doreti_next 1: /* We are already in the process of delivering an ast for this CPU */ POP_FRAME @@ -487,40 +423,24 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - MP_TRYLOCK - testl %eax,%eax /* Did we get the lock ? 
*/ - jz 1f /* No */ - lock incl CNAME(forward_irq_hitcnt) cmpb $4, _intr_nesting_level - jae 2f + jae 1f - movl _cpl, %eax - pushl %eax incb _intr_nesting_level sti - pushl $0 - MEXITCOUNT - jmp _doreti /* Handle forwarded interrupt */ + jmp doreti_next /* Handle forwarded interrupt */ 1: lock - incl CNAME(forward_irq_misscnt) - call forward_irq /* Oops, we've lost the isr lock */ - MEXITCOUNT - POP_FRAME - iret -2: - lock incl CNAME(forward_irq_toodeepcnt) -3: - MP_RELLOCK MEXITCOUNT POP_FRAME iret +#if 0 /* * */ @@ -532,9 +452,11 @@ forward_irq: cmpl $0, CNAME(forward_irq_enabled) jz 4f +/* XXX - this is broken now, because mp_lock doesn't exist movl _mp_lock,%eax cmpl $FREE_LOCK,%eax jne 1f + */ movl $0, %eax /* Pick CPU #0 if noone has lock */ 1: shrl $24,%eax @@ -559,6 +481,7 @@ forward_irq: jnz 3b 4: ret +#endif /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, @@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr) FAST_INTR(22,fastintr22) FAST_INTR(23,fastintr23) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, CLKINTR_PENDING) INTR(1,intr1,) INTR(2,intr2,) @@ -728,15 +652,11 @@ _ihandlers: .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - +#if 0 /* active flag for lazy masking */ iactive: .long 0 +#endif #ifdef COUNT_XINVLTLB_HITS .globl _xhits diff --git a/sys/i386/i386/autoconf.c b/sys/i386/i386/autoconf.c index b209065027d6..4edda4bdcab5 100644 --- a/sys/i386/i386/autoconf.c +++ b/sys/i386/i386/autoconf.c @@ -163,14 +163,6 @@ configure(dummy) * XXX this is slightly misplaced. */ spl0(); - - /* - * Allow lowering of the ipl to the lowest kernel level if we - * panic (or call tsleep() before clearing `cold'). No level is - * completely safe (since a panic may occur in a critical region - * at splhigh()), but we want at least bio interrupts to work. - */ - safepri = cpl; } static void diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index acb8b40f2810..9e77114a1385 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -38,6 +38,7 @@ #include <machine/asmacros.h> #include <machine/ipl.h> #include <machine/lock.h> +#include <machine/mutex.h> #include <machine/psl.h> #include <machine/trap.h> #ifdef SMP @@ -175,20 +176,12 @@ IDTVEC(fpu) mov %ax,%fs FAKE_MCOUNT(13*4(%esp)) -#ifdef SMP MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%eax - pushl %eax /* save original cpl */ - pushl $0 /* dummy unit to finish intr frame */ -#else /* SMP */ - movl _cpl,%eax - pushl %eax pushl $0 /* dummy unit to finish intr frame */ - incl _cnt+V_TRAP -#endif /* SMP */ + call __mtx_enter_giant_def call _npx_intr + call __mtx_exit_giant_def incb _intr_nesting_level MEXITCOUNT @@ -205,9 +198,6 @@ IDTVEC(align) * gate (TGT), else disabled if this was an interrupt gate (IGT). * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. - * - * Note that all calls to MP_LOCK must occur with interrupts enabled - * in order to be able to take IPI's while waiting for the lock. */ SUPERALIGN_TEXT @@ -227,16 +217,12 @@ alltraps_with_regs_pushed: FAKE_MCOUNT(13*4(%esp)) calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ - MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%ebx /* keep orig. 
cpl here during trap() */ call _trap /* * Return via _doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ - pushl %ebx /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ incb _intr_nesting_level MEXITCOUNT @@ -274,16 +260,11 @@ IDTVEC(syscall) movl %eax,TF_EFLAGS(%esp) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti @@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall) mov %ax,%fs movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti ENTRY(fork_trampoline) + MTX_EXIT(_sched_lock, %ecx) + sti call _spl0 #ifdef SMP @@ -355,7 +333,6 @@ ENTRY(fork_trampoline) /* * Return via _doreti to handle ASTs. */ - pushl $0 /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ movb $1,_intr_nesting_level MEXITCOUNT diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 60accd19ba8e..78c607591875 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -51,6 +51,10 @@ #include <sys/mount.h> #include <sys/socket.h> #include <sys/resourcevar.h> +/* XXX */ +#ifdef KTR_PERCPU +#include <sys/ktr.h> +#endif #include <machine/frame.h> #include <machine/bootinfo.h> #include <machine/tss.h> @@ -73,6 +77,7 @@ #include <machine/sigframe.h> #include <machine/globaldata.h> #include <machine/vm86.h> +#include <machine/mutex.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); -#ifdef SMP -ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest)); -#endif +ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest)); ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); @@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(GD_SIZEOF, sizeof(struct globaldata)); ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc)); +ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc)); ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc)); +ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc)); ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb)); ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss)); ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime)); @@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks)); ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt)); ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending)); +ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, 
gd_intr_nesting_level)); #ifdef USER_LDT ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt)); #endif +ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check)); + +/* XXX */ +#ifdef KTR_PERCPU +ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx)); +ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf)); +ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data)); +#endif + #ifdef SMP ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid)); ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid)); @@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); ASSYM(GPROC0_SEL, GPROC0_SEL); ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); + +ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); +ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse)); +ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl)); + +ASSYM(MTX_UNOWNED, MTX_UNOWNED); diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s index 31fbfd5e98b1..f3181429cad5 100644 --- a/sys/i386/i386/globals.s +++ b/sys/i386/i386/globals.s @@ -61,44 +61,74 @@ globaldata: #else .set globaldata,0 #endif - .globl gd_curproc, gd_curpcb, gd_npxproc, gd_astpending - .globl gd_common_tss, gd_switchtime, gd_switchticks + .globl gd_curproc, gd_prevproc, gd_curpcb, gd_npxproc, gd_idleproc + .globl gd_astpending, gd_common_tss, gd_switchtime, gd_switchticks + .globl gd_intr_nesting_level .set gd_curproc,globaldata + GD_CURPROC + .set gd_prevproc,globaldata + GD_PREVPROC .set gd_astpending,globaldata + GD_ASTPENDING .set gd_curpcb,globaldata + GD_CURPCB .set gd_npxproc,globaldata + GD_NPXPROC + .set gd_idleproc,globaldata + GD_IDLEPROC .set gd_common_tss,globaldata + GD_COMMON_TSS .set gd_switchtime,globaldata + GD_SWITCHTIME .set gd_switchticks,globaldata + GD_SWITCHTICKS + .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL .globl gd_common_tssd, gd_tss_gdt .set gd_common_tssd,globaldata + GD_COMMON_TSSD .set gd_tss_gdt,globaldata + GD_TSS_GDT + .globl gd_witness_spin_check + .set gd_witness_spin_check, globaldata + GD_WITNESS_SPIN_CHECK + #ifdef USER_LDT .globl gd_currentldt .set gd_currentldt,globaldata + GD_CURRENTLDT #endif +/* XXX - doesn't work yet */ +#ifdef KTR_PERCPU + .globl gd_ktr_idx, gd_ktr_buf, gd_ktr_buf_data + .set gd_ktr_idx,globaldata + GD_KTR_IDX + .set gd_ktr_buf,globaldata + GD_KTR_BUF + .set gd_ktr_buf_data,globaldata + GD_KTR_BUF_DATA +#endif + #ifndef SMP - .globl _curproc, _curpcb, _npxproc, _astpending - .globl _common_tss, _switchtime, _switchticks + .globl _curproc, _prevproc, _curpcb, _npxproc, _idleproc, + .globl _astpending, _common_tss, _switchtime, _switchticks + .global _intr_nesting_level .set _curproc,globaldata + GD_CURPROC + .set _prevproc,globaldata + GD_PREVPROC .set _astpending,globaldata + GD_ASTPENDING .set _curpcb,globaldata + GD_CURPCB .set _npxproc,globaldata + GD_NPXPROC + .set _idleproc,globaldata + GD_IDLEPROC .set _common_tss,globaldata + GD_COMMON_TSS .set _switchtime,globaldata + GD_SWITCHTIME .set _switchticks,globaldata + GD_SWITCHTICKS + .set _intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL .globl _common_tssd, _tss_gdt .set _common_tssd,globaldata + GD_COMMON_TSSD .set _tss_gdt,globaldata + GD_TSS_GDT + .globl _witness_spin_check + .set _witness_spin_check,globaldata + GD_WITNESS_SPIN_CHECK + #ifdef USER_LDT .globl _currentldt .set _currentldt,globaldata + GD_CURRENTLDT #endif + +/* XXX - doesn't work yet */ +#ifdef KTR_PERCPU + .globl _ktr_idx, _ktr_buf, _ktr_buf_data + 
.set _ktr_idx,globaldata + GD_KTR_IDX + .set _ktr_buf,globaldata + GD_KTR_BUF + .set _ktr_buf_data,globaldata + GD_KTR_BUF_DATA +#endif #endif #ifdef SMP diff --git a/sys/i386/i386/i386-gdbstub.c b/sys/i386/i386/i386-gdbstub.c index 986b8d4daa1f..b442a377c44f 100644 --- a/sys/i386/i386/i386-gdbstub.c +++ b/sys/i386/i386/i386-gdbstub.c @@ -188,7 +188,8 @@ getpacket (char *buffer) unsigned char ch; int s; - s = spltty (); + s = read_eflags(); + disable_intr(); do { /* wait around for the start character, ignore all other characters */ @@ -239,7 +240,7 @@ getpacket (char *buffer) } } while (checksum != xmitcsum); - splx (s); + write_eflags(s); } /* send the packet in buffer. */ @@ -253,7 +254,8 @@ putpacket (char *buffer) int s; /* $<packet info>#<checksum>. */ - s = spltty (); + s = read_eflags(); + disable_intr(); do { /* @@ -285,7 +287,7 @@ putpacket (char *buffer) putDebugChar (hexchars[checksum & 0xf]); } while ((getDebugChar () & 0x7f) != '+'); - splx (s); + write_eflags(s); } static char remcomInBuffer[BUFMAX]; diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index 0e11e2b8eadf..71ecd63de85a 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -42,6 +42,7 @@ #include "opt_cpu.h" #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/sysctl.h> @@ -53,6 +54,8 @@ #include <machine/specialreg.h> #include <machine/md_var.h> +#include <sys/proc.h> +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> #define IDENTBLUE_CYRIX486 0 diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c index be86c65cb279..b9395bfc7f85 100644 --- a/sys/i386/i386/initcpu.c +++ b/sys/i386/i386/initcpu.c @@ -607,12 +607,14 @@ void enable_K5_wt_alloc(void) { u_int64_t msr; + int intrstate; /* * Write allocate is supported only on models 1, 2, and 3, with * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { + intrstate = save_intr(); disable_intr(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); @@ -645,7 +647,7 @@ enable_K5_wt_alloc(void) msr=rdmsr(0x83); wrmsr(0x83, msr|0x10); /* enable write allocate */ - enable_intr(); + restore_intr(intrstate); } } @@ -708,7 +710,6 @@ enable_K6_wt_alloc(void) wrmsr(0x0c0000082, whcr); write_eflags(eflags); - enable_intr(); } void @@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void) wrmsr(0x0c0000082, whcr); write_eflags(eflags); - enable_intr(); } #endif /* I585_CPU && CPU_WT_ALLOC */ diff --git a/sys/i386/i386/legacy.c b/sys/i386/i386/legacy.c index 8a3077058718..5b6cdbc85618 100644 --- a/sys/i386/i386/legacy.c +++ b/sys/i386/i386/legacy.c @@ -68,7 +68,10 @@ #else #include <i386/isa/isa.h> #endif +#include <sys/proc.h> +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> +#include <sys/rtprio.h> static struct rman irq_rman, drq_rman, port_rman, mem_rman; @@ -397,9 +400,9 @@ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, void (*ihand)(void *), void *arg, void **cookiep) { - intrmask_t *mask; driver_t *driver; - int error, icflags; + int error, icflags; + int pri; /* interrupt thread priority */ /* somebody tried to setup an irq that failed to allocate! 
*/ if (irq == NULL) @@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, driver = device_get_driver(child); switch (flags) { - case INTR_TYPE_TTY: - mask = &tty_imask; + case INTR_TYPE_TTY: /* keyboard or parallel port */ + pri = PI_TTYLOW; break; - case (INTR_TYPE_TTY | INTR_TYPE_FAST): - mask = &tty_imask; + case (INTR_TYPE_TTY | INTR_FAST): /* sio */ + pri = PI_TTYHIGH; icflags |= INTR_FAST; break; case INTR_TYPE_BIO: - mask = &bio_imask; + /* + * XXX We need to refine this. BSD/OS distinguishes + * between tape and disk priorities. + */ + pri = PI_DISK; break; case INTR_TYPE_NET: - mask = &net_imask; + pri = PI_NET; break; case INTR_TYPE_CAM: - mask = &cam_imask; + pri = PI_DISK; /* XXX or PI_CAM? */ break; case INTR_TYPE_MISC: - mask = 0; + pri = PI_DULL; /* don't care */ break; + /* We didn't specify an interrupt level. */ default: - panic("still using grody create_intr interface"); + panic("nexus_setup_intr: no interrupt type in flags"); } /* @@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, return (error); *cookiep = inthand_add(device_get_nameunit(child), irq->r_start, - ihand, arg, mask, icflags); + ihand, arg, pri, icflags); if (*cookiep == NULL) error = EINVAL; /* XXX ??? */ diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index bddd7d5be868..fa95fb0d6b53 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -862,9 +862,6 @@ map_read_write: movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) - -/* Initialize mp lock to allow early traps */ - movl $1, R(_mp_lock) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 6edecf04db54..875c9d5a7a8a 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -58,6 +58,7 @@ #include <sys/sysproto.h> #include <sys/signalvar.h> #include <sys/kernel.h> +#include <sys/ktr.h> #include <sys/linker.h> #include <sys/malloc.h> #include <sys/proc.h> @@ -98,10 +99,12 @@ #include <machine/bootinfo.h> #include <machine/ipl.h> #include <machine/md_var.h> +#include <machine/mutex.h> #include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */ +#include <machine/globaldata.h> +#include <machine/globals.h> #ifdef SMP #include <machine/smp.h> -#include <machine/globaldata.h> #endif #ifdef PERFMON #include <machine/perfmon.h> @@ -110,6 +113,7 @@ #ifdef OLD_BUS_ARCH #include <i386/isa/isa_device.h> #endif +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> #include <isa/rtc.h> #include <machine/vm86.h> @@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; static struct trapframe proc0_tf; +struct cpuhead cpuhead; + +mtx_t sched_lock; +mtx_t Giant; + #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void @@ -431,6 +440,11 @@ again: bufinit(); vm_pager_bufferinit(); + SLIST_INIT(&cpuhead); + SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu); + + mtx_init(&sched_lock, "sched lock", MTX_SPIN); + #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! @@ -1817,11 +1831,6 @@ init386(first) #endif int off; - /* - * Prevent lowering of the ipl if we call tsleep() early. 
- */ - safepri = cpl; - proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; @@ -1871,6 +1880,10 @@ init386(first) r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); + /* setup curproc so that mutexes work */ + PCPU_SET(curproc, &proc0); + PCPU_SET(prevproc, &proc0); + /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we @@ -1953,7 +1966,7 @@ init386(first) /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; - common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; + common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; @@ -1974,6 +1987,12 @@ init386(first) dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + /* + * We grab Giant during the vm86bios routines, so we need to ensure + * that it is up and running before we use vm86. + */ + mtx_init(&Giant, "Giant", MTX_DEF); + vm86_initialize(); getmemsize(first); @@ -2009,9 +2028,7 @@ init386(first) /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; -#ifdef SMP - proc0.p_addr->u_pcb.pcb_mpnest = 1; -#endif + proc0.p_addr->u_pcb.pcb_schednest = 0; proc0.p_addr->u_pcb.pcb_ext = 0; proc0.p_md.md_regs = &proc0_tf; } diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 61c5ecf73205..95b5759f9e66 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -36,6 +36,7 @@ #endif #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> @@ -65,6 +66,7 @@ #include <machine/apic.h> #include <machine/atomic.h> #include <machine/cpufunc.h> +#include <machine/mutex.h> #include <machine/mpapic.h> #include <machine/psl.h> #include <machine/segments.h> @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c index a3594a8ed20c..3f971d83548d 100644 --- a/sys/i386/i386/mpapic.c +++ b/sys/i386/i386/mpapic.c @@ -28,11 +28,14 @@ #include "opt_smp.h" #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> +#include <sys/proc.h> #include <machine/smptests.h> /** TEST_TEST1 */ #include <machine/smp.h> #include <machine/mpapic.h> +#include <machine/globaldata.h> #include <machine/segments.h> #include <i386/isa/intr_machdep.h> /* Xspuriousint() */ diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s index d3602d29a2f4..9ede02c24342 100644 --- a/sys/i386/i386/mpboot.s +++ b/sys/i386/i386/mpboot.s @@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x39, 6) - /* wait till we can get into the kernel */ - call _boot_get_mplock - - /* Now, let's prepare for some REAL WORK :-) */ + /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call _ap_init - call _rel_mplock - lock /* Avoid livelock (PIII Errata 39) */ - addl $0,-4(%esp) -2: - cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */ - jz 2b - call _get_mplock - - /* let her rip! (loads new stack) */ - jmp _cpu_switch - -NON_GPROF_ENTRY(wait_ap) - pushl %ebp - movl %esp, %ebp - call _rel_mplock - lock /* Avoid livelock (PIII Errata 39) */ - addl $0,0(%esp) - movl %eax, 8(%ebp) -1: - cmpl $0, CNAME(smp_started) - jnz 2f - decl %eax - cmpl $0, %eax - jge 1b -2: - call _get_mplock - movl %ebp, %esp - popl %ebp - ret - - /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s deleted file mode 100644 index dc5ba01e1f05..000000000000 --- a/sys/i386/i386/mplock.s +++ /dev/null @@ -1,343 +0,0 @@ -/* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - * - * $FreeBSD$ - * - * Functions for locking between CPUs in a SMP system. - * - * This is an "exclusive counting semaphore". This means that it can be - * free (0xffffffff) or be owned by a CPU (0xXXYYYYYY where XX is CPU-id - * and YYYYYY is the count). - * - * Contrary to most implementations around, this one is entirely atomic: - * The attempt to seize/release the semaphore and the increment/decrement - * is done in one atomic operation. This way we are safe from all kinds - * of weird reentrancy situations. - */ - -#include <machine/asmacros.h> -#include <machine/smptests.h> /** GRAB_LOPRIO */ -#include <machine/apic.h> - -#define GLPROFILE_NOT - -#ifdef CHEAP_TPR - -/* we assumme that the 'reserved bits' can be written with zeros */ - -#else /* CHEAP_TPR */ - -#error HEADS UP: this code needs work -/* - * The APIC doc says that reserved bits must be written with whatever - * value they currently contain, ie you should: read, modify, write, - * instead of just writing new values to the TPR register. 
Current - * silicon seems happy with just writing. If the behaviour of the - * silicon changes, all code that access the lapic_tpr must be modified. - * The last version to contain such code was: - * Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp - */ - -#endif /* CHEAP_TPR */ - -#ifdef GRAB_LOPRIO -/* - * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts. - */ - -/* after 1st acquire of lock we grab all hardware INTs */ -#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr - -/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */ -#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */ - -#else /* GRAB_LOPRIO */ - -#define GRAB_HWI /* nop */ -#define ARB_HWI /* nop */ - -#endif /* GRAB_LOPRIO */ - - - .text - -#ifdef SMP - -/*********************************************************************** - * void MPgetlock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %eax, %ecx. %edx must hold lock argument. - * - * Grabs hardware interrupts on first aquire. - * - * NOTE: Serialization is not required if we already hold the lock, since - * we already hold the lock, nor do we need a locked instruction if we - * already hold the lock. - */ - -NON_GPROF_ENTRY(MPgetlock_edx) -1: - movl (%edx), %eax /* Get current contents of lock */ - movl %eax, %ecx - andl $CPU_FIELD,%ecx - cmpl _cpu_lockid, %ecx /* Do we already own the lock? */ - jne 2f - incl %eax /* yes, just bump the count */ - movl %eax, (%edx) /* serialization not required */ - ret -2: - movl $FREE_LOCK, %eax /* lock must be free */ - movl _cpu_lockid, %ecx - incl %ecx - lock - cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */ -#ifdef GLPROFILE - jne 3f - incl _gethits2 -#else - jne 1b -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - ret -#ifdef GLPROFILE -3: - incl _gethits3 - jmp 1b -#endif - -/*********************************************************************** - * int MPtrylock(unsigned int *lock) - * --------------------------------- - * Destroys %eax, %ecx and %edx. - * Returns 1 if lock was successfull - */ - -NON_GPROF_ENTRY(MPtrylock) - movl 4(%esp), %edx /* Get the address of the lock */ - - movl $FREE_LOCK, %eax /* Assume it's free */ - movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - incl %ecx /* - new count is one */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 1f /* ...do not collect $200 */ -#ifdef GLPROFILE - incl _tryhits2 -#endif /* GLPROFILE */ - GRAB_HWI /* 1st acquire, grab hw INTs */ - movl $1, %eax - ret -1: - movl (%edx), %eax /* Try to see if we have it already */ - andl $COUNT_FIELD, %eax /* - get count */ - movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */ - orl %ecx, %eax /* - combine them */ - movl %eax, %ecx - incl %ecx /* - new count is one more */ - lock - cmpxchg %ecx, (%edx) /* - try it atomically */ - jne 2f /* - miss */ -#ifdef GLPROFILE - incl _tryhits -#endif /* GLPROFILE */ - movl $1, %eax - ret -2: -#ifdef GLPROFILE - incl _tryhits3 -#endif /* GLPROFILE */ - movl $0, %eax - ret - - -/*********************************************************************** - * void MPrellock_edx(unsigned int *lock : %edx) - * ---------------------------------- - * Destroys %ecx, argument must be in %edx - * - * SERIALIZATION NOTE! - * - * After a lot of arguing, it turns out that there is no problem with - * not having a synchronizing instruction in the MP unlock code. 
There - * are two things to keep in mind: First, Intel guarentees that writes - * are ordered amoungst themselves. Second, the P6 is allowed to reorder - * reads around writes. Third, the P6 maintains cache consistency (snoops - * the bus). The second is not an issue since the one read we do is the - * basis for the conditional which determines whether the write will be - * made or not. - * - * Therefore, no synchronizing instruction is required on unlock. There are - * three performance cases: First, if a single cpu is getting and releasing - * the lock the removal of the synchronizing instruction saves approx - * 200 nS (testing w/ duel cpu PIII 450). Second, if one cpu is contending - * for the lock while the other holds it, the removal of the synchronizing - * instruction results in a 700nS LOSS in performance. Third, if two cpu's - * are switching off ownership of the MP lock but not contending for it (the - * most common case), this results in a 400nS IMPROVEMENT in performance. - * - * Since our goal is to reduce lock contention in the first place, we have - * decided to remove the synchronizing instruction from the unlock code. - */ - -NON_GPROF_ENTRY(MPrellock_edx) - movl (%edx), %ecx /* - get the value */ - decl %ecx /* - new count is one less */ - testl $COUNT_FIELD, %ecx /* - Unless it's zero... */ - jnz 2f - ARB_HWI /* last release, arbitrate hw INTs */ - movl $FREE_LOCK, %ecx /* - In which case we release it */ -#if 0 - lock - addl $0,0(%esp) /* see note above */ -#endif -2: - movl %ecx, (%edx) - ret - -/*********************************************************************** - * void get_mplock() - * ----------------- - * All registers preserved - * - * Stack (after call to _MPgetlock): - * - * edx 4(%esp) - * ecx 8(%esp) - * eax 12(%esp) - * - * Requirements: Interrupts should be enabled on call so we can take - * IPI's and FAST INTs while we are waiting for the lock - * (else the system may not be able to halt). - * - * XXX there are still places where get_mplock() is called - * with interrupts disabled, so we have to temporarily reenable - * interrupts. - * - * Side effects: The current cpu will be given ownership of the - * hardware interrupts when it first aquires the lock. - * - * Costs: Initial aquisition requires the use of a costly locked - * instruction, but recursive aquisition is cheap. Release - * is very cheap. - */ - -NON_GPROF_ENTRY(get_mplock) - pushl %eax - pushl %ecx - pushl %edx - movl $_mp_lock, %edx - pushfl - testl $(1<<9), (%esp) - jz 2f - call _MPgetlock_edx - addl $4,%esp -1: - popl %edx - popl %ecx - popl %eax - ret -2: - sti - call _MPgetlock_edx - popfl - jmp 1b - -/* - * Special version of get_mplock that is used during bootstrap when we can't - * yet enable interrupts of any sort since the APIC isn't online yet. We - * do an endrun around MPgetlock_edx to avoid enabling interrupts. - * - * XXX FIXME.. - APIC should be online from the start to simplify IPI's. 
- */ -NON_GPROF_ENTRY(boot_get_mplock) - pushl %eax - pushl %ecx - pushl %edx -#ifdef GRAB_LOPRIO - pushfl - pushl lapic_tpr - cli -#endif - - movl $_mp_lock, %edx - call _MPgetlock_edx - -#ifdef GRAB_LOPRIO - popl lapic_tpr - popfl -#endif - popl %edx - popl %ecx - popl %eax - ret - -/*********************************************************************** - * void try_mplock() - * ----------------- - * reg %eax == 1 if success - */ - -NON_GPROF_ENTRY(try_mplock) - pushl %ecx - pushl %edx - pushl $_mp_lock - call _MPtrylock - add $4, %esp - popl %edx - popl %ecx - ret - -/*********************************************************************** - * void rel_mplock() - * ----------------- - * All registers preserved - */ - -NON_GPROF_ENTRY(rel_mplock) - pushl %ecx - pushl %edx - movl $_mp_lock,%edx - call _MPrellock_edx - popl %edx - popl %ecx - ret - -#endif - -/*********************************************************************** - * - */ - .data - .p2align 2 /* xx_lock aligned on int boundary */ - -#ifdef SMP - - .globl _mp_lock -_mp_lock: .long 0 - -#ifdef GLPROFILE - .globl _gethits -_gethits: - .long 0 -_gethits2: - .long 0 -_gethits3: - .long 0 - - .globl _tryhits -_tryhits: - .long 0 -_tryhits2: - .long 0 -_tryhits3: - .long 0 - -msg: - .asciz "lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n" -#endif /* GLPROFILE */ -#endif /* SMP */ diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 61c5ecf73205..95b5759f9e66 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -36,6 +36,7 @@ #endif #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> @@ -65,6 +66,7 @@ #include <machine/apic.h> #include <machine/atomic.h> #include <machine/cpufunc.h> +#include <machine/mutex.h> #include <machine/mpapic.h> #include <machine/psl.h> #include <machine/segments.h> @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/i386/i386/nexus.c b/sys/i386/i386/nexus.c index 8a3077058718..5b6cdbc85618 100644 --- a/sys/i386/i386/nexus.c +++ b/sys/i386/i386/nexus.c @@ -68,7 +68,10 @@ #else #include <i386/isa/isa.h> #endif +#include <sys/proc.h> +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> +#include <sys/rtprio.h> static struct rman irq_rman, drq_rman, port_rman, mem_rman; @@ -397,9 +400,9 @@ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, void (*ihand)(void *), void *arg, void **cookiep) { - intrmask_t *mask; driver_t *driver; - int error, icflags; + int error, icflags; + int pri; /* interrupt thread priority */ /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) @@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, driver = device_get_driver(child); switch (flags) { - case INTR_TYPE_TTY: - mask = &tty_imask; + case INTR_TYPE_TTY: /* keyboard or parallel port */ + pri = PI_TTYLOW; break; - case (INTR_TYPE_TTY | INTR_TYPE_FAST): - mask = &tty_imask; + case (INTR_TYPE_TTY | INTR_FAST): /* sio */ + pri = PI_TTYHIGH; icflags |= INTR_FAST; break; case INTR_TYPE_BIO: - mask = &bio_imask; + /* + * XXX We need to refine this. BSD/OS distinguishes + * between tape and disk priorities. + */ + pri = PI_DISK; break; case INTR_TYPE_NET: - mask = &net_imask; + pri = PI_NET; break; case INTR_TYPE_CAM: - mask = &cam_imask; + pri = PI_DISK; /* XXX or PI_CAM? */ break; case INTR_TYPE_MISC: - mask = 0; + pri = PI_DULL; /* don't care */ break; + /* We didn't specify an interrupt level. */ default: - panic("still using grody create_intr interface"); + panic("nexus_setup_intr: no interrupt type in flags"); } /* @@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, return (error); *cookiep = inthand_add(device_get_nameunit(child), irq->r_start, - ihand, arg, mask, icflags); + ihand, arg, pri, icflags); if (*cookiep == NULL) error = EINVAL; /* XXX ??? 
*/ diff --git a/sys/i386/i386/perfmon.c b/sys/i386/i386/perfmon.c index 574f416df2be..2efa51642d85 100644 --- a/sys/i386/i386/perfmon.c +++ b/sys/i386/i386/perfmon.c @@ -118,16 +118,19 @@ perfmon_avail(void) int perfmon_setup(int pmc, unsigned int control) { + int intrstate; + if (pmc < 0 || pmc >= NPMC) return EINVAL; perfmon_inuse |= (1 << pmc); control &= ~(PMCF_SYS_FLAGS << 16); + intrstate = save_intr(); disable_intr(); ctl_shadow[pmc] = control; writectl(pmc); wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); - enable_intr(); + restore_intr(intrstate); return 0; } @@ -162,15 +165,18 @@ perfmon_fini(int pmc) int perfmon_start(int pmc) { + int intrstate; + if (pmc < 0 || pmc >= NPMC) return EINVAL; if (perfmon_inuse & (1 << pmc)) { + intrstate = save_intr(); disable_intr(); ctl_shadow[pmc] |= (PMCF_EN << 16); wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); writectl(pmc); - enable_intr(); + restore_intr(intrstate); return 0; } return EBUSY; @@ -179,15 +185,18 @@ perfmon_start(int pmc) int perfmon_stop(int pmc) { + int intrstate; + if (pmc < 0 || pmc >= NPMC) return EINVAL; if (perfmon_inuse & (1 << pmc)) { + intrstate = save_intr(); disable_intr(); pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; ctl_shadow[pmc] &= ~(PMCF_EN << 16); writectl(pmc); - enable_intr(); + restore_intr(intrstate); return 0; } return EBUSY; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index edae2929fb87..7ce9120d243f 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va) * (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V; cpu_invlpg(prv_PADDR1); } - return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1)); + return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1))); #else if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index c895fefa8c15..db56a1b40af6 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -73,189 +73,6 @@ _tlb_flush_count: .long 0 .text -/* - * When no processes are on the runq, cpu_switch() branches to _idle - * to wait for something to come ready. - */ - ALIGN_TEXT - .type _idle,@function -_idle: - xorl %ebp,%ebp - movl %ebp,_switchtime - -#ifdef SMP - - /* when called, we have the mplock, intr disabled */ - /* use our idleproc's "context" */ - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: - /* Keep space for nonexisting return addr, or profiling bombs */ - movl $gd_idlestack_top-4, %ecx - addl %fs:0, %ecx - movl %ecx, %esp - - /* update common_tss.tss_esp0 pointer */ - movl %ecx, _common_tss + TSS_ESP0 - - movl _cpuid, %esi - btrl %esi, _private_tss - jae 1f - - movl $gd_common_tssd, %edi - addl %fs:0, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). - * - * NOTE: spl*() may only be called while we hold the MP lock (which - * we do). - */ - call _spl0 - - cli - - /* - * _REALLY_ free the lock, no matter how deep the prior nesting. - * We will recover the nesting on the way out when we have a new - * proc to load. 
- * - * XXX: we had damn well better be sure we had it before doing this! - */ - movl $FREE_LOCK, %eax - movl %eax, _mp_lock - - /* do NOT have lock, intrs disabled */ - .globl idle_loop -idle_loop: - - cmpl $0,_smp_active - jne 1f - cmpl $0,_cpuid - je 1f - jmp 2f - -1: - call _procrunnable - testl %eax,%eax - jnz 3f - - /* - * Handle page-zeroing in the idle loop. Called with interrupts - * disabled and the MP lock released. Inside vm_page_zero_idle - * we enable interrupts and grab the mplock as required. - */ - cmpl $0,_do_page_zero_idle - je 2f - - call _vm_page_zero_idle /* internal locking */ - testl %eax, %eax - jnz idle_loop -2: - - /* enable intrs for a halt */ - movl $0, lapic_tpr /* 1st candidate for an INT */ - call *_hlt_vector /* wait for interrupt */ - cli - jmp idle_loop - - /* - * Note that interrupts must be enabled while obtaining the MP lock - * in order to be able to take IPI's while blocked. - */ -3: - movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ - sti - call _get_mplock - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _rel_mplock - jmp idle_loop - -#else /* !SMP */ - - movl $HIDENAME(tmpstk),%esp -#if defined(OVERLY_CONSERVATIVE_PTD_MGMT) -#if defined(SWTCH_OPTIM_STATS) - incl _swtch_optim_stats -#endif - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: -#endif - - /* update common_tss.tss_esp0 pointer */ - movl %esp, _common_tss + TSS_ESP0 - - movl $0, %esi - btrl %esi, _private_tss - jae 1f - - movl $_common_tssd, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). - */ - call _spl0 - - ALIGN_TEXT -idle_loop: - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _vm_page_zero_idle - testl %eax, %eax - jnz idle_loop - call *_hlt_vector /* wait for interrupt */ - jmp idle_loop - -#endif /* SMP */ - -CROSSJUMPTARGET(_idle) - ENTRY(default_halt) sti #ifndef SMP @@ -264,16 +81,23 @@ ENTRY(default_halt) ret /* + * cpu_throw() + */ +ENTRY(cpu_throw) + jmp sw1 + +/* * cpu_switch() */ ENTRY(cpu_switch) /* switch to new process. first, save context as needed */ movl _curproc,%ecx + movl %ecx,_prevproc /* if no process to save, don't bother */ testl %ecx,%ecx - je sw1 + jz sw1 #ifdef SMP movb P_ONCPU(%ecx), %al /* save "last" cpu */ @@ -299,7 +123,7 @@ ENTRY(cpu_switch) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) - /* test if debug regisers should be saved */ + /* test if debug registers should be saved */ movb PCB_FLAGS(%edx),%al andb $PCB_DBREGS,%al jz 1f /* no, skip over */ @@ -319,15 +143,12 @@ ENTRY(cpu_switch) movl %eax,PCB_DR0(%edx) 1: + /* save sched_lock recursion count */ + movl _sched_lock+MTX_RECURSE,%eax + movl %eax,PCB_SCHEDNEST(%edx) + #ifdef SMP - movl _mp_lock, %eax /* XXX FIXME: we should be saving the local APIC TPR */ -#ifdef DIAGNOSTIC - cmpl $FREE_LOCK, %eax /* is it free? */ - je badsw4 /* yes, bad medicine! 
*/ -#endif /* DIAGNOSTIC */ - andl $COUNT_FIELD, %eax /* clear CPU portion */ - movl %eax, PCB_MPNEST(%edx) /* store it */ #endif /* SMP */ #if NNPX > 0 @@ -341,25 +162,33 @@ ENTRY(cpu_switch) 1: #endif /* NNPX > 0 */ - movl $0,_curproc /* out of process */ - - /* save is done, now choose a new process or idle */ + /* save is done, now choose a new process */ sw1: - cli #ifdef SMP /* Stop scheduling if smp_active goes zero and we are not BSP */ cmpl $0,_smp_active jne 1f cmpl $0,_cpuid - CROSSJUMP(je, _idle, jne) /* wind down */ + je 1f + + movl _idleproc, %eax + jmp sw1b 1: #endif + /* + * Choose a new process to schedule. chooseproc() returns idleproc + * if it cannot find another process to run. + */ sw1a: call _chooseproc /* trash ecx, edx, ret eax*/ - testl %eax,%eax - CROSSJUMP(je, _idle, jne) /* if no proc, idle */ + +#ifdef DIAGNOSTIC + testl %eax,%eax /* no process? */ + jz badsw3 /* no, panic */ +#endif +sw1b: movl %eax,%ecx xorl %eax,%eax @@ -456,9 +285,6 @@ sw1a: movl %ecx, _curproc /* into next process */ #ifdef SMP - movl _cpu_lockid, %eax - orl PCB_MPNEST(%edx), %eax /* add next count from PROC */ - movl %eax, _mp_lock /* load the mp_lock */ /* XXX FIXME: we should be restoring the local APIC TPR */ #endif /* SMP */ @@ -500,7 +326,22 @@ cpu_switch_load_gs: movl %eax,%dr7 1: - sti + /* + * restore sched_lock recursion count and transfer ownership to + * new process + */ + movl PCB_SCHEDNEST(%edx),%eax + movl %eax,_sched_lock+MTX_RECURSE + + movl _curproc,%eax + movl %eax,_sched_lock+MTX_LOCK + +#ifdef DIAGNOSTIC + pushfl + popl %ecx + testl $0x200, %ecx /* interrupts enabled? */ + jnz badsw6 /* that way madness lies */ +#endif ret CROSSJUMPTARGET(sw1a) @@ -517,15 +358,27 @@ badsw2: call _panic sw0_2: .asciz "cpu_switch: not SRUN" + +badsw3: + pushl $sw0_3 + call _panic + +sw0_3: .asciz "cpu_switch: chooseproc returned NULL" + #endif -#if defined(SMP) && defined(DIAGNOSTIC) -badsw4: - pushl $sw0_4 +#ifdef DIAGNOSTIC +badsw5: + pushl $sw0_5 + call _panic + +sw0_5: .asciz "cpu_switch: interrupts enabled (again)" +badsw6: + pushl $sw0_6 call _panic -sw0_4: .asciz "cpu_switch: do not have lock" -#endif /* SMP && DIAGNOSTIC */ +sw0_6: .asciz "cpu_switch: interrupts enabled" +#endif /* * savectx(pcb) diff --git a/sys/i386/i386/synch_machdep.c b/sys/i386/i386/synch_machdep.c new file mode 100644 index 000000000000..029225dbf314 --- /dev/null +++ b/sys/i386/i386/synch_machdep.c @@ -0,0 +1,559 @@ +/*- + * Copyright (c) 1997, 1998 Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Berkeley Software Design Inc's name may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ + * $FreeBSD$ + */ + +#define MTX_STRS /* define common strings */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <ddb/ddb.h> +#include <machine/atomic.h> +#include <machine/clock.h> +#include <machine/cpu.h> +#include <machine/mutex.h> + +/* All mutexes in system (used for debug/panic) */ +mtx_t all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head", + TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), + { NULL, NULL }, &all_mtx, &all_mtx +#ifdef SMP_DEBUG + , NULL, { NULL, NULL }, NULL, 0 +#endif +}; + +int mtx_cur_cnt; +int mtx_max_cnt; + +extern void _mtx_enter_giant_def(void); +extern void _mtx_exit_giant_def(void); + +static void propagate_priority(struct proc *) __unused; + +#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) +#define mtx_owner(m) (mtx_unowned(m) ? NULL \ + : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) + +#define RETIP(x) *(((int *)(&x)) - 1) +#define SET_PRIO(p, pri) (p)->p_priority = (pri) + +/* + * XXX Temporary, for use from assembly language + */ + +void +_mtx_enter_giant_def(void) +{ + + mtx_enter(&Giant, MTX_DEF); +} + +void +_mtx_exit_giant_def(void) +{ + + mtx_exit(&Giant, MTX_DEF); +} + +static void +propagate_priority(struct proc *p) +{ + int pri = p->p_priority; + mtx_t *m = p->p_blocked; + + for (;;) { + struct proc *p1; + + p = mtx_owner(m); + + if (p == NULL) { + /* + * This really isn't quite right. Really + * ought to bump priority of process that + * next acquires the mutex. + */ + MPASS(m->mtx_lock == MTX_CONTESTED); + return; + } + MPASS(p->p_magic == P_MAGIC); + if (p->p_priority <= pri) + return; + /* + * If lock holder is actually running, just bump priority. + */ + if (TAILQ_NEXT(p, p_procq) == NULL) { + MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); + SET_PRIO(p, pri); + return; + } + /* + * If on run queue move to new run queue, and + * quit. + */ +#if 1 + if (p->p_stat == SRUN) { +#else + if ((m = p->p_blocked) == NULL) { +#endif + MPASS(p->p_stat == SRUN); + remrunqueue(p); + SET_PRIO(p, pri); + setrunqueue(p); + return; + } + + /* + * If we aren't blocked on a mutex, give up and quit. + */ + if (p->p_stat != SMTX) { + printf( + "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", + p->p_pid, p->p_comm, p->p_stat, m->mtx_description); + return; + } + + /* + * Pick up the mutex that p is blocked on. 
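The propagate_priority() walk above can be pictured with a small stand-alone model: follow the chain from a blocked process to the holder of the mutex it sleeps on, raising each holder whose priority is numerically worse (a larger p_priority value means less urgent). The struct and field names below are invented for illustration and are not the kernel's; this is a sketch of the idea, not the function itself.

#include <stdio.h>

struct mtx;

struct proc {
    int         prio;        /* smaller number = more urgent */
    struct mtx  *blocked_on; /* mutex this process sleeps on, if any */
    const char  *name;
};

struct mtx {
    struct proc *owner;      /* current holder, NULL if free */
    const char  *name;
};

/* Push the waiter's priority down the chain of lock holders. */
static void
propagate_priority_model(struct proc *waiter)
{
    int pri = waiter->prio;
    struct mtx *m = waiter->blocked_on;

    while (m != NULL && m->owner != NULL && m->owner->prio > pri) {
        printf("boost %s: %d -> %d (holds %s)\n",
            m->owner->name, m->owner->prio, pri, m->name);
        m->owner->prio = pri;
        m = m->owner->blocked_on;   /* the holder may itself be blocked */
    }
}

int
main(void)
{
    struct mtx a = { NULL, "A" }, b = { NULL, "B" };
    struct proc low = { 80, &b, "low" }, mid = { 50, NULL, "mid" },
        high = { 10, &a, "high" };

    a.owner = &low;     /* high waits on A, held by low ... */
    b.owner = &mid;     /* ... which waits on B, held by mid */
    propagate_priority_model(&high);
    return (0);
}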
+ */ + m = p->p_blocked; + MPASS(m != NULL); + + printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, + p->p_comm, m->mtx_description); + /* + * Check if the proc needs to be moved up on + * the blocked chain + */ + if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || + p1->p_priority <= pri) { + if (p1) + printf( + "XXX: previous process %d(%s) has higher priority\n", + p->p_pid, p->p_comm); + else + printf("XXX: process at head of run queue\n"); + continue; + } + + /* + * Remove proc from blocked chain + */ + TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); + TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { + MPASS(p1->p_magic == P_MAGIC); + if (p1->p_priority > pri) + break; + } + if (p1) + TAILQ_INSERT_BEFORE(p1, p, p_procq); + else + TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); + CTR4(KTR_LOCK, + "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", + p, p1, m, m->mtx_description); + } +} + +void +mtx_enter_hard(mtx_t *m, int type, int flags) +{ + struct proc *p = CURPROC; + + KASSERT(p != NULL, ("curproc is NULL in mutex")); + + switch (type) { + case MTX_DEF: + if ((m->mtx_lock & MTX_FLAGMASK) == (u_int)p) { + m->mtx_recurse++; + atomic_set_int(&m->mtx_lock, MTX_RECURSE); + CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); + return; + } + CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%x) [0x%x]", + m, m->mtx_lock, RETIP(m)); + while (!atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED, (int)p)) { + int v; + struct proc *p1; + + mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); + /* + * check if the lock has been released while + * waiting for the schedlock. + */ + if ((v = m->mtx_lock) == MTX_UNOWNED) { + mtx_exit(&sched_lock, MTX_SPIN); + continue; + } + /* + * The mutex was marked contested on release. This + * means that there are processes blocked on it. + */ + if (v == MTX_CONTESTED) { + p1 = TAILQ_FIRST(&m->mtx_blocked); + KASSERT(p1 != NULL, ("contested mutex has no contesters")); + KASSERT(p != NULL, ("curproc is NULL for contested mutex")); + m->mtx_lock = (int)p | MTX_CONTESTED; + if (p1->p_priority < p->p_priority) { + SET_PRIO(p, p1->p_priority); + } + mtx_exit(&sched_lock, MTX_SPIN); + return; + } + /* + * If the mutex isn't already contested and + * a failure occurs setting the contested bit the + * mutex was either release or the + * state of the RECURSION bit changed. + */ + if ((v & MTX_CONTESTED) == 0 && + !atomic_cmpset_int(&m->mtx_lock, v, + v | MTX_CONTESTED)) { + mtx_exit(&sched_lock, MTX_SPIN); + continue; + } + + /* We definitely have to sleep for this lock */ + mtx_assert(m, MA_NOTOWNED); + +#ifdef notyet + /* + * If we're borrowing an interrupted thread's VM + * context must clean up before going to sleep. 
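The contested loop above keys off the mutex lock word, which holds either MTX_UNOWNED, MTX_CONTESTED, or the owner pointer with flag bits in its low bits, and tries to swing it with an atomic compare-and-set. Below is a rough user-space model of that encoding using C11 atomics; the constants and names are invented for illustration and differ from the real MTX_* definitions in machine/mutex.h.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_UNOWNED   ((uintptr_t)0xdeadbeef) /* invented sentinel */
#define MODEL_FLAGMASK  ((uintptr_t)0x3)        /* low bits carry flags */

struct model_mtx {
    _Atomic uintptr_t lock;     /* owner | flags, or MODEL_UNOWNED */
};

/* Try the uncontested fast path: swing UNOWNED -> our thread id. */
static int
model_try_enter(struct model_mtx *m, uintptr_t self)
{
    uintptr_t expected = MODEL_UNOWNED;

    return (atomic_compare_exchange_strong(&m->lock, &expected, self));
}

static uintptr_t
model_owner(struct model_mtx *m)
{
    uintptr_t v = atomic_load(&m->lock);

    return (v == MODEL_UNOWNED ? 0 : (v & ~MODEL_FLAGMASK));
}

int
main(void)
{
    struct model_mtx m = { MODEL_UNOWNED };
    uintptr_t me = 0x1000;      /* stands in for curproc */

    printf("first try:  %d\n", model_try_enter(&m, me)); /* succeeds */
    printf("second try: %d\n", model_try_enter(&m, me)); /* fails: take the contested path */
    printf("owner: %#lx\n", (unsigned long)model_owner(&m));
    return (0);
}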
+ */ + if (p->p_flag & (P_ITHD | P_SITHD)) { + ithd_t *it = (ithd_t *)p; + + if (it->it_interrupted) { + CTR2(KTR_LOCK, + "mtx_enter: 0x%x interrupted 0x%x", + it, it->it_interrupted); + intr_thd_fixup(it); + } + } +#endif + + /* Put us on the list of procs blocked on this mutex */ + if (TAILQ_EMPTY(&m->mtx_blocked)) { + p1 = (struct proc *)(m->mtx_lock & + MTX_FLAGMASK); + LIST_INSERT_HEAD(&p1->p_contested, m, + mtx_contested); + TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); + } else { + TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) + if (p1->p_priority > p->p_priority) + break; + if (p1) + TAILQ_INSERT_BEFORE(p1, p, p_procq); + else + TAILQ_INSERT_TAIL(&m->mtx_blocked, p, + p_procq); + } + + p->p_blocked = m; /* Who we're blocked on */ + p->p_stat = SMTX; +#if 0 + propagate_priority(p); +#endif + CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", + p, m, m->mtx_description); + mi_switch(); + CTR3(KTR_LOCK, + "mtx_enter: p 0x%p free from blocked on [0x%p] %s", + p, m, m->mtx_description); + mtx_exit(&sched_lock, MTX_SPIN); + } + return; + case MTX_SPIN: + case MTX_SPIN | MTX_FIRST: + case MTX_SPIN | MTX_TOPHALF: + { + int i = 0; + + if (m->mtx_lock == (u_int)p) { + m->mtx_recurse++; + return; + } + CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); + for (;;) { + if (atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED, + (u_int)p)) + break; + while (m->mtx_lock != MTX_UNOWNED) { + if (i++ < 1000000) + continue; + if (i++ < 6000000) + DELAY (1); +#ifdef DDB + else if (!db_active) { +#else + else { +#endif +#if 0 + Debugger ("spinning"); + panic("spin lock %s held by 0x%x for > 5 seconds", + m->mtx_description, + m->mtx_lock); +#endif + } + } + } + +#ifdef SMP_DEBUG + if (type != MTX_SPIN) + m->mtx_savefl = 0xdeadbeef; + else +#endif + m->mtx_savefl = flags; + CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); + return; + } + } +} + +void +mtx_exit_hard(mtx_t *m, int type) +{ + struct proc *p, *p1; + mtx_t *m1; + int pri; + + switch (type) { + case MTX_DEF: + case MTX_DEF | MTX_NOSWITCH: + if (m->mtx_recurse != 0) { + if (--(m->mtx_recurse) == 0) + atomic_clear_int(&m->mtx_lock, MTX_RECURSE); + CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); + return; + } + mtx_enter(&sched_lock, MTX_SPIN); + CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); + p = CURPROC; + p1 = TAILQ_FIRST(&m->mtx_blocked); + MPASS(p->p_magic == P_MAGIC); + MPASS(p1->p_magic == P_MAGIC); + TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); + if (TAILQ_EMPTY(&m->mtx_blocked)) { + LIST_REMOVE(m, mtx_contested); + atomic_cmpset_int(&m->mtx_lock, m->mtx_lock, + MTX_UNOWNED); + CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); + } else + m->mtx_lock = MTX_CONTESTED; + pri = MAXPRI; + LIST_FOREACH(m1, &p->p_contested, mtx_contested) { + int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; + if (cp < pri) + pri = cp; + } + if (pri > p->p_nativepri) + pri = p->p_nativepri; + SET_PRIO(p, pri); + CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", + m, p1); + p1->p_blocked = NULL; + p1->p_stat = SRUN; + setrunqueue(p1); + if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { +#ifdef notyet + if (p->p_flag & (P_ITHD | P_SITHD)) { + ithd_t *it = (ithd_t *)p; + + if (it->it_interrupted) { + CTR2(KTR_LOCK, + "mtx_exit: 0x%x interruped 0x%x", + it, it->it_interrupted); + intr_thd_fixup(it); + } + } +#endif + setrunqueue(p); + CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%x", + m, m->mtx_lock); + mi_switch(); + CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%x", + m, m->mtx_lock); + } + mtx_exit(&sched_lock, MTX_SPIN); + return; + case MTX_SPIN: 
+ case MTX_SPIN | MTX_FIRST: + if (m->mtx_recurse != 0) { + m->mtx_recurse--; + return; + } + if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED)) { + if (type & MTX_FIRST) { + enable_intr(); /* XXX is this kosher? */ + } else { + MPASS(m->mtx_savefl != 0xdeadbeef); + write_eflags(m->mtx_savefl); + } + return; + } + panic("unsucuessful release of spin lock"); + case MTX_SPIN | MTX_TOPHALF: + if (m->mtx_recurse != 0) { + m->mtx_recurse--; + return; + } + if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED)) + return; + panic("unsucuessful release of spin lock"); + default: + panic("mtx_exit_hard: unsupported type 0x%x\n", type); + } +} + +#define MV_DESTROY 0 /* validate before destory */ +#define MV_INIT 1 /* validate before init */ + +#ifdef SMP_DEBUG + +int mtx_validate __P((mtx_t *, int)); + +int +mtx_validate(mtx_t *m, int when) +{ + mtx_t *mp; + int i; + int retval = 0; + + if (m == &all_mtx || cold) + return 0; + + mtx_enter(&all_mtx, MTX_DEF); + ASS(kernacc((caddr_t)all_mtx.mtx_next, 4, 1) == 1); + ASS(all_mtx.mtx_next->mtx_prev == &all_mtx); + for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { + if (kernacc((caddr_t)mp->mtx_next, 4, 1) != 1) { + panic("mtx_validate: mp=%p mp->mtx_next=%p", + mp, mp->mtx_next); + } + i++; + if (i > mtx_cur_cnt) { + panic("mtx_validate: too many in chain, known=%d\n", + mtx_cur_cnt); + } + } + ASS(i == mtx_cur_cnt); + switch (when) { + case MV_DESTROY: + for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) + if (mp == m) + break; + ASS(mp == m); + break; + case MV_INIT: + for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) + if (mp == m) { + /* + * Not good. This mutex already exits + */ + retval = 1; +#if 1 + printf("re-initing existing mutex %s\n", + m->mtx_description); + ASS(m->mtx_lock == MTX_UNOWNED); + retval = 1; +#else + panic("re-initing existing mutex %s", + m->mtx_description); +#endif + } + } + mtx_exit(&all_mtx, MTX_DEF); + return (retval); +} +#endif + +void +mtx_init(mtx_t *m, char *t, int flag) +{ + + CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); +#ifdef SMP_DEBUG + if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ + return; +#endif + bzero((void *)m, sizeof *m); + TAILQ_INIT(&m->mtx_blocked); + m->mtx_description = t; + m->mtx_lock = MTX_UNOWNED; + /* Put on all mutex queue */ + mtx_enter(&all_mtx, MTX_DEF); + m->mtx_next = &all_mtx; + m->mtx_prev = all_mtx.mtx_prev; + m->mtx_prev->mtx_next = m; + all_mtx.mtx_prev = m; + if (++mtx_cur_cnt > mtx_max_cnt) + mtx_max_cnt = mtx_cur_cnt; + mtx_exit(&all_mtx, MTX_DEF); + witness_init(m, flag); +} + +void +mtx_destroy(mtx_t *m) +{ + + CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); +#ifdef SMP_DEBUG + if (m->mtx_next == NULL) + panic("mtx_destroy: %p (%s) already destroyed", + m, m->mtx_description); + + if (!mtx_owned(m)) { + ASS(m->mtx_lock == MTX_UNOWNED); + } else { + ASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); + } + mtx_validate(m, MV_DESTROY); /* diagnostic */ +#endif + +#ifdef WITNESS + if (m->mtx_witness) + witness_destroy(m); +#endif /* WITNESS */ + + /* Remove from the all mutex queue */ + mtx_enter(&all_mtx, MTX_DEF); + m->mtx_next->mtx_prev = m->mtx_prev; + m->mtx_prev->mtx_next = m->mtx_next; +#ifdef SMP_DEBUG + m->mtx_next = m->mtx_prev = NULL; +#endif + mtx_cur_cnt--; + mtx_exit(&all_mtx, MTX_DEF); +} diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 51de1ac9e650..f32dfaeeddc0 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -49,10 +49,12 @@ #include 
"opt_trap.h" #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/pioctl.h> #include <sys/kernel.h> +#include <sys/ktr.h> #include <sys/resourcevar.h> #include <sys/signalvar.h> #include <sys/syscall.h> @@ -76,12 +78,14 @@ #include <machine/cpu.h> #include <machine/ipl.h> #include <machine/md_var.h> +#include <machine/mutex.h> #include <machine/pcb.h> #ifdef SMP #include <machine/smp.h> #endif #include <machine/tss.h> +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> #ifdef POWERFAIL_NMI @@ -96,11 +100,14 @@ #include "isa.h" #include "npx.h" +#include <sys/sysctl.h> + int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); extern void syscall2 __P((struct trapframe frame)); +extern void ast __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -142,7 +149,7 @@ static char *trap_msg[] = { }; static __inline int userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks, int have_mplock)); + u_quad_t oticks, int have_giant)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; @@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, &panic_on_nmi, 0, "Panic on NMI"); static __inline int -userret(p, frame, oticks, have_mplock) +userret(p, frame, oticks, have_giant) struct proc *p; struct trapframe *frame; u_quad_t oticks; - int have_mplock; + int have_giant; { int sig, s; while ((sig = CURSIG(p)) != 0) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } postsig(sig); } @@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } s = splhigh(); + mtx_enter(&sched_lock, MTX_SPIN); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); + mtx_exit(&sched_lock, MTX_SPIN); splx(s); - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; + } postsig(sig); + } } /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); } curpriority = p->p_priority; - return(have_mplock); + return(have_giant); } /* @@ -226,13 +236,20 @@ trap(frame) u_quad_t sticks = 0; int i = 0, ucode = 0, type, code; vm_offset_t eva; +#ifdef POWERFAIL_NMI + static int lastalert = 0; +#endif - if (!(frame.tf_eflags & PSL_I)) { + atomic_add_int(&cnt.v_trap, 1); + + if ((frame.tf_eflags & PSL_I) == 0) { /* - * Buggy application or kernel code has disabled interrupts - * and then trapped. Enabling interrupts now is wrong, but - * it is better than running with interrupts disabled until - * they are accidentally enabled later. + * Buggy application or kernel code has disabled + * interrupts and then trapped. Enabling interrupts + * now is wrong, but it is better than running with + * interrupts disabled until they are accidentally + * enabled later. XXX Consider whether is this still + * correct. 
*/ type = frame.tf_trapno; if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM)) @@ -252,54 +269,27 @@ trap(frame) eva = 0; if (frame.tf_trapno == T_PAGEFLT) { /* - * For some Cyrix CPUs, %cr2 is clobbered by interrupts. - * This problem is worked around by using an interrupt - * gate for the pagefault handler. We are finally ready - * to read %cr2 and then must reenable interrupts. - * - * XXX this should be in the switch statement, but the - * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the - * flow of control too much for this to be obviously - * correct. + * For some Cyrix CPUs, %cr2 is clobbered by + * interrupts. This problem is worked around by using + * an interrupt gate for the pagefault handler. We + * are finally ready to read %cr2 and then must + * reenable interrupts. */ eva = rcr2(); enable_intr(); - } + } + + mtx_enter(&Giant, MTX_DEF); #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif + type = frame.tf_trapno; code = frame.tf_err; - if (in_vm86call) { - if (frame.tf_eflags & PSL_VM && - (type == T_PROTFLT || type == T_STKFLT)) { - i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) - /* - * returns to original process - */ - vm86_trap((struct vm86frame *)&frame); - return; - } - switch (type) { - /* - * these traps want either a process context, or - * assume a normal userspace trap. - */ - case T_PROTFLT: - case T_SEGNPFLT: - trap_fatal(&frame, eva); - return; - case T_TRCTRAP: - type = T_BPTFLT; /* kernel breakpoint */ - /* FALL THROUGH */ - } - goto kernel_trap; /* normal kernel trap handling */ - } - - if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { + if ((ISPL(frame.tf_cs) == SEL_UPL) || + ((frame.tf_eflags & PSL_VM) && !in_vm86call)) { /* user trap */ sticks = p->p_sticks; @@ -322,16 +312,6 @@ restart: i = SIGFPE; break; - case T_ASTFLT: /* Allow process switch */ - astoff(); - cnt.v_soft++; - if (p->p_flag & P_OWEUPC) { - p->p_flag &= ~P_OWEUPC; - addupc_task(p, p->p_stats->p_prof.pr_addr, - p->p_stats->p_prof.pr_ticks); - } - goto out; - /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle @@ -342,7 +322,7 @@ restart: if (frame.tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)&frame); if (i == 0) - goto out; + goto user; break; } /* FALL THROUGH */ @@ -357,14 +337,20 @@ restart: case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE, eva); - if (i == -1) - return; #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (i == -2) + if (i == -2) { + /* + * f00f hack workaround has triggered, treat + * as illegal instruction not page fault. 
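As restructured above, trap_pfault()'s return value is overloaded: 0 means the fault was resolved and control returns to user mode, -1 means the caller should simply finish, -2 marks the Pentium f00f workaround so the trap is retried as a privileged-instruction fault, and any other value is delivered as a signal. A compact stand-alone sketch of that dispatch; the constant names are invented, only the 0/-1/-2 convention comes from the diff.

#include <stdio.h>

#define PF_RESOLVED  0  /* fault handled, resume the process */
#define PF_DONE     -1  /* nothing further for the caller to do */
#define PF_F00F     -2  /* f00f workaround: retry as T_PRIVINFLT */

/* Returns 1 if the caller should restart the trap, 0 otherwise. */
static int
pagefault_dispatch_model(int pf_result)
{
    switch (pf_result) {
    case PF_F00F:
        printf("retry as an illegal-instruction trap\n");
        return (1);
    case PF_DONE:
        printf("handled, just return\n");
        return (0);
    case PF_RESOLVED:
        printf("resolved, back to user mode\n");
        return (0);
    default:
        printf("deliver signal %d\n", pf_result);
        return (0);
    }
}

int
main(void)
{
    pagefault_dispatch_model(PF_F00F);
    pagefault_dispatch_model(11);       /* e.g. SIGSEGV */
    return (0);
}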
+ */ + frame.tf_trapno = T_PRIVINFLT; goto restart; + } #endif - if (i == 0) + if (i == -1) goto out; + if (i == 0) + goto user; ucode = T_PAGEFLT; break; @@ -377,7 +363,15 @@ restart: #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI - goto handle_powerfail; +#ifndef TIMER_FREQ +# define TIMER_FREQ 1193182 +#endif + if (time_second - lastalert > 10) { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { @@ -391,7 +385,7 @@ restart: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -410,9 +404,9 @@ restart: case T_DNA: #if NNPX > 0 - /* if a transparent fault (due to context switch "late") */ + /* transparent fault (due to context switch "late") */ if (npxdna()) - return; + goto out; #endif if (!pmath_emulate) { i = SIGFPE; @@ -422,7 +416,7 @@ restart: i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - return; + goto out; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -435,13 +429,12 @@ restart: break; } } else { -kernel_trap: /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE, eva); - return; + goto out; case T_DNA: #if NNPX > 0 @@ -451,31 +444,35 @@ kernel_trap: * registered such use. */ if (npxdna()) - return; + goto out; #endif break; - case T_PROTFLT: /* general protection fault */ - case T_SEGNPFLT: /* segment not present fault */ /* - * Invalid segment selectors and out of bounds - * %eip's and %esp's can be set up in user mode. - * This causes a fault in kernel mode when the - * kernel tries to return to user mode. We want - * to get this fault so that we can fix the - * problem here and not have to check all the - * selectors and pointers when the user changes - * them. + * The following two traps can happen in + * vm86 mode, and, if so, we want to handle + * them specially. */ -#define MAYBE_DORETI_FAULT(where, whereto) \ - do { \ - if (frame.tf_eip == (int)where) { \ - frame.tf_eip = (int)whereto; \ - return; \ - } \ - } while (0) - - if (intr_nesting_level == 0) { + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ + if (frame.tf_eflags & PSL_VM) { + i = vm86_emulate((struct vm86frame *)&frame); + if (i != 0) + /* + * returns to original process + */ + vm86_trap((struct vm86frame *)&frame); + goto out; + } + /* FALL THROUGH */ + + case T_SEGNPFLT: /* segment not present fault */ + if (in_vm86call) + break; + + if (intr_nesting_level != 0) + break; + /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the @@ -488,20 +485,38 @@ kernel_trap: if (frame.tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; psignal(p, SIGBUS); - return; + goto out; + } + + /* + * Invalid segment selectors and out of bounds + * %eip's and %esp's can be set up in user mode. + * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. 
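The chain of frame.tf_eip comparisons above is, in effect, a tiny fault-fixup table: a trap at a known kernel return-path instruction gets its saved EIP rewritten to a recovery label instead of becoming fatal. The same idea expressed as a data-driven lookup is sketched below; the two placeholder functions only stand in for the doreti labels and their fault handlers, they are not the real symbols.

#include <stddef.h>
#include <stdio.h>

typedef void (*code_ptr)(void);

static void faulting_insn(void) { }  /* stands in for, e.g., doreti_popl_ds */
static void fixup_handler(void) { }  /* stands in for doreti_popl_ds_fault */

struct eip_fixup {
    code_ptr fault_ip;  /* instruction known to fault */
    code_ptr fixup_ip;  /* where to resume instead */
};

static const struct eip_fixup fixups[] = {
    { faulting_insn, fixup_handler },
};

/* Return the recovery address for a faulting ip, or NULL if unknown. */
static code_ptr
find_fixup(code_ptr ip)
{
    for (size_t i = 0; i < sizeof(fixups) / sizeof(fixups[0]); i++)
        if (fixups[i].fault_ip == ip)
            return (fixups[i].fixup_ip);
    return (NULL);
}

int
main(void)
{
    printf("fixup %s\n",
        find_fixup(faulting_insn) != NULL ? "found" : "missing");
    return (0);
}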
+ */ + if (frame.tf_eip == (int)doreti_iret) { + frame.tf_eip = (int)doreti_iret_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_ds) { + frame.tf_eip = (int)doreti_popl_ds_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_es) { + frame.tf_eip = (int)doreti_popl_es_fault; + goto out; } - MAYBE_DORETI_FAULT(doreti_iret, - doreti_iret_fault); - MAYBE_DORETI_FAULT(doreti_popl_ds, - doreti_popl_ds_fault); - MAYBE_DORETI_FAULT(doreti_popl_es, - doreti_popl_es_fault); - MAYBE_DORETI_FAULT(doreti_popl_fs, - doreti_popl_fs_fault); + if (frame.tf_eip == (int)doreti_popl_fs) { + frame.tf_eip = (int)doreti_popl_fs_fault; + goto out; + } if (curpcb && curpcb->pcb_onfault) { frame.tf_eip = (int)curpcb->pcb_onfault; - return; - } + goto out; } break; @@ -517,7 +532,7 @@ kernel_trap: */ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; - return; + goto out; } break; @@ -529,7 +544,7 @@ kernel_trap: * silently until the syscall handler has * saved the flags. */ - return; + goto out; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* @@ -537,7 +552,7 @@ kernel_trap: * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; - return; + goto out; } /* * Ignore debug register trace traps due to @@ -549,13 +564,13 @@ kernel_trap: * in kernel space because that is useful when * debugging the kernel. */ - if (user_dbreg_trap()) { + if (user_dbreg_trap() && !in_vm86call) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); - return; + goto out; } /* * Fall through (TRCTRAP kernel mode, kernel address) @@ -567,28 +582,19 @@ kernel_trap: */ #ifdef DDB if (kdb_trap (type, 0, &frame)) - return; + goto out; #endif break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI -#ifndef TIMER_FREQ -# define TIMER_FREQ 1193182 -#endif - handle_powerfail: - { - static unsigned lastalert = 0; - - if(time_second - lastalert > 10) - { + if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time_second; - } - return; } + goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { @@ -602,16 +608,16 @@ kernel_trap: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out; } else if (panic_on_nmi == 0) - return; + goto out; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame, eva); - return; + goto out; } /* Translate fault for emulators (e.g. Linux) */ @@ -630,8 +636,10 @@ kernel_trap: } #endif -out: +user: userret(p, &frame, sticks, 1); +out: + mtx_exit(&Giant, MTX_DEF); } #ifdef notyet @@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva) * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) { - frame->tf_trapno = T_PRIVINFLT; + if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return -2; - } #endif if (usermode) goto nogo; @@ -869,8 +875,7 @@ trap_fatal(frame, eva) frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? 
"user" : "kernel"); #ifdef SMP - /* three seperate prints in case of a trap on an unmapped page */ - printf("mp_lock = %08x; ", mp_lock); + /* two seperate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif @@ -917,26 +922,6 @@ trap_fatal(frame, eva) } else { printf("Idle\n"); } - printf("interrupt mask = "); - if ((cpl & net_imask) == net_imask) - printf("net "); - if ((cpl & tty_imask) == tty_imask) - printf("tty "); - if ((cpl & bio_imask) == bio_imask) - printf("bio "); - if ((cpl & cam_imask) == cam_imask) - printf("cam "); - if (cpl == 0) - printf("none"); -#ifdef SMP -/** - * XXX FIXME: - * we probably SHOULD have stopped the other CPUs before now! - * another CPU COULD have been touching cpl at this moment... - */ - printf(" <- SMP: XXX"); -#endif - printf("\n"); #ifdef KDB if (kdb_trap(&psl)) @@ -973,8 +958,7 @@ dblfault_handler() printf("esp = 0x%x\n", common_tss.tss_esp); printf("ebp = 0x%x\n", common_tss.tss_ebp); #ifdef SMP - /* three seperate prints in case of a trap on an unmapped page */ - printf("mp_lock = %08x; ", mp_lock); + /* two seperate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif @@ -1048,12 +1032,14 @@ syscall2(frame) int error; int narg; int args[8]; - int have_mplock = 0; + int have_giant = 0; u_int code; + atomic_add_int(&cnt.v_syscall, 1); + #ifdef DIAGNOSTIC if (ISPL(frame.tf_cs) != SEL_UPL) { - get_mplock(); + mtx_enter(&Giant, MTX_DEF); panic("syscall"); /* NOT REACHED */ } @@ -1075,9 +1061,9 @@ syscall2(frame) /* * The prep code is not MP aware. */ - get_mplock(); + mtx_enter(&Giant, MTX_DEF); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); - rel_mplock(); + mtx_exit(&Giant, MTX_DEF); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. @@ -1114,8 +1100,8 @@ syscall2(frame) */ if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { - get_mplock(); - have_mplock = 1; + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, narg, args); @@ -1129,15 +1115,15 @@ syscall2(frame) * we are ktracing */ if ((callp->sy_narg & SYF_MPSAFE) == 0) { - get_mplock(); - have_mplock = 1; + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } ktrsyscall(p->p_tracep, code, narg, args); } @@ -1192,9 +1178,9 @@ bad: * Traced syscall. trapsignal() is not MP aware. 
*/ if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); @@ -1203,13 +1189,13 @@ bad: /* * Handle reschedule and other end-of-syscall issues */ - have_mplock = userret(p, &frame, sticks, have_mplock); + have_giant = userret(p, &frame, sticks, have_giant); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); } @@ -1225,27 +1211,66 @@ bad: /* * Release the MP lock if we had to get it */ - if (have_mplock) - rel_mplock(); + if (have_giant) + mtx_exit(&Giant, MTX_DEF); + + mtx_assert(&sched_lock, MA_NOTOWNED); + mtx_assert(&Giant, MA_NOTOWNED); +} + +void +ast(frame) + struct trapframe frame; +{ + struct proc *p = CURPROC; + u_quad_t sticks; + + /* + * handle atomicy by looping since interrupts are enabled and the + * MP lock is not held. + */ + sticks = ((volatile struct proc *)p)->p_sticks; + while (sticks != ((volatile struct proc *)p)->p_sticks) + sticks = ((volatile struct proc *)p)->p_sticks; + + astoff(); + atomic_add_int(&cnt.v_soft, 1); + if (p->p_flag & P_OWEUPC) { + mtx_enter(&Giant, MTX_DEF); + p->p_flag &= ~P_OWEUPC; + addupc_task(p, p->p_stats->p_prof.pr_addr, + p->p_stats->p_prof.pr_ticks); +} + if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0) + mtx_exit(&Giant, MTX_DEF); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. MP lock is held on entry and should be - * held on return. + * directly into user mode. Giant is not held on entry, and must not + * be held on return. 
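The ast() entry above samples p_sticks with no lock held and interrupts enabled; because p_sticks is a u_quad_t, a single read on a 32-bit CPU can be torn by an interrupt, so the value is re-read until two consecutive reads agree. The same idiom in stand-alone form is shown below; the counter here is only a model of that situation, not the kernel variable.

#include <stdio.h>

static volatile unsigned long long ticks;   /* updated asynchronously */

static unsigned long long
stable_read(void)
{
    unsigned long long snap;

    snap = ticks;
    while (snap != ticks)   /* re-read until a consistent value sticks */
        snap = ticks;
    return (snap);
}

int
main(void)
{
    ticks = 42;
    printf("snapshot: %llu\n", stable_read());
    return (0);
}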
*/ void fork_return(p, frame) struct proc *p; struct trapframe frame; { + int have_giant; + frame.tf_eax = 0; /* Child returns zero */ frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0, 1); + have_giant = userret(p, &frame, 0, mtx_owned(&Giant)); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; + } ktrsysret(p->p_tracep, SYS_fork, 0, 0); + } #endif + if (have_giant) + mtx_exit(&Giant, MTX_DEF); } diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c index 15044abbaa3b..724f3c2817ba 100644 --- a/sys/i386/i386/tsc.c +++ b/sys/i386/i386/tsc.c @@ -54,6 +54,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> +#include <sys/proc.h> #include <sys/time.h> #include <sys/timetc.h> #include <sys/kernel.h> @@ -93,10 +94,6 @@ #include <i386/isa/mca_machdep.h> #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include <i386/isa/intr_machdep.h> /* The interrupt triggered by the 8254 (timer) chip */ @@ -104,7 +101,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -147,7 +143,9 @@ int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; +#if 0 static u_int clk_imask = HWI_MASK | SWI_MASK; +#endif static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; @@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, static void clkintr(struct clockframe frame) { + int intrsave; + if (timecounter->tc_get_timecount == i8254_get_timecount) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); if (i8254_ticked) i8254_ticked = 0; else { @@ -214,7 +216,8 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); } timer_func(&frame); switch (timer0_state) { @@ -233,14 +236,17 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -249,7 +255,9 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -257,7 +265,8 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; @@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; - int high, low; + int high, low, intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + 
CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -417,7 +426,7 @@ getit(void) high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return ((high << 8) | low); } @@ -523,6 +532,7 @@ sysbeepstop(void *chan) int sysbeep(int pitch, int period) { + int intrsave; int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) @@ -531,10 +541,13 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -683,11 +696,12 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; + int intrsave; int new_timer0_max_count; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq) void i8254_restore(void) { - u_long ef; + int intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -979,8 +994,8 @@ cpu_initclocks() { int diag; #ifdef APIC_IO - int apic_8254_trial; - struct intrec *clkdesc; + int apic_8254_trial, num_8254_ticks; + struct intrec *clkdesc, *rtcdesc; #endif /* APIC_IO */ if (statclock_disable) { @@ -1014,14 +1029,15 @@ cpu_initclocks() } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } - - clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); - #else /* APIC_IO */ - inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, + /* + * XXX Check the priority of this interrupt handler. I + * couldn't find anything suitable in the BSD/OS code (grog, + * 19 July 2000). + */ + /* Setup the PIC clk handler. The APIC handler is setup later */ + inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_EXCL); INTREN(IRQ0); @@ -1032,8 +1048,18 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ - if (statclock_disable) + if (statclock_disable) { +#ifdef APIC_IO + /* + * XXX - if statclock is disabled, don't attempt the APIC + * trial. Not sure this is sane for APIC_IO. 
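The clock changes in this file replace bare disable_intr()/enable_intr() pairs with save_intr()/restore_intr() around CLOCK_LOCK(), so a section entered with interrupts already off does not turn them back on when it exits. The toy model below shows why save-and-restore nests where enable/disable does not; the interrupt flag is simulated with a plain variable and all names are invented.

#include <stdio.h>

static int model_if = 1;                    /* 1 = interrupts enabled */

static int  model_save_intr(void)     { return (model_if); }
static void model_disable_intr(void)  { model_if = 0; }
static void model_restore_intr(int s) { model_if = s; }

static void
clock_critical_section(void)
{
    int s = model_save_intr();

    model_disable_intr();
    /* ... program the timer hardware ... */
    model_restore_intr(s);                  /* caller's state, not blindly "on" */
}

int
main(void)
{
    model_disable_intr();                   /* caller already in a critical section */
    clock_critical_section();
    printf("interrupts still %s\n", model_if ? "enabled" : "disabled");
    return (0);
}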
+ */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif /* APIC_IO */ return; + } diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -1041,34 +1067,44 @@ cpu_initclocks() #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); -#endif /* APIC_IO */ - inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, - INTR_EXCL); - -#ifdef APIC_IO - INTREN(APIC_IRQ8); -#else - INTREN(IRQ8); -#endif /* APIC_IO */ + if (apic_8254_trial) { + /* + * XXX - We use fast interrupts for clk and rtc long enough to + * perform the APIC probe and then revert to exclusive + * interrupts. + */ + clkdesc = inthand_add("clk", apic_8254_intr, + (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST); + INTREN(1 << apic_8254_intr); - writertc(RTC_STATUSB, rtc_statusb); + rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, + PI_REALTIME, INTR_FAST); /* XXX */ + INTREN(APIC_IRQ8); + writertc(RTC_STATUSB, rtc_statusb); -#ifdef APIC_IO - if (apic_8254_trial) { - printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ - if (read_intr_count(apic_8254_intr) < 3) { + num_8254_ticks = read_intr_count(apic_8254_intr); + + /* disable and remove our fake handlers */ + INTRDIS(1 << apic_8254_intr); + inthand_remove(clkdesc); + + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + INTRDIS(APIC_IRQ8); + inthand_remove(rtcdesc); + + if (num_8254_ticks < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ - INTRDIS(1 << apic_8254_intr); - inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", @@ -1087,13 +1123,27 @@ cpu_initclocks() } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); - inthand_add("clk", apic_8254_intr, - (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); } } + + /* Finally, setup the real clock handlers */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif + + inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME, + INTR_EXCL); +#ifdef APIC_IO + INTREN(APIC_IRQ8); +#else + INTREN(IRQ8); +#endif + + writertc(RTC_STATUSB, rtc_statusb); + +#ifdef APIC_IO if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) @@ -1198,11 +1248,12 @@ static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; - u_long ef; + int intrsave; u_int high, low; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc) count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || - ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && + ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. 
*/ @@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc) i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return (count); } diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s index 6a11c2685488..14b4259005bf 100644 --- a/sys/i386/i386/vm86bios.s +++ b/sys/i386/i386/vm86bios.s @@ -62,11 +62,9 @@ ENTRY(vm86_bioscall) pushl %edi pushl %gs -#ifdef SMP pushl %edx - MP_LOCK /* Get global lock */ + call __mtx_enter_giant_def /* Get global lock */ popl %edx -#endif #if NNPX > 0 movl _curproc,%ecx @@ -135,13 +133,9 @@ ENTRY(vm86_bioscall) /* * Return via _doreti */ -#ifdef SMP - pushl _cpl /* cpl to restore */ -#else - pushl _cpl /* cpl to restore */ -#endif subl $4,%esp /* dummy unit */ incb _intr_nesting_level + call __mtx_exit_giant_def MEXITCOUNT jmp _doreti diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index cfb6ceef44d6..831ab3b168a6 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -57,12 +57,14 @@ #include <sys/vnode.h> #include <sys/vmmeter.h> #include <sys/kernel.h> +#include <sys/ktr.h> #include <sys/sysctl.h> #include <sys/unistd.h> #include <machine/clock.h> #include <machine/cpu.h> #include <machine/md_var.h> +#include <machine/mutex.h> #ifdef SMP #include <machine/smp.h> #endif @@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags) * pcb2->pcb_onfault: cloned above (always NULL here?). */ -#ifdef SMP - pcb2->pcb_mpnest = 1; -#endif + pcb2->pcb_schednest = 0; + /* * XXX don't copy the i/o pages. this should probably be fixed. */ @@ -256,8 +257,11 @@ cpu_exit(p) reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } + mtx_enter(&sched_lock, MTX_SPIN); + mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH); + mtx_assert(&Giant, MA_NOTOWNED); cnt.v_swtch++; - cpu_switch(p); + cpu_switch(); panic("cpu_exit"); } @@ -406,17 +410,10 @@ vunmapbuf(bp) static void cpu_reset_proxy() { - u_int saved_mp_lock; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) - ; /* Wait for other cpu to disable interupts */ - saved_mp_lock = mp_lock; - mp_lock = 1; - printf("cpu_reset_proxy: Grabbed mp lock for BSP\n"); - cpu_reset_proxy_active = 3; - while (cpu_reset_proxy_active == 3) - ; /* Wait for other cpu to enable interrupts */ + ; /* Wait for other cpu to see that we've started */ stop_cpus((1<<cpu_reset_proxyid)); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); @@ -453,6 +450,7 @@ cpu_reset() cpu_reset_proxyid = cpuid; cpustop_restartfunc = cpu_reset_proxy; + cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); started_cpus = (1<<0); /* Restart CPU #0 */ @@ -461,17 +459,9 @@ cpu_reset() cnt++; /* Wait for BSP to announce restart */ if (cpu_reset_proxy_active == 0) printf("cpu_reset: Failed to restart BSP\n"); - __asm __volatile("cli" : : : "memory"); + enable_intr(); cpu_reset_proxy_active = 2; - cnt = 0; - while (cpu_reset_proxy_active == 2 && cnt < 10000000) - cnt++; /* Do nothing */ - if (cpu_reset_proxy_active == 2) { - printf("cpu_reset: BSP did not grab mp lock\n"); - cpu_reset_real(); /* XXX: Bogus ? 
*/ - } - cpu_reset_proxy_active = 4; - __asm __volatile("sti" : : : "memory"); + while (1); /* NOTREACHED */ } @@ -553,7 +543,7 @@ vm_page_zero_idle() static int free_rover; static int zero_state; vm_page_t m; - int s; + int s, intrsave; /* * Attempt to maintain approximately 1/2 of our free pages in a @@ -569,11 +559,10 @@ vm_page_zero_idle() if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) return(0); -#ifdef SMP - if (try_mplock()) { -#endif + if (mtx_try_enter(&Giant, MTX_DEF)) { s = splvm(); - __asm __volatile("sti" : : : "memory"); + intrsave = save_intr(); + enable_intr(); zero_state = 0; m = vm_page_list_find(PQ_FREE, free_rover, FALSE); if (m != NULL && (m->flags & PG_ZERO) == 0) { @@ -595,14 +584,10 @@ vm_page_zero_idle() } free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK; splx(s); - __asm __volatile("cli" : : : "memory"); -#ifdef SMP - rel_mplock(); -#endif + restore_intr(intrsave); + mtx_exit(&Giant, MTX_DEF); return (1); -#ifdef SMP } -#endif /* * We have to enable interrupts for a moment if the try_mplock fails * in order to potentially take an IPI. XXX this should be in diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h index 3ccbee6be344..efdb0f9710a1 100644 --- a/sys/i386/include/asnames.h +++ b/sys/i386/include/asnames.h @@ -131,6 +131,7 @@ #define _Xintr7 Xintr7 #define _Xintr8 Xintr8 #define _Xintr9 Xintr9 +#define _Xtintr0 Xtintr0 #define _Xinvltlb Xinvltlb #define _Xrendezvous Xrendezvous #define _Xmchk Xmchk @@ -155,6 +156,7 @@ #define _arith_invalid arith_invalid #define _arith_overflow arith_overflow #define _arith_underflow arith_underflow +#define _ast ast #define _bcopy bcopy #define _bcopy_vector bcopy_vector #define _bigJump bigJump @@ -184,7 +186,6 @@ #define _cnt cnt #define _copyin_vector copyin_vector #define _copyout_vector copyout_vector -#define _cpl cpl #define _cpl_lock cpl_lock #define _cpu cpu #define _cpu0prvpage cpu0prvpage @@ -222,6 +223,7 @@ #define _get_isrlock get_isrlock #define _get_mplock get_mplock #define _get_syscall_lock get_syscall_lock +#define _Giant Giant #define _idle idle #define _ihandlers ihandlers #define _imen imen @@ -232,13 +234,11 @@ #define _intr_countp intr_countp #define _intr_handler intr_handler #define _intr_mask intr_mask -#define _intr_nesting_level intr_nesting_level #define _intr_unit intr_unit #define _intrcnt intrcnt #define _intrnames intrnames #define _invltlb_ok invltlb_ok #define _ioapic ioapic -#define _ipending ipending #define _isr_lock isr_lock #define _kernelname kernelname #define _lapic lapic @@ -249,6 +249,8 @@ #define _mp_gdtbase mp_gdtbase #define _mp_lock mp_lock #define _mp_ncpus mp_ncpus +#define __mtx_enter_giant_def _mtx_enter_giant_def +#define __mtx_exit_giant_def _mtx_exit_giant_def #define _mul64 mul64 #define _net_imask net_imask #define _netisr netisr @@ -281,6 +283,8 @@ #define _round_reg round_reg #define _s_lock s_lock #define _s_unlock s_unlock +#define _sched_ithd sched_ithd +#define _sched_lock sched_lock #define _set_precision_flag_down set_precision_flag_down #define _set_precision_flag_up set_precision_flag_up #define _set_user_ldt set_user_ldt @@ -293,6 +297,7 @@ #define _softclock softclock #define _softnet_imask softnet_imask #define _softtty_imask softtty_imask +#define _spending spending #define _spl0 spl0 #define _splz splz #define _ss_lock ss_lock @@ -326,9 +331,9 @@ #if defined(SMP) || defined(__ELF__) #ifdef SMP -#define FS(x) %fs:gd_ ## x +#define FS(x) %fs:gd_ ## x #else -#define FS(x) x +#define FS(x) x #endif #define _common_tss 
FS(common_tss) @@ -337,6 +342,8 @@ #define _cpu_lockid FS(cpu_lockid) #define _curpcb FS(curpcb) #define _curproc FS(curproc) +#define _prevproc FS(prevproc) +#define _idleproc FS(idleproc) #define _astpending FS(astpending) #define _currentldt FS(currentldt) #define _inside_intr FS(inside_intr) @@ -353,9 +360,16 @@ #define _ss_eflags FS(ss_eflags) #define _switchticks FS(switchticks) #define _switchtime FS(switchtime) +#define _intr_nesting_level FS(intr_nesting_level) #define _tss_gdt FS(tss_gdt) #define _idlestack FS(idlestack) #define _idlestack_top FS(idlestack_top) +#define _witness_spin_check FS(witness_spin_check) +/* +#define _ktr_idx FS(ktr_idx) +#define _ktr_buf FS(ktr_buf) +#define _ktr_buf_data FS(ktr_buf_data) +*/ #endif diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index ffabf7f8ed54..18822b87cc5b 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -46,6 +46,7 @@ #include <machine/psl.h> #include <machine/frame.h> #include <machine/segments.h> +#include <machine/globals.h> /* * definitions of cpu-dependent requirements @@ -86,7 +87,9 @@ * added, we will have an atomicy problem. The type of atomicy we need is * a non-locked orl. */ -#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0) +#define need_resched() do { \ + PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \ +} while (0) #define resched_wanted() (astpending & AST_RESCHED) /* @@ -109,8 +112,9 @@ * it off (asynchronous need_resched() conflicts are not critical). */ #define signotify(p) aston() - -#define aston() do { astpending |= AST_PENDING; } while (0) +#define aston() do { \ + PCPU_SET(astpending, astpending | AST_PENDING); \ +} while (0) #define astoff() /* @@ -135,7 +139,9 @@ #ifdef _KERNEL extern char btext[]; extern char etext[]; +#ifndef intr_nesting_level extern u_char intr_nesting_level; +#endif void fork_trampoline __P((void)); void fork_return __P((struct proc *, struct trapframe)); diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 9a4052fd41d1..39868df422aa 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -86,20 +86,29 @@ static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); -#ifdef SMP - MPINTR_LOCK(); -#endif } static __inline void enable_intr(void) { -#ifdef SMP - MPINTR_UNLOCK(); -#endif __asm __volatile("sti"); } +static __inline u_int +save_intr(void) +{ + u_int ef; + + __asm __volatile("pushfl; popl %0" : "=r" (ef)); + return (ef); +} + +static __inline void +restore_intr(u_int ef) +{ + __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" ); +} + #define HAVE_INLINE_FFS static __inline int diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h index 58bd9cfe9416..440da60b4b83 100644 --- a/sys/i386/include/globaldata.h +++ b/sys/i386/include/globaldata.h @@ -26,6 +26,20 @@ * $FreeBSD$ */ +#ifndef _MACHINE_GLOBALDATA_H_ +#define _MACHINE_GLOBALDATA_H_ + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <machine/pmap.h> +#include <machine/segments.h> +#include <machine/tss.h> + +/* XXX */ +#ifdef KTR_PERCPU +#include <sys/ktr.h> +#endif + /* * This structure maps out the global data that needs to be kept on a * per-cpu basis. 
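The globaldata structure described above is reached on i386 through %fs-relative addressing, which the FS() and PCPU_SET() macros hide. In portable user-space C the nearest analogue is thread-local storage; the sketch below uses that analogy only to illustrate the per-CPU idea, and its struct and field names are invented.

#include <stdio.h>

struct percpu_model {
    int  astpending;
    void *curproc;
};

static _Thread_local struct percpu_model pcpu;  /* one copy per "CPU" */

#define PCPU_MODEL_SET(field, val) (pcpu.field = (val))
#define PCPU_MODEL_GET(field)      (pcpu.field)

int
main(void)
{
    PCPU_MODEL_SET(astpending, 1);
    printf("astpending = %d\n", PCPU_MODEL_GET(astpending));
    return (0);
}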
genassym uses this to generate offsets for the assembler @@ -41,11 +55,14 @@ struct globaldata { struct privatespace *gd_prvspace; /* self-reference */ struct proc *gd_curproc; + struct proc *gd_prevproc; struct proc *gd_npxproc; struct pcb *gd_curpcb; + struct proc *gd_idleproc; struct timeval gd_switchtime; struct i386tss gd_common_tss; int gd_switchticks; + int gd_intr_nesting_level; struct segment_descriptor gd_common_tssd; struct segment_descriptor *gd_tss_gdt; #ifdef USER_LDT @@ -67,8 +84,22 @@ struct globaldata { unsigned *gd_prv_PADDR1; #endif u_int gd_astpending; + SLIST_ENTRY(globaldata) gd_allcpu; + int gd_witness_spin_check; +#ifdef KTR_PERCPU +#ifdef KTR + volatile int gd_ktr_idx; + char *gd_ktr_buf; + char gd_ktr_buf_data[KTR_SIZE]; +#endif +#endif }; +extern struct globaldata globaldata; + +SLIST_HEAD(cpuhead, globaldata); +extern struct cpuhead cpuhead; + #ifdef SMP /* * This is the upper (0xff800000) address space layout that is per-cpu. @@ -93,3 +124,5 @@ struct privatespace { extern struct privatespace SMP_prvspace[]; #endif + +#endif /* ! _MACHINE_GLOBALDATA_H_ */ diff --git a/sys/i386/include/globals.h b/sys/i386/include/globals.h index ae05d5644e76..71bbbd580d9e 100644 --- a/sys/i386/include/globals.h +++ b/sys/i386/include/globals.h @@ -74,6 +74,14 @@ __asm("movl %0,%%fs:gd_" #name : : "r" (val)); \ } +static __inline int +_global_globaldata(void) +{ + int val; + __asm("movl %%fs:globaldata,%0" : "=r" (val)); + return (val); +} + #if defined(SMP) || defined(KLD_MODULE) || defined(ACTUALLY_LKM_NOT_KERNEL) /* * The following set of macros works for UP kernel as well, but for maximum @@ -82,18 +90,21 @@ * portability between UP and SMP kernels. */ #define curproc GLOBAL_RVALUE_NV(curproc, struct proc *) +#define prevproc GLOBAL_RVALUE_NV(prevproc, struct proc *) #define curpcb GLOBAL_RVALUE_NV(curpcb, struct pcb *) -#define npxproc GLOBAL_LVALUE(npxproc, struct proc *) +#define npxproc GLOBAL_RVALUE_NV(npxproc, struct proc *) +#define idleproc GLOBAL_RVALUE_NV(idleproc, struct proc *) #define common_tss GLOBAL_LVALUE(common_tss, struct i386tss) #define switchtime GLOBAL_LVALUE(switchtime, struct timeval) #define switchticks GLOBAL_LVALUE(switchticks, int) +#define intr_nesting_level GLOBAL_RVALUE(intr_nesting_level, u_char) #define common_tssd GLOBAL_LVALUE(common_tssd, struct segment_descriptor) #define tss_gdt GLOBAL_LVALUE(tss_gdt, struct segment_descriptor *) -#define astpending GLOBAL_LVALUE(astpending, u_int) +#define astpending GLOBAL_RVALUE(astpending, u_int) #ifdef USER_LDT -#define currentldt GLOBAL_LVALUE(currentldt, int) +#define currentldt GLOBAL_RVALUE(currentldt, int) #endif #ifdef SMP @@ -109,19 +120,32 @@ #define prv_CADDR3 GLOBAL_RVALUE(prv_CADDR3, caddr_t) #define prv_PADDR1 GLOBAL_RVALUE(prv_PADDR1, unsigned *) #endif + +#define witness_spin_check GLOBAL_RVALUE(witness_spin_check, int) + #endif /*UP kernel*/ GLOBAL_FUNC(curproc) +GLOBAL_FUNC(prevproc) GLOBAL_FUNC(astpending) GLOBAL_FUNC(curpcb) GLOBAL_FUNC(npxproc) +GLOBAL_FUNC(idleproc) GLOBAL_FUNC(common_tss) GLOBAL_FUNC(switchtime) GLOBAL_FUNC(switchticks) +GLOBAL_FUNC(intr_nesting_level) GLOBAL_FUNC(common_tssd) GLOBAL_FUNC(tss_gdt) +/* XXX */ +#ifdef KTR_PERCPU +GLOBAL_FUNC(ktr_idx) +GLOBAL_FUNC(ktr_buf) +GLOBAL_FUNC(ktr_buf_data) +#endif + #ifdef USER_LDT GLOBAL_FUNC(currentldt) #endif @@ -140,7 +164,17 @@ GLOBAL_FUNC(prv_CADDR3) GLOBAL_FUNC(prv_PADDR1) #endif -#define SET_CURPROC(x) (_global_curproc_set_nv((int)x)) +GLOBAL_FUNC(witness_spin_check) + +#ifdef SMP +#define GLOBALDATA 
GLOBAL_RVALUE(globaldata, struct globaldata *) +#else +#define GLOBALDATA (&globaldata) +#endif + +#define CURPROC curproc + +#define PCPU_SET(name, value) (_global_##name##_set((int)value)) #endif /* _KERNEL */ diff --git a/sys/i386/include/ipl.h b/sys/i386/include/ipl.h index 54d3f4b7b4b5..08726df51d84 100644 --- a/sys/i386/include/ipl.h +++ b/sys/i386/include/ipl.h @@ -43,9 +43,19 @@ #endif /* + * Software interrupt level. We treat the software interrupt as a + * single interrupt at a fictive hardware interrupt level. + */ +#define SOFTINTR (NHWI + 0) + +/* * Software interrupt bit numbers in priority order. The priority only * determines which swi will be dispatched next; a higher priority swi * may be dispatched when a nested h/w interrupt handler returns. + * + * XXX FIXME: There's no longer a relation between the SWIs and the + * HWIs, so it makes more sense for these values to start at 0, but + * there's lots of code which expects them to start at NHWI. */ #define SWI_TTY (NHWI + 0) #define SWI_NET (NHWI + 1) @@ -104,12 +114,9 @@ #ifdef notyet /* in <sys/interrupt.h> until pci drivers stop hacking on them */ extern unsigned bio_imask; /* group of interrupts masked with splbio() */ #endif -extern unsigned cpl; /* current priority level mask */ -#ifdef SMP -extern unsigned cil; /* current INTerrupt level mask */ -#endif + extern volatile unsigned idelayed; /* interrupts to become pending */ -extern volatile unsigned ipending; /* active interrupts masked by cpl */ +extern volatile unsigned spending; /* pending software interrupts */ #ifdef notyet /* in <sys/systm.h> until pci drivers stop hacking on them */ extern unsigned net_imask; /* group of interrupts masked with splimp() */ extern unsigned stat_imask; /* interrupts masked with splstatclock() */ diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h index 534f77e8d2fb..b4af09d9c579 100644 --- a/sys/i386/include/lock.h +++ b/sys/i386/include/lock.h @@ -37,21 +37,6 @@ #define MPLOCKED lock ; /* - * Some handy macros to allow logical organization. - */ - -#define MP_LOCK call _get_mplock - -#define MP_TRYLOCK \ - pushl $_mp_lock ; /* GIANT_LOCK */ \ - call _MPtrylock ; /* try to get lock */ \ - add $4, %esp - -#define MP_RELLOCK \ - movl $_mp_lock,%edx ; /* GIANT_LOCK */ \ - call _MPrellock_edx - -/* * Protects the IO APIC and apic_imen as a critical region. */ #define IMASK_LOCK \ @@ -66,7 +51,8 @@ #define MPLOCKED /* NOP */ -#define MP_LOCK /* NOP */ +#define IMASK_LOCK /* NOP */ +#define IMASK_UNLOCK /* NOP */ #endif /* SMP */ @@ -77,32 +63,15 @@ #include <machine/smptests.h> /** xxx_LOCK */ /* - * Locks regions protected in UP kernel via cli/sti. - */ -#ifdef USE_MPINTRLOCK -#define MPINTR_LOCK() s_lock(&mpintr_lock) -#define MPINTR_UNLOCK() s_unlock(&mpintr_lock) -#else -#define MPINTR_LOCK() -#define MPINTR_UNLOCK() -#endif /* USE_MPINTRLOCK */ - -/* * sio/cy lock. * XXX should rc (RISCom/8) use this? 
*/ #ifdef USE_COMLOCK #define COM_LOCK() s_lock(&com_lock) #define COM_UNLOCK() s_unlock(&com_lock) -#define COM_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); COM_LOCK(); } -#define COM_ENABLE_INTR() \ - { COM_UNLOCK(); __asm __volatile("sti"); } #else #define COM_LOCK() #define COM_UNLOCK() -#define COM_DISABLE_INTR() disable_intr() -#define COM_ENABLE_INTR() enable_intr() #endif /* USE_COMLOCK */ /* @@ -112,22 +81,13 @@ #ifdef USE_CLOCKLOCK #define CLOCK_LOCK() s_lock(&clock_lock) #define CLOCK_UNLOCK() s_unlock(&clock_lock) -#define CLOCK_DISABLE_INTR() \ - { __asm __volatile("cli" : : : "memory"); CLOCK_LOCK(); } -#define CLOCK_ENABLE_INTR() \ - { CLOCK_UNLOCK(); __asm __volatile("sti"); } #else #define CLOCK_LOCK() #define CLOCK_UNLOCK() -#define CLOCK_DISABLE_INTR() disable_intr() -#define CLOCK_ENABLE_INTR() enable_intr() #endif /* USE_CLOCKLOCK */ #else /* SMP */ -#define MPINTR_LOCK() -#define MPINTR_UNLOCK() - #define COM_LOCK() #define COM_UNLOCK() #define CLOCK_LOCK() @@ -168,6 +128,7 @@ extern struct simplelock clock_lock; extern struct simplelock com_lock; extern struct simplelock mpintr_lock; extern struct simplelock mcount_lock; +extern struct simplelock panic_lock; #if !defined(SIMPLELOCK_DEBUG) && NCPUS > 1 /* diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 61c5ecf73205..95b5759f9e66 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -36,6 +36,7 @@ #endif #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/proc.h> @@ -65,6 +66,7 @@ #include <machine/apic.h> #include <machine/atomic.h> #include <machine/cpufunc.h> +#include <machine/mutex.h> #include <machine/mpapic.h> #include <machine/psl.h> #include <machine/segments.h> @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
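The ap_boot_lock introduced in this file serializes AP start-up: the BSP takes the lock before starting the APs, each AP blocks on it at the top of ap_init(), and a later SYSINIT (release_aps) lets them through one at a time. Below is a pthread sketch of that hold-until-released gate; the thread count and names are invented, and a pthread mutex merely stands in for the simplelock.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t boot_gate = PTHREAD_MUTEX_INITIALIZER;

static void *
ap_model(void *arg)
{
    pthread_mutex_lock(&boot_gate);     /* blocks until the BSP releases it */
    printf("AP %ld running\n", (long)arg);
    pthread_mutex_unlock(&boot_gate);   /* let the next AP through */
    return (NULL);
}

int
main(void)
{
    pthread_t aps[2];

    pthread_mutex_lock(&boot_gate);     /* BSP holds the gate */
    for (long i = 0; i < 2; i++)
        pthread_create(&aps[i], NULL, ap_model, (void *)i);
    sleep(1);                           /* BSP finishes its own setup */
    pthread_mutex_unlock(&boot_gate);   /* the "release_aps()" moment */
    for (int i = 0; i < 2; i++)
        pthread_join(aps[i], NULL);
    return (0);
}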
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
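The ap_boot_lock hunks above gate application processor startup: the BSP takes the lock in mp_enable() before starting the APs, each AP blocks on it at the top of ap_init(), and each AP releases it for the next one once its own setup is done (the final release by the BSP appears as release_aps() later in this diff). A rough userland analogue of that handshake, using a plain pthread mutex in place of the kernel simplelock; all names and the sleep() stand-in for BSP setup time are invented for the demo.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-in for ap_boot_lock; held by the "BSP" until it is ready. */
static pthread_mutex_t boot_lock = PTHREAD_MUTEX_INITIALIZER;

static void *
ap_main(void *arg)
{
    int cpu = *(int *)arg;

    /* Like ap_init(): wait until released, then let the next AP in. */
    pthread_mutex_lock(&boot_lock);
    printf("AP %d initializing\n", cpu);
    pthread_mutex_unlock(&boot_lock);
    return (NULL);
}

int
main(void)
{
    pthread_t aps[3];
    int ids[3] = { 1, 2, 3 };

    pthread_mutex_lock(&boot_lock);        /* BSP takes the lock first */
    for (int i = 0; i < 3; i++)
        pthread_create(&aps[i], NULL, ap_main, &ids[i]);

    sleep(1);                              /* BSP finishes its own setup */
    pthread_mutex_unlock(&boot_lock);      /* the release_aps() step */

    for (int i = 0; i < 3; i++)
        pthread_join(aps[i], NULL);
    return (0);
}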
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/i386/include/mutex.h b/sys/i386/include/mutex.h new file mode 100644 index 000000000000..ef0c9638fc18 --- /dev/null +++ b/sys/i386/include/mutex.h @@ -0,0 +1,786 @@ +/*- + * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Berkeley Software Design Inc's name may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $ + * $FreeBSD$ + */ + +#ifndef _MACHINE_MUTEX_H_ +#define _MACHINE_MUTEX_H_ + +#ifndef LOCORE + +#include <sys/ktr.h> +#include <sys/queue.h> +#include <machine/atomic.h> +#include <machine/cpufunc.h> +#include <machine/globals.h> + +/* + * If kern_mutex.c is being built, compile non-inlined versions of various + * functions so that kernel modules can use them. 
+ */ +#ifndef _KERN_MUTEX_C_ +#define _MTX_INLINE static __inline +#else +#define _MTX_INLINE +#endif + +/* + * Mutex flags + * + * Types + */ +#define MTX_DEF 0x0 /* Default (spin/sleep) */ +#define MTX_SPIN 0x1 /* Spin only lock */ + +/* Options */ +#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */ +#define MTX_NORECURSE 0x8 /* No recursion possible */ +#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */ +#define MTX_NOSWITCH 0x20 /* Do not switch on release */ +#define MTX_FIRST 0x40 /* First spin lock holder */ +#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */ + +/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */ +#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH) + +/* Flags/value used in mtx_lock */ +#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */ +#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */ +#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED) +#define MTX_UNOWNED 0x8 /* Cookie for free mutex */ + +struct proc; /* XXX */ + +/* + * Sleep/spin mutex + */ +struct mtx { + volatile u_int mtx_lock; /* lock owner/gate/flags */ + volatile u_short mtx_recurse; /* number of recursive holds */ + u_short mtx_f1; + u_int mtx_savefl; /* saved flags (for spin locks) */ + char *mtx_description; + TAILQ_HEAD(, proc) mtx_blocked; + LIST_ENTRY(mtx) mtx_contested; + struct mtx *mtx_next; /* all locks in system */ + struct mtx *mtx_prev; +#ifdef SMP_DEBUG + /* If you add anything here, adjust the mtxf_t definition below */ + struct witness *mtx_witness; + LIST_ENTRY(mtx) mtx_held; + char *mtx_file; + int mtx_line; +#endif /* SMP_DEBUG */ +}; + +typedef struct mtx mtx_t; + +/* + * Filler for structs which need to remain the same size + * whether or not SMP_DEBUG is turned on. + */ +typedef struct mtxf { +#ifdef SMP_DEBUG + char mtxf_data[0]; +#else + char mtxf_data[4*sizeof(void *) + sizeof(int)]; +#endif +} mtxf_t; + +#define mp_fixme(string) + +#ifdef _KERNEL +/* Misc */ +#define CURTHD ((u_int)CURPROC) /* Current thread ID */ + +/* Prototypes */ +void mtx_init(mtx_t *m, char *description, int flag); +void mtx_enter_hard(mtx_t *, int type, int flags); +void mtx_exit_hard(mtx_t *, int type); +void mtx_destroy(mtx_t *m); + +#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_)) +void mtx_enter(mtx_t *mtxp, int type); +int mtx_try_enter(mtx_t *mtxp, int type); +void mtx_exit(mtx_t *mtxp, int type); +#endif + +/* Global locks */ +extern mtx_t sched_lock; +extern mtx_t Giant; + +/* + * Used to replace return with an exit Giant and return. 
+ */ + +#define EGAR(a) \ +do { \ + mtx_exit(&Giant, MTX_DEF); \ + return (a); \ +} while (0) + +#define VEGAR \ +do { \ + mtx_exit(&Giant, MTX_DEF); \ + return; \ +} while (0) + +#define DROP_GIANT() \ +do { \ + int _giantcnt; \ + WITNESS_SAVE_DECL(Giant); \ + \ + WITNESS_SAVE(&Giant, Giant); \ + for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \ + mtx_exit(&Giant, MTX_DEF) + +#define PICKUP_GIANT() \ + mtx_assert(&Giant, MA_NOTOWNED); \ + while (_giantcnt--) \ + mtx_enter(&Giant, MTX_DEF); \ + WITNESS_RESTORE(&Giant, Giant); \ +} while (0) + +#define PARTIAL_PICKUP_GIANT() \ + mtx_assert(&Giant, MA_NOTOWNED); \ + while (_giantcnt--) \ + mtx_enter(&Giant, MTX_DEF); \ + WITNESS_RESTORE(&Giant, Giant) + + +/* + * Debugging + */ +#ifndef SMP_DEBUG +#define mtx_assert(m, what) +#else /* SMP_DEBUG */ + +#define MA_OWNED 1 +#define MA_NOTOWNED 2 +#define mtx_assert(m, what) { \ + switch ((what)) { \ + case MA_OWNED: \ + ASS(mtx_owned((m))); \ + break; \ + case MA_NOTOWNED: \ + ASS(!mtx_owned((m))); \ + break; \ + default: \ + panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \ + } \ +} + +#ifdef INVARIANTS +#define ASS(ex) MPASS(ex) +#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \ + #ex, __FILE__, __LINE__) +#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \ + what, __FILE__, __LINE__) + +#ifdef MTX_STRS +char STR_IEN[] = "fl & 0x200"; +char STR_IDIS[] = "!(fl & 0x200)"; +#else /* MTX_STRS */ +extern char STR_IEN[]; +extern char STR_IDIS[]; +#endif /* MTX_STRS */ +#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN) +#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS) +#endif /* INVARIANTS */ + +#endif /* SMP_DEBUG */ + +#if !defined(SMP_DEBUG) || !defined(INVARIANTS) +#define ASS(ex) +#define MPASS(ex) +#define MPASS2(ex, where) +#define ASS_IEN +#define ASS_IDIS +#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */ + +#ifdef WITNESS +#ifndef SMP_DEBUG +#error WITNESS requires SMP_DEBUG +#endif /* SMP_DEBUG */ +#define WITNESS_ENTER(m, f) \ + if ((m)->mtx_witness != NULL) \ + witness_enter((m), (f), __FILE__, __LINE__) +#define WITNESS_EXIT(m, f) \ + if ((m)->mtx_witness != NULL) \ + witness_exit((m), (f), __FILE__, __LINE__) + +#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__) +#define WITNESS_SAVE_DECL(n) \ + char * __CONCAT(n, __wf); \ + int __CONCAT(n, __wl) + +#define WITNESS_SAVE(m, n) \ +do { \ + if ((m)->mtx_witness != NULL) \ + witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \ +} while (0) + +#define WITNESS_RESTORE(m, n) \ +do { \ + if ((m)->mtx_witness != NULL) \ + witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \ +} while (0) + +void witness_init(mtx_t *, int flag); +void witness_destroy(mtx_t *); +void witness_enter(mtx_t *, int, char *, int); +void witness_try_enter(mtx_t *, int, char *, int); +void witness_exit(mtx_t *, int, char *, int); +void witness_display(void(*)(const char *fmt, ...)); +void witness_list(struct proc *); +int witness_sleep(int, mtx_t *, char *, int); +void witness_save(mtx_t *, char **, int *); +void witness_restore(mtx_t *, char *, int); +#else /* WITNESS */ +#define WITNESS_ENTER(m, flag) +#define WITNESS_EXIT(m, flag) +#define WITNESS_SLEEP(check, m) +#define WITNESS_SAVE_DECL(n) +#define WITNESS_SAVE(m, n) +#define WITNESS_RESTORE(m, n) + +/* + * flag++ is slezoid way of shutting up unused parameter warning + * in mtx_init() + */ +#define witness_init(m, flag) flag++ +#define witness_destroy(m) +#define witness_enter(m, flag, f, l) 
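DROP_GIANT() and PICKUP_GIANT() above are deliberately unbalanced macros: the first opens a do { ... block and declares locals, the second closes it, so the pair must appear together in the same function body. A small standalone sketch of that brace structure follows; the names and the saved counter are invented, and it only illustrates the macro trick, not the locking itself.

#include <stdio.h>

/* First macro opens the block and saves state into a local... */
#define SAVE_AND_CLEAR(var)     \
do {                            \
    int saved = (var);          \
    (var) = 0

/* ...second macro restores it and closes the block. */
#define RESTORE(var)            \
    (var) = saved;              \
} while (0)

int
main(void)
{
    int giant_count = 3;

    SAVE_AND_CLEAR(giant_count);
    printf("inside: %d\n", giant_count);   /* prints 0 */
    RESTORE(giant_count);
    printf("after:  %d\n", giant_count);   /* prints 3 */
    return (0);
}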
+#define witness_try_enter(m, flag, f, l ) +#define witness_exit(m, flag, f, l) +#endif /* WITNESS */ + +/* + * Assembly macros (for internal use only) + *------------------------------------------------------------------------------ + */ + +#define _V(x) __STRING(x) + +#ifndef I386_CPU + +/* + * For 486 and newer processors. + */ + +/* Get a sleep lock, deal with recursion inline. */ +#define _getlock_sleep(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %3,%1;" /* Try */ \ +" jz 1f;" /* Got it */ \ +" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \ +" cmpl %%eax,%3;" /* already have it? */ \ +" je 2f;" /* yes, recurse */ \ +" pushl %4;" \ +" pushl %5;" \ +" call mtx_enter_hard;" \ +" addl $8,%%esp;" \ +" jmp 1f;" \ +"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \ +" incw %2;" \ +"1:" \ +"# getlock_sleep" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock), /* 1 */ \ + "+m" (mtxp->mtx_recurse) /* 2 */ \ + : "r" (tid), /* 3 (input) */ \ + "gi" (type), /* 4 */ \ + "g" (mtxp) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* Get a spin lock, handle recursion inline (as the less common case) */ +#define _getlock_spin_block(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" pushfl;" \ +" cli;" \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %3,%1;" /* Try */ \ +" jz 2f;" /* got it */ \ +" pushl %4;" \ +" pushl %5;" \ +" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \ +" addl $0xc,%%esp;" \ +" jmp 1f;" \ +"2: popl %2;" /* save flags */ \ +"1:" \ +"# getlock_spin_block" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock), /* 1 */ \ + "=m" (mtxp->mtx_savefl) /* 2 */ \ + : "r" (tid), /* 3 (input) */ \ + "gi" (type), /* 4 */ \ + "g" (mtxp) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Get a lock without any recursion handling. Calls the hard enter function if + * we can't get it inline. + */ +#define _getlock_norecurse(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %2,%1;" /* Try */ \ +" jz 1f;" /* got it */ \ +" pushl %3;" \ +" pushl %4;" \ +" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \ +" addl $8,%%esp;" \ +"1:" \ +"# getlock_norecurse" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock) /* 1 */ \ + : "r" (tid), /* 2 (input) */ \ + "gi" (type), /* 3 */ \ + "g" (mtxp) /* 4 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a sleep lock assuming we haven't recursed on it, recursion is handled + * in the hard function. + */ +#define _exitlock_norecurse(mtxp, tid, type) ({ \ + int _tid = (int)(tid); \ + \ + __asm __volatile ( \ +" " MPLOCKED "" \ +" cmpxchgl %4,%0;" /* try easy rel */ \ +" jz 1f;" /* released! */ \ +" pushl %2;" \ +" pushl %3;" \ +" call mtx_exit_hard;" \ +" addl $8,%%esp;" \ +"1:" \ +"# exitlock_norecurse" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+a" (_tid) /* 1 */ \ + : "gi" (type), /* 2 (input) */ \ + "g" (mtxp), /* 3 */ \ + "r" (MTX_UNOWNED) /* 4 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a sleep lock when its likely we recursed (the code to + * deal with simple recursion is inline). 
+ */ +#define _exitlock(mtxp, tid, type) ({ \ + int _tid = (int)(tid); \ + \ + __asm __volatile ( \ +" " MPLOCKED "" \ +" cmpxchgl %5,%0;" /* try easy rel */ \ +" jz 1f;" /* released! */ \ +" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \ +" jnz 3f;" /* handle recursion */ \ + /* Lock not recursed and contested: do the hard way */ \ +" pushl %3;" \ +" pushl %4;" \ +" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \ +" addl $8,%%esp;" \ +" jmp 1f;" \ + /* lock recursed, lower recursion level */ \ +"3: decw %1;" /* one less level */ \ +" jnz 1f;" /* still recursed, done */ \ +" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \ +"1:" \ +"# exitlock" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+m" (mtxp->mtx_recurse), /* 1 */ \ + "+a" (_tid) /* 2 */ \ + : "gi" (type), /* 3 (input) */ \ + "g" (mtxp), /* 4 */ \ + "r" (MTX_UNOWNED) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a spin lock (with possible recursion). + * + * We use cmpxchgl to clear lock (instead of simple store) to flush posting + * buffers and make the change visible to other CPU's. + */ +#define _exitlock_spin(mtxp, inten1, inten2) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movw %1,%%ax;" \ +" decw %%ax;" \ +" js 1f;" \ +" movw %%ax,%1;" \ +" jmp 2f;" \ +"1: movl %0,%%eax;" \ +" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \ +" " inten1 ";" \ +" " MPLOCKED "" \ +" cmpxchgl %%ecx,%0;" \ +" " inten2 ";" \ +"2:" \ +"# exitlock_spin" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+m" (mtxp->mtx_recurse), /* 1 */ \ + "=&a" (_res) /* 2 */ \ + : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \ + : "memory", "ecx" /* used */ ); \ +}) + +#else /* I386_CPU */ + +/* + * For 386 processors only. + */ + +/* Get a sleep lock, deal with recursion inline. */ +#define _getlock_sleep(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \ + if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \ + mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \ + else { \ + atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \ + (mp)->mtx_recurse++; \ + } \ + } \ +} while (0) + +/* Get a spin lock, handle recursion inline (as the less common case) */ +#define _getlock_spin_block(mp, tid, type) do { \ + u_int _mtx_fl = read_eflags(); \ + disable_intr(); \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \ + mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \ + else \ + (mp)->mtx_savefl = _mtx_fl; \ +} while (0) + +/* + * Get a lock without any recursion handling. Calls the hard enter function if + * we can't get it inline. + */ +#define _getlock_norecurse(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \ + mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \ +} while (0) + +/* + * Release a sleep lock assuming we haven't recursed on it, recursion is handled + * in the hard function. + */ +#define _exitlock_norecurse(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \ + mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \ +} while (0) + +/* + * Release a sleep lock when its likely we recursed (the code to + * deal with simple recursion is inline). 
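All of the _getlock_*/_exitlock_* variants above, whether the cmpxchgl assembly or the atomic_cmpset_int() fallbacks, reduce to the same idea: the lock word is swapped between the MTX_UNOWNED cookie and the owning thread's id with a single compare-and-swap. A runnable C11 sketch of just that uncontested path follows; the contested path, which the real code hands off to mtx_enter_hard()/mtx_exit_hard(), is omitted, and the names here are invented.

#include <stdatomic.h>
#include <stdio.h>

#define MTX_UNOWNED 0x8                 /* same free cookie as the header above */

static _Atomic unsigned lock_word = MTX_UNOWNED;

/* Uncontested acquire: succeeds only if the word still holds MTX_UNOWNED. */
static int
try_enter(unsigned tid)
{
    unsigned expected = MTX_UNOWNED;

    return (atomic_compare_exchange_strong(&lock_word, &expected, tid));
}

/* Uncontested release: succeeds only if we still own the word outright. */
static int
try_exit(unsigned tid)
{
    unsigned expected = tid;

    return (atomic_compare_exchange_strong(&lock_word, &expected, MTX_UNOWNED));
}

int
main(void)
{
    unsigned me = 0x1000;               /* stand-in for CURTHD */

    printf("first enter:  %d\n", try_enter(me));   /* 1: got it */
    printf("second enter: %d\n", try_enter(me));   /* 0: already owned */
    printf("exit:         %d\n", try_exit(me));    /* 1: released */
    return (0);
}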
+ */ +#define _exitlock(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \ + if ((mp)->mtx_lock & MTX_RECURSE) { \ + if (--((mp)->mtx_recurse) == 0) \ + atomic_clear_int(&(mp)->mtx_lock, \ + MTX_RECURSE); \ + } else { \ + mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \ + } \ + } \ +} while (0) + +/* Release a spin lock (with possible recursion). */ +#define _exitlock_spin(mp, inten1, inten2) do { \ + if ((mp)->mtx_recurse == 0) { \ + atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \ + MTX_UNOWNED); \ + write_eflags((mp)->mtx_savefl); \ + } else { \ + (mp)->mtx_recurse--; \ + } \ +} while (0) + +#endif /* I386_CPU */ + +/* + * Externally visible mutex functions. + *------------------------------------------------------------------------------ + */ + +/* + * Return non-zero if a mutex is already owned by the current thread. + */ +#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD) + +/* Common strings */ +#ifdef MTX_STRS +#ifdef KTR_EXTEND + +/* + * KTR_EXTEND saves file name and line for all entries, so we don't need them + * here. Theoretically we should also change the entries which refer to them + * (from CTR5 to CTR3), but since they're just passed to snprinf as the last + * parameters, it doesn't do any harm to leave them. + */ +char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d"; +char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d"; +char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d"; +#else +char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d"; +char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d"; +char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d"; +#endif +char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0"; +char STR_mtx_owned[] = "mtx_owned(_mpp)"; +char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0"; +#else /* MTX_STRS */ +extern char STR_mtx_enter_fmt[]; +extern char STR_mtx_bad_type[]; +extern char STR_mtx_exit_fmt[]; +extern char STR_mtx_owned[]; +extern char STR_mtx_recurse[]; +extern char STR_mtx_try_enter_fmt[]; +#endif /* MTX_STRS */ + +#ifndef KLD_MODULE +/* + * Get lock 'm', the macro handles the easy (and most common cases) and leaves + * the slow stuff to the mtx_enter_hard() function. + * + * Note: since type is usually a constant much of this code is optimized out. + */ +_MTX_INLINE void +mtx_enter(mtx_t *mtxp, int type) +{ + mtx_t *_mpp = mtxp; + + /* bits only valid on mtx_exit() */ + MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0, + STR_mtx_bad_type); + + do { + if ((type) & MTX_SPIN) { + /* + * Easy cases of spin locks: + * + * 1) We already own the lock and will simply + * recurse on it (if RLIKELY) + * + * 2) The lock is free, we just get it + */ + if ((type) & MTX_RLIKELY) { + /* + * Check for recursion, if we already + * have this lock we just bump the + * recursion count. + */ + if (_mpp->mtx_lock == CURTHD) { + _mpp->mtx_recurse++; + break; /* Done */ + } + } + + if (((type) & MTX_TOPHALF) == 0) { + /* + * If an interrupt thread uses this + * we must block interrupts here. 
+ */ + if ((type) & MTX_FIRST) { + ASS_IEN; + disable_intr(); + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } else { + _getlock_spin_block(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } else + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } else { + /* Sleep locks */ + if ((type) & MTX_RLIKELY) + _getlock_sleep(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + else + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } while (0); + WITNESS_ENTER(_mpp, type); + CTR5(KTR_LOCK, STR_mtx_enter_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, + (_mpp)->mtx_recurse); +} + +/* + * Attempt to get MTX_DEF lock, return non-zero if lock acquired. + * + * XXX DOES NOT HANDLE RECURSION + */ +_MTX_INLINE int +mtx_try_enter(mtx_t *mtxp, int type) +{ + mtx_t *const _mpp = mtxp; + int _rval; + + _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD); +#ifdef SMP_DEBUG + if (_rval && (_mpp)->mtx_witness != NULL) { + ASS((_mpp)->mtx_recurse == 0); + witness_try_enter(_mpp, type, __FILE__, __LINE__); + } +#endif + CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval); + + return _rval; +} + +#define mtx_legal2block() (read_eflags() & 0x200) + +/* + * Release lock m. + */ +_MTX_INLINE void +mtx_exit(mtx_t *mtxp, int type) +{ + mtx_t *const _mpp = mtxp; + + MPASS2(mtx_owned(_mpp), STR_mtx_owned); + WITNESS_EXIT(_mpp, type); + CTR5(KTR_LOCK, STR_mtx_exit_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, + (_mpp)->mtx_recurse); + if ((type) & MTX_SPIN) { + if ((type) & MTX_NORECURSE) { + MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse); + atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock, + MTX_UNOWNED); + if (((type) & MTX_TOPHALF) == 0) { + if ((type) & MTX_FIRST) { + ASS_IDIS; + enable_intr(); + } else + write_eflags(_mpp->mtx_savefl); + } + } else { + if ((type) & MTX_TOPHALF) + _exitlock_spin(_mpp,,); + else { + if ((type) & MTX_FIRST) { + ASS_IDIS; + _exitlock_spin(_mpp,, "sti"); + } else { + _exitlock_spin(_mpp, + "pushl %3", "popfl"); + } + } + } + } else { + /* Handle sleep locks */ + if ((type) & MTX_RLIKELY) + _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS); + else { + _exitlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } +} + +#endif /* KLD_MODULE */ +#endif /* _KERNEL */ + +#else /* !LOCORE */ + +/* + * Simple assembly macros to get and release non-recursive spin locks + */ + +#if defined(I386_CPU) + +#define MTX_EXIT(lck, reg) \ + movl $ MTX_UNOWNED,lck+MTX_LOCK; + +#else /* I386_CPU */ + +#define MTX_ENTER(reg, lck) \ +9: movl $ MTX_UNOWNED,%eax; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + jnz 9b + +/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */ +#define MTX_EXIT(lck,reg) \ + movl lck+MTX_LOCK,%eax; \ + movl $ MTX_UNOWNED,reg; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + +#define MTX_ENTER_WITH_RECURSION(reg, lck) \ + movl lck+MTX_LOCK,%eax; \ + cmpl PCPU_CURPROC,%eax; \ + jne 9f; \ + incw lck+MTX_RECURSECNT; \ + jmp 8f; \ +9: movl $ MTX_UNOWNED,%eax; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + jnz 9b; \ +8: + +#define MTX_EXIT_WITH_RECURSION(lck,reg) \ + movw lck+MTX_RECURSECNT,%ax; \ + decw %ax; \ + js 9f; \ + movw %ax,lck+MTX_RECURSECNT; \ + jmp 8f; \ +9: movl lck+MTX_LOCK,%eax; \ + movl $ MTX_UNOWNED,reg; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ +8: + +#endif /* I386_CPU */ +#endif /* !LOCORE */ +#endif /* __MACHINE_MUTEX_H */ diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h index 08beb5a83059..1c7af8505ab1 100644 --- 
a/sys/i386/include/pcb.h +++ b/sys/i386/include/pcb.h @@ -72,11 +72,7 @@ struct pcb { #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ caddr_t pcb_onfault; /* copyin/out fault recovery */ -#ifdef SMP - u_long pcb_mpnest; -#else - u_long pcb_mpnest_dontuse; -#endif + int pcb_schednest; int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 58bd9cfe9416..440da60b4b83 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -26,6 +26,20 @@ * $FreeBSD$ */ +#ifndef _MACHINE_GLOBALDATA_H_ +#define _MACHINE_GLOBALDATA_H_ + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <machine/pmap.h> +#include <machine/segments.h> +#include <machine/tss.h> + +/* XXX */ +#ifdef KTR_PERCPU +#include <sys/ktr.h> +#endif + /* * This structure maps out the global data that needs to be kept on a * per-cpu basis. genassym uses this to generate offsets for the assembler @@ -41,11 +55,14 @@ struct globaldata { struct privatespace *gd_prvspace; /* self-reference */ struct proc *gd_curproc; + struct proc *gd_prevproc; struct proc *gd_npxproc; struct pcb *gd_curpcb; + struct proc *gd_idleproc; struct timeval gd_switchtime; struct i386tss gd_common_tss; int gd_switchticks; + int gd_intr_nesting_level; struct segment_descriptor gd_common_tssd; struct segment_descriptor *gd_tss_gdt; #ifdef USER_LDT @@ -67,8 +84,22 @@ struct globaldata { unsigned *gd_prv_PADDR1; #endif u_int gd_astpending; + SLIST_ENTRY(globaldata) gd_allcpu; + int gd_witness_spin_check; +#ifdef KTR_PERCPU +#ifdef KTR + volatile int gd_ktr_idx; + char *gd_ktr_buf; + char gd_ktr_buf_data[KTR_SIZE]; +#endif +#endif }; +extern struct globaldata globaldata; + +SLIST_HEAD(cpuhead, globaldata); +extern struct cpuhead cpuhead; + #ifdef SMP /* * This is the upper (0xff800000) address space layout that is per-cpu. @@ -93,3 +124,5 @@ struct privatespace { extern struct privatespace SMP_prvspace[]; #endif + +#endif /* ! _MACHINE_GLOBALDATA_H_ */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 69b716ba8579..20d4fa3a8873 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -15,6 +15,9 @@ #ifdef _KERNEL +#ifdef I386_CPU +#error SMP not supported with I386_CPU +#endif #if defined(SMP) && !defined(APIC_IO) # error APIC_IO required for SMP, add "options APIC_IO" to your config file. 
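The pcpu.h hunk above (together with the start_all_aps() change earlier in this diff) threads every per-CPU globaldata onto a single cpuhead SLIST through the new gd_allcpu entry, giving the rest of the kernel a way to walk all CPUs. A small userland sketch of the same list shape using the stock <sys/queue.h> macros; the structure is trimmed to the one field the demo needs.

#include <sys/queue.h>
#include <stdio.h>

struct globaldata {
    int gd_cpuid;
    SLIST_ENTRY(globaldata) gd_allcpu;
};

SLIST_HEAD(cpuhead, globaldata);
static struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead);

int
main(void)
{
    static struct globaldata cpus[2];
    struct globaldata *gd;

    for (int i = 0; i < 2; i++) {
        cpus[i].gd_cpuid = i;
        /* start_all_aps() performs the equivalent insertion per AP. */
        SLIST_INSERT_HEAD(&cpuhead, &cpus[i], gd_allcpu);
    }
    SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
        printf("cpu%d is on the list\n", gd->gd_cpuid);
    return (0);
}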
#endif /* SMP && !APIC_IO */ @@ -57,23 +60,6 @@ extern int bootMP_size; /* functions in mpboot.s */ void bootMP __P((void)); -/* global data in mplock.s */ -extern u_int mp_lock; -extern u_int isr_lock; -#ifdef RECURSIVE_MPINTRLOCK -extern u_int mpintr_lock; -#endif /* RECURSIVE_MPINTRLOCK */ - -/* functions in mplock.s */ -void get_mplock __P((void)); -void rel_mplock __P((void)); -int try_mplock __P((void)); -#ifdef RECURSIVE_MPINTRLOCK -void get_mpintrlock __P((void)); -void rel_mpintrlock __P((void)); -int try_mpintrlock __P((void)); -#endif /* RECURSIVE_MPINTRLOCK */ - /* global data in apic_vector.s */ extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; @@ -185,23 +171,7 @@ extern int smp_started; extern volatile int smp_idle_loops; #endif /* !LOCORE */ -#else /* !SMP && !APIC_IO */ - -/* - * Create dummy MP lock empties - */ - -static __inline void -get_mplock(void) -{ -} - -static __inline void -rel_mplock(void) -{ -} - -#endif +#endif /* SMP && !APIC_IO */ #endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index f9ac4a36919e..304e99051295 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -86,7 +86,6 @@ * These defines enable critical region locking of areas that were * protected via cli/sti in the UP kernel. * - * MPINTRLOCK protects all the generic areas. * COMLOCK protects the sio/cy drivers. * CLOCKLOCK protects clock hardware and data * known to be incomplete: @@ -94,7 +93,6 @@ * ? */ #ifdef PUSHDOWN_LEVEL_1 -#define USE_MPINTRLOCK #define USE_COMLOCK #define USE_CLOCKLOCK #endif @@ -176,9 +174,8 @@ /* * Send CPUSTOP IPI for stop/restart of other CPUs on DDB break. - * -#define VERBOSE_CPUSTOP_ON_DDBBREAK */ +#define VERBOSE_CPUSTOP_ON_DDBBREAK #define CPUSTOP_ON_DDBBREAK diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s index 94771f3eadb3..0def1de7e02d 100644 --- a/sys/i386/isa/apic_ipl.s +++ b/sys/i386/isa/apic_ipl.s @@ -69,78 +69,6 @@ _apic_imen: SUPERALIGN_TEXT /* - * splz() - dispatch pending interrupts after cpl reduced - * - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. However, since ipending can change at any time - * (by an interrupt or, with SMP, by another cpu), we have to - * repeat the check. At the moment we must own the MP lock in - * the SMP case because the interruput handlers require it. We - * loop until no unmasked pending interrupts remain. - * - * No new unmaksed pending interrupts will be added during the - * loop because, being unmasked, the interrupt code will be able - * to execute the interrupts. - * - * Interrupts come in two flavors: Hardware interrupts and software - * interrupts. We have to detect the type of interrupt (based on the - * position of the interrupt bit) and call the appropriate dispatch - * routine. - * - * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't - * rely on the secondary btrl tests. - */ - movl _cpl,%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. 
- */ - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne splz_unpend - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - lock - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi - /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - * - * The vec[] routines build the proper frame on the stack, - * then call one of _Xintr0 thru _XintrNN. - */ - jmp *_vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax - orl imasks(,%ecx,4),%eax - movl %eax,_cpl - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,_cpl - jmp splz_next - -/* * Fake clock interrupt(s) so that they appear to come from our caller instead * of from here, so that system profiling works. * XXX do this more generally (for all vectors; look up the C entry point). @@ -161,8 +89,6 @@ __CONCAT(vec,irq_num): ; \ pushl $KCSEL ; \ pushl %eax ; \ cli ; \ - lock ; /* MP-safe */ \ - andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \ MEXITCOUNT ; \ APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \ jmp __CONCAT(_Xintr,irq_num) diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index 2a7559df7f97..54bf00366c81 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -17,7 +17,7 @@ /* - * Macros for interrupt interrupt entry, call to handler, and exit. + * Macros for interrupt entry, call to handler, and exit. */ #define FAST_INTR(irq_num, vec_name) \ @@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \ /* - * Test to see if the source is currntly masked, clear if so. + * Test to see if the source is currently masked, clear if so. */ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ @@ -200,7 +200,16 @@ log_intr_event: #else #define APIC_ITRACE(name, irq_num, id) #endif - + +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \ maybe_extra_ipending ; \ ; \ APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ -; \ incb _intr_nesting_level ; \ ; \ /* entry point used by doreti_unpend for HWIs. 
*/ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl _cpl, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cpl ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), _ipending ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ + pushl $irq_num; /* pass the IRQ */ \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ ; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ MEXITCOUNT ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - POP_FRAME ; /* and return */ \ - iret + jmp doreti_next /* * Handle "spurious INTerrupts". @@ -434,20 +380,10 @@ _Xcpuast: FAKE_MCOUNT(13*4(%esp)) - /* - * Giant locks do not come cheap. - * A lot of cycles are going to be wasted here. - */ - call _get_mplock - - movl _cpl, %eax - pushl %eax orl $AST_PENDING, _astpending /* XXX */ incb _intr_nesting_level sti - pushl $0 - movl _cpuid, %eax lock btrl %eax, _checkstate_pending_ast @@ -461,7 +397,7 @@ _Xcpuast: lock incl CNAME(cpuast_cnt) MEXITCOUNT - jmp _doreti + jmp doreti_next 1: /* We are already in the process of delivering an ast for this CPU */ POP_FRAME @@ -487,40 +423,24 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - MP_TRYLOCK - testl %eax,%eax /* Did we get the lock ? 
*/ - jz 1f /* No */ - lock incl CNAME(forward_irq_hitcnt) cmpb $4, _intr_nesting_level - jae 2f + jae 1f - movl _cpl, %eax - pushl %eax incb _intr_nesting_level sti - pushl $0 - MEXITCOUNT - jmp _doreti /* Handle forwarded interrupt */ + jmp doreti_next /* Handle forwarded interrupt */ 1: lock - incl CNAME(forward_irq_misscnt) - call forward_irq /* Oops, we've lost the isr lock */ - MEXITCOUNT - POP_FRAME - iret -2: - lock incl CNAME(forward_irq_toodeepcnt) -3: - MP_RELLOCK MEXITCOUNT POP_FRAME iret +#if 0 /* * */ @@ -532,9 +452,11 @@ forward_irq: cmpl $0, CNAME(forward_irq_enabled) jz 4f +/* XXX - this is broken now, because mp_lock doesn't exist movl _mp_lock,%eax cmpl $FREE_LOCK,%eax jne 1f + */ movl $0, %eax /* Pick CPU #0 if noone has lock */ 1: shrl $24,%eax @@ -559,6 +481,7 @@ forward_irq: jnz 3b 4: ret +#endif /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, @@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr) FAST_INTR(22,fastintr22) FAST_INTR(23,fastintr23) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, CLKINTR_PENDING) INTR(1,intr1,) INTR(2,intr2,) @@ -728,15 +652,11 @@ _ihandlers: .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - +#if 0 /* active flag for lazy masking */ iactive: .long 0 +#endif #ifdef COUNT_XINVLTLB_HITS .globl _xhits diff --git a/sys/i386/isa/atpic_vector.s b/sys/i386/isa/atpic_vector.s index e427351ca205..d2b88bf705a3 100644 --- a/sys/i386/isa/atpic_vector.s +++ b/sys/i386/isa/atpic_vector.s @@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \ pushl %ecx ; \ pushl %edx ; \ pushl %ds ; \ + pushl %fs ; \ MAYBE_PUSHL_ES ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ + mov %ax,%fs ; \ MAYBE_MOVW_AX_ES ; \ FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \ pushl _intr_unit + (irq_num) * 4 ; \ @@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \ incl _cnt+V_INTR ; /* book-keeping can wait */ \ movl _intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ - movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \ +/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \ notl %eax ; \ - andl _ipending,%eax ; \ - jne 2f ; /* yes, maybe handle them */ \ + andl _spending,$SWI_MASK ; \ + jne 2f ; // yes, maybe handle them */ \ 1: ; \ MEXITCOUNT ; \ MAYBE_POPL_ES ; \ + popl %fs ; \ popl %ds ; \ popl %edx ; \ popl %ecx ; \ popl %eax ; \ iret ; \ + +#if 0 ; \ ALIGN_TEXT ; \ 2: ; \ @@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \ incb _intr_nesting_level ; /* ... really limit it ... */ \ sti ; /* ... to do this as early as possible */ \ MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ + popl %fs ; \ popl %ecx ; /* ... original %ds ... */ \ popl %edx ; \ xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \ @@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \ movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \ movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \ movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ subl $4,%esp ; /* junk for unit number */ \ MEXITCOUNT ; \ jmp _doreti +#endif +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. 
+ */ #define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \ pushl %ds ; /* save our data and extra segments ... */ \ pushl %es ; \ pushl %fs ; \ - mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \ - mov %ax,%ds ; /* ... early for obsolete reasons */ \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ mov %ax,%es ; \ mov %ax,%fs ; \ maybe_extra_ipending ; \ @@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \ movb %al,_imen + IRQ_BYTE(irq_num) ; \ outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ - movl _cpl,%eax ; \ - testb $IRQ_BIT(irq_num),%reg ; \ - jne 2f ; \ - incb _intr_nesting_level ; \ + incb _intr_nesting_level ; /* XXX do we need this? */ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - movl _cpl,%eax ; \ - pushl %eax ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - orl _intr_mask + (irq_num) * 4,%eax ; \ - movl %eax,_cpl ; \ + pushl $irq_num; /* pass the IRQ */ \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; /* must unmask _imen and icu atomically */ \ - movb _imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,_imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - sti ; /* XXX _doreti repeats the cli/sti */ \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ MEXITCOUNT ; \ /* We could usually avoid the following jmp by inlining some of */ \ /* _doreti, but it's probably better to use less cache. */ \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - /* XXX skip mcounting here to avoid double count */ \ - orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp ; \ - iret + jmp doreti_next /* and catch up inside doreti */ + +/* + * Reenable the interrupt mask after completing an interrupt. Called + * from ithd_loop. There are two separate functions, one for each + * ICU. 
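Both the APIC and the ICU INTR() stubs above now do only the minimum at interrupt time: mask and EOI the source, push the IRQ number, and call sched_ithd(), so the real handler runs later in its own kernel thread, which unmasks the source again afterwards (the setimask helpers just below). A rough userland analogue of that hand-off, with one worker thread standing in for the interrupt thread; everything here is invented for the demo and it handles a single wakeup only.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ithd_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ithd_cv = PTHREAD_COND_INITIALIZER;
static int pending_irq = -1;

/* What the interrupt stub would do: record the IRQ and wake the thread. */
static void
sched_ithd_demo(int irq)
{
    pthread_mutex_lock(&ithd_mtx);
    pending_irq = irq;
    pthread_cond_signal(&ithd_cv);
    pthread_mutex_unlock(&ithd_mtx);
}

/* The "interrupt thread": runs the handler outside interrupt context. */
static void *
ithd_loop(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&ithd_mtx);
    while (pending_irq == -1)
        pthread_cond_wait(&ithd_cv, &ithd_mtx);
    printf("handling irq %d in a thread\n", pending_irq);
    pthread_mutex_unlock(&ithd_mtx);
    return (NULL);
}

int
main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, ithd_loop, NULL);
    sched_ithd_demo(7);         /* pretend IRQ 7 fired */
    pthread_join(t, NULL);
    return (0);
}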
+ */ + .globl setimask0, setimask1 +setimask0: + cli + movb _imen,%al + outb %al,$IO_ICU1 + ICU_IMR_OFFSET + sti + ret + +setimask1: + cli + movb _imen + 1,%al + outb %al,$IO_ICU2 + ICU_IMR_OFFSET + sti + ret MCOUNT_LABEL(bintr) FAST_INTR(0,fastintr0, ENABLE_ICU1) @@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr) FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) @@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr) INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + MCOUNT_LABEL(eintr) .data @@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */ .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - .text diff --git a/sys/i386/isa/bs/bsif.h b/sys/i386/isa/bs/bsif.h index 5a89681bcce0..6dcc2ab1b86d 100644 --- a/sys/i386/isa/bs/bsif.h +++ b/sys/i386/isa/bs/bsif.h @@ -208,17 +208,10 @@ static BS_INLINE void memcopy __P((void *from, void *to, register size_t len)); u_int32_t bs_adapter_info __P((int)); #define delay(y) DELAY(y) extern int dma_init_flag; -#ifdef SMP -#error XXX see comments in i386/isa/bs/bsif.h for details -/* - * ipending is 'opaque' in SMP, and can't be accessed this way. - * Since its my belief that this is PC98 code, and that PC98 and SMP - * are mutually exclusive, the above compile-time error is the "fix". - * Please inform smp@freebsd.org if this is NOT the case. 
- */ -#else + #define softintr(y) ipending |= (1 << y) -#endif /* SMP */ + +#endif /* IPENDING */ static BS_INLINE void memcopy(from, to, len) diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c index 15044abbaa3b..724f3c2817ba 100644 --- a/sys/i386/isa/clock.c +++ b/sys/i386/isa/clock.c @@ -54,6 +54,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> +#include <sys/proc.h> #include <sys/time.h> #include <sys/timetc.h> #include <sys/kernel.h> @@ -93,10 +94,6 @@ #include <i386/isa/mca_machdep.h> #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include <i386/isa/intr_machdep.h> /* The interrupt triggered by the 8254 (timer) chip */ @@ -104,7 +101,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -147,7 +143,9 @@ int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; +#if 0 static u_int clk_imask = HWI_MASK | SWI_MASK; +#endif static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; @@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, static void clkintr(struct clockframe frame) { + int intrsave; + if (timecounter->tc_get_timecount == i8254_get_timecount) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); if (i8254_ticked) i8254_ticked = 0; else { @@ -214,7 +216,8 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); } timer_func(&frame); switch (timer0_state) { @@ -233,14 +236,17 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -249,7 +255,9 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -257,7 +265,8 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; @@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; - int high, low; + int high, low, intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. 
*/ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -417,7 +426,7 @@ getit(void) high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return ((high << 8) | low); } @@ -523,6 +532,7 @@ sysbeepstop(void *chan) int sysbeep(int pitch, int period) { + int intrsave; int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) @@ -531,10 +541,13 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -683,11 +696,12 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; + int intrsave; int new_timer0_max_count; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq) void i8254_restore(void) { - u_long ef; + int intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -979,8 +994,8 @@ cpu_initclocks() { int diag; #ifdef APIC_IO - int apic_8254_trial; - struct intrec *clkdesc; + int apic_8254_trial, num_8254_ticks; + struct intrec *clkdesc, *rtcdesc; #endif /* APIC_IO */ if (statclock_disable) { @@ -1014,14 +1029,15 @@ cpu_initclocks() } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } - - clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); - #else /* APIC_IO */ - inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, + /* + * XXX Check the priority of this interrupt handler. I + * couldn't find anything suitable in the BSD/OS code (grog, + * 19 July 2000). + */ + /* Setup the PIC clk handler. The APIC handler is setup later */ + inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_EXCL); INTREN(IRQ0); @@ -1032,8 +1048,18 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ - if (statclock_disable) + if (statclock_disable) { +#ifdef APIC_IO + /* + * XXX - if statclock is disabled, don't attempt the APIC + * trial. Not sure this is sane for APIC_IO. 
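The clock.c hunks above replace bare disable_intr()/enable_intr() pairs with save_intr() ... restore_intr(intrsave) around CLOCK_LOCK(). Restoring rather than unconditionally enabling matters because these sections may be entered with interrupts already off. A toy model of that difference follows, with a plain int standing in for the EFLAGS interrupt bit and every name local to the demo.

#include <stdio.h>

static int intr_enabled = 1;    /* stand-in for the IF bit in EFLAGS */

static int save_intr_demo(void) { return (intr_enabled); }
static void disable_intr_demo(void) { intr_enabled = 0; }
static void restore_intr_demo(int s) { intr_enabled = s; }

static void
touch_hardware(void)
{
    int intrsave = save_intr_demo();

    disable_intr_demo();
    /* ... program the timer registers here ... */
    restore_intr_demo(intrsave);    /* caller's state, not a blind enable */
}

int
main(void)
{
    disable_intr_demo();            /* caller already has interrupts off */
    touch_hardware();
    printf("still disabled afterwards: %s\n", intr_enabled ? "no" : "yes");
    return (0);
}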
+ */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif /* APIC_IO */ return; + } diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -1041,34 +1067,44 @@ cpu_initclocks() #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); -#endif /* APIC_IO */ - inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, - INTR_EXCL); - -#ifdef APIC_IO - INTREN(APIC_IRQ8); -#else - INTREN(IRQ8); -#endif /* APIC_IO */ + if (apic_8254_trial) { + /* + * XXX - We use fast interrupts for clk and rtc long enough to + * perform the APIC probe and then revert to exclusive + * interrupts. + */ + clkdesc = inthand_add("clk", apic_8254_intr, + (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST); + INTREN(1 << apic_8254_intr); - writertc(RTC_STATUSB, rtc_statusb); + rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, + PI_REALTIME, INTR_FAST); /* XXX */ + INTREN(APIC_IRQ8); + writertc(RTC_STATUSB, rtc_statusb); -#ifdef APIC_IO - if (apic_8254_trial) { - printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ - if (read_intr_count(apic_8254_intr) < 3) { + num_8254_ticks = read_intr_count(apic_8254_intr); + + /* disable and remove our fake handlers */ + INTRDIS(1 << apic_8254_intr); + inthand_remove(clkdesc); + + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + INTRDIS(APIC_IRQ8); + inthand_remove(rtcdesc); + + if (num_8254_ticks < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ - INTRDIS(1 << apic_8254_intr); - inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", @@ -1087,13 +1123,27 @@ cpu_initclocks() } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); - inthand_add("clk", apic_8254_intr, - (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); } } + + /* Finally, setup the real clock handlers */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif + + inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME, + INTR_EXCL); +#ifdef APIC_IO + INTREN(APIC_IRQ8); +#else + INTREN(IRQ8); +#endif + + writertc(RTC_STATUSB, rtc_statusb); + +#ifdef APIC_IO if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) @@ -1198,11 +1248,12 @@ static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; - u_long ef; + int intrsave; u_int high, low; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc) count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || - ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && + ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. 
*/ @@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc) i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return (count); } diff --git a/sys/i386/isa/cy.c b/sys/i386/isa/cy.c index 52a8cf36892f..5487d8fe6299 100644 --- a/sys/i386/isa/cy.c +++ b/sys/i386/isa/cy.c @@ -94,11 +94,6 @@ #error "The cy device requires the old isa compatibility shims" #endif -#ifdef SMP -#define disable_intr() COM_DISABLE_INTR() -#define enable_intr() COM_ENABLE_INTR() -#endif /* SMP */ - /* * Dictionary so that I can name everything *sio* or *com* to compare with * sio.c. There is also lots of ugly formatting and unnecessary ifdefs to @@ -366,7 +361,7 @@ static struct com_s *p_com_addr[NSIO]; #define com_addr(unit) (p_com_addr[unit]) struct isa_driver siodriver = { - INTR_TYPE_TTY | INTR_TYPE_FAST, + INTR_TYPE_TTY | INTR_FAST, sioprobe, sioattach, driver_name @@ -604,11 +599,9 @@ cyattach_common(cy_iobase, cy_align) com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL; } if (siosetwater(com, com->it_in.c_ispeed) != 0) { - enable_intr(); free(com, M_DEVBUF); return (0); } - enable_intr(); termioschars(&com->it_in); com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate; com->it_out = com->it_in; @@ -662,6 +655,7 @@ sioopen(dev, flag, mode, p) int s; struct tty *tp; int unit; + int intrsave; mynor = minor(dev); unit = MINOR_TO_UNIT(mynor); @@ -768,14 +762,17 @@ open_top: } } + intrsave = save_intr(); disable_intr(); + COM_LOCK(); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); outb(iobase + com_ier, IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); #else /* !0 */ /* * Flush fifos. This requires a full channel reset which @@ -786,13 +783,16 @@ open_top: CD1400_CCR_CMDRESET | CD1400_CCR_CHANRESET); cd1400_channel_cmd(com, com->channel_control); + intrsave = save_intr(); disable_intr(); + COM_LOCK(); com->prev_modem_status = com->last_modem_status = cd_getreg(com, CD1400_MSVR2); cd_setreg(com, CD1400_SRER, com->intr_enable = CD1400_SRER_MDMCH | CD1400_SRER_RXDATA); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); #endif /* 0 */ /* * Handle initial DCD. Callout devices get a fake initial @@ -875,6 +875,7 @@ comhardclose(com) int s; struct tty *tp; int unit; + int intrsave; unit = com->unit; iobase = com->iobase; @@ -888,10 +889,13 @@ comhardclose(com) outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); #else /* XXX */ + intrsave = save_intr(); disable_intr(); + COM_LOCK(); com->etc = ETC_NONE; cd_setreg(com, CD1400_COR2, com->cor[1] &= ~CD1400_COR2_ETC); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); cd1400_channel_cmd(com, CD1400_CCR_CMDRESET | CD1400_CCR_FTF); #endif @@ -899,9 +903,12 @@ comhardclose(com) #if 0 outb(iobase + com_ier, 0); #else + intrsave = save_intr(); disable_intr(); + COM_LOCK(); cd_setreg(com, CD1400_SRER, com->intr_enable = 0); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); #endif tp = com->tp; if ((tp->t_cflag & HUPCL) @@ -991,6 +998,11 @@ siodtrwakeup(chan) wakeup(&com->dtr_wait); } +/* + * This function: + * a) needs to be called with COM_LOCK() held, and + * b) needs to return with COM_LOCK() held. 
+ */ static void sioinput(com) struct com_s *com; @@ -1000,6 +1012,7 @@ sioinput(com) u_char line_status; int recv_data; struct tty *tp; + int intrsave; buf = com->ibuf; tp = com->tp; @@ -1016,7 +1029,15 @@ sioinput(com) * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ + do { + /* + * This may look odd, but it is using save-and-enable + * semantics instead of the save-and-disable semantics + * that are used everywhere else. + */ + intrsave = save_intr(); + COM_UNLOCK(); enable_intr(); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat @@ -1038,10 +1059,18 @@ sioinput(com) tp->t_lflag &= ~FLUSHO; comstart(tp); } - disable_intr(); + restore_intr(intrsave); + COM_LOCK(); } while (buf < com->iptr); } else { do { + /* + * This may look odd, but it is using save-and-enable + * semantics instead of the save-and-disable semantics + * that are used everywhere else. + */ + intrsave = save_intr(); + COM_UNLOCK(); enable_intr(); line_status = buf[com->ierroff]; recv_data = *buf++; @@ -1057,7 +1086,8 @@ sioinput(com) recv_data |= TTY_PE; } (*linesw[tp->t_line].l_rint)(recv_data, tp); - disable_intr(); + restore_intr(intrsave); + COM_LOCK(); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); @@ -1729,6 +1759,7 @@ static void siopoll() { int unit; + int intrsave; #ifdef CyDebug ++cy_timeouts; @@ -1751,7 +1782,9 @@ repeat: * (actually never opened devices) so that we don't * loop. */ + intrsave = save_intr(); disable_intr(); + COM_LOCK(); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { @@ -1759,7 +1792,8 @@ repeat: com->state &= ~CS_CHECKMSR; } com_events -= incc; - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (incc != 0) log(LOG_DEBUG, "sio%d: %d events for device with no tp\n", @@ -1767,29 +1801,39 @@ repeat: continue; } if (com->iptr != com->ibuf) { + intrsave = save_intr(); disable_intr(); + COM_LOCK(); sioinput(com); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); + sioinput(com); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (delta_modem_status & MSR_DCD) (*linesw[tp->t_line].l_modem) (tp, com->prev_modem_status & MSR_DCD); } if (com->extra_state & CSE_ODONE) { + intrsave = save_intr(); disable_intr(); + COM_LOCK(); com_events -= LOTS_OF_EVENTS; com->extra_state &= ~CSE_ODONE; - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (!(com->state & CS_BUSY)) { tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); @@ -1801,10 +1845,13 @@ repeat: } } if (com->state & CS_ODONE) { + intrsave = save_intr(); disable_intr(); + COM_LOCK(); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); (*linesw[tp->t_line].l_start)(tp); } if (com_events == 0) @@ -1833,6 +1880,7 @@ comparam(tp, t) u_char opt; int s; int unit; + int intrsave; /* do historical conversions */ if (t->c_ispeed == 0) @@ -1857,14 +1905,9 @@ comparam(tp, t) else (void)commctl(com, TIOCM_DTR, DMBIS); - /* - * This returns with interrupts disabled so that we can complete - * the speed change atomically. - */ (void) siosetwater(com, t->c_ispeed); /* XXX we don't actually change the speed atomically. 
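sioinput() above is entered and left with COM_LOCK() held, but it deliberately drops the lock and re-enables interrupts around each slow call into the line discipline, then re-acquires both before it looks at the input ring again; that is what the "save-and-enable" comments refer to. A condensed sketch of that shape, with the cy-specific byte handling replaced by a placeholder (push_one_char() is not a real function in this driver):

/*
 * Condensed rendering of the locking shape used by sioinput() above.
 * Caller holds COM_LOCK() with interrupts disabled, as required.
 */
static void
drain_input_ring(struct com_s *com, struct tty *tp)
{
	u_char *buf = com->ibuf;
	int intrsave;

	do {
		/* Save state, then open up for the slow part. */
		intrsave = save_intr();
		COM_UNLOCK();
		enable_intr();

		push_one_char(tp, &buf);	/* placeholder for the l_rint work */

		/* Close back down before re-reading com->iptr. */
		restore_intr(intrsave);
		COM_LOCK();
	} while (buf < com->iptr);
}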
*/ - enable_intr(); if (idivisor != 0) { cd_setreg(com, CD1400_RBPR, idivisor); @@ -1985,12 +2028,15 @@ comparam(tp, t) if (cflag & CCTS_OFLOW) opt |= CD1400_COR2_CCTS_OFLOW; #endif + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (opt != com->cor[1]) { cor_change |= CD1400_CCR_COR2; cd_setreg(com, CD1400_COR2, com->cor[1] = opt); } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); /* * set channel option register 3 - @@ -2111,7 +2157,9 @@ comparam(tp, t) * XXX should have done this long ago, but there is too much state * to change all atomically. */ + intrsave = save_intr(); disable_intr(); + COM_LOCK(); com->state &= ~CS_TTGO; if (!(tp->t_state & TS_TTSTOP)) @@ -2177,7 +2225,8 @@ comparam(tp, t) | CD1400_SRER_TXMPTY); } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); splx(s); comstart(tp); if (com->ibufold != NULL) { @@ -2196,6 +2245,7 @@ siosetwater(com, speed) u_char *ibuf; int ibufsize; struct tty *tp; + int intrsave; /* * Make the buffer size large enough to handle a softtty interrupt @@ -2207,7 +2257,6 @@ siosetwater(com, speed) for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { - disable_intr(); return (0); } @@ -2217,7 +2266,6 @@ siosetwater(com, speed) */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { - disable_intr(); return (ENOMEM); } @@ -2235,7 +2283,9 @@ siosetwater(com, speed) * Read current input buffer, if any. Continue with interrupts * disabled. */ + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (com->iptr != com->ibuf) sioinput(com); @@ -2254,6 +2304,9 @@ siosetwater(com, speed) com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; + + COM_UNLOCK(); + restore_intr(intrsave); return (0); } @@ -2267,6 +2320,7 @@ comstart(tp) bool_t started; #endif int unit; + int intrsave; unit = DEV_TO_UNIT(tp->t_dev); com = com_addr(unit); @@ -2277,7 +2331,9 @@ comstart(tp) started = FALSE; #endif + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (tp->t_state & TS_TTSTOP) { com->state &= ~CS_TTGO; if (com->intr_enable & CD1400_SRER_TXRDY) @@ -2313,7 +2369,8 @@ comstart(tp) com->mcr_image |= com->mcr_rts); #endif } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); @@ -2332,7 +2389,9 @@ comstart(tp) sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) @@ -2351,7 +2410,8 @@ comstart(tp) & ~CD1400_SRER_TXMPTY) | CD1400_SRER_TXRDY); } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { #ifdef CyDebug @@ -2362,7 +2422,9 @@ comstart(tp) sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) @@ -2381,7 +2443,8 @@ comstart(tp) & ~CD1400_SRER_TXMPTY) | CD1400_SRER_TXRDY); } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); } tp->t_state |= TS_BUSY; } @@ -2390,10 +2453,13 @@ comstart(tp) ++com->start_real; #endif #if 0 + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); #endif ttwwakeup(tp); splx(s); @@ 
-2406,10 +2472,13 @@ comstop(tp, rw) { struct com_s *com; bool_t wakeup_etc; + int intrsave; com = com_addr(DEV_TO_UNIT(tp->t_dev)); wakeup_etc = FALSE; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); if (rw & FWRITE) { com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; @@ -2432,7 +2501,8 @@ comstop(tp, rw) com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (wakeup_etc) wakeup(&com->etc); if (rw & FWRITE && com->etc == ETC_NONE) @@ -2448,6 +2518,7 @@ commctl(com, bits, how) { int mcr; int msr; + int intrsave; if (how == DMGET) { if (com->channel_control & CD1400_CCR_RCVEN) @@ -2485,7 +2556,9 @@ commctl(com, bits, how) mcr |= com->mcr_dtr; if (bits & TIOCM_RTS) mcr |= com->mcr_rts; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); switch (how) { case DMSET: com->mcr_image = mcr; @@ -2503,7 +2576,8 @@ commctl(com, bits, how) cd_setreg(com, CD1400_MSVR2, mcr); break; } - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); return (0); } @@ -2565,9 +2639,14 @@ comwakeup(chan) com = com_addr(unit); if (com != NULL && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { + int intrsave; + + intrsave = save_intr(); disable_intr(); + COM_LOCK(); siointr1(com); - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); } } #endif @@ -2587,11 +2666,15 @@ comwakeup(chan) for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; + int intrsave; + intrsave = save_intr(); disable_intr(); + COM_LOCK(); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); if (delta == 0) continue; total = com->error_counts[errnum] += delta; @@ -2743,6 +2826,8 @@ cd_etc(com, etc) struct com_s *com; int etc; { + int intrsave; + /* * We can't change the hardware's ETC state while there are any * characters in the tx fifo, since those characters would be @@ -2754,26 +2839,28 @@ cd_etc(com, etc) * for the tx to become empty so that the command is sure to be * executed soon after we issue it. 
*/ + intrsave = save_intr(); disable_intr(); - if (com->etc == etc) { - enable_intr(); + COM_LOCK(); + if (com->etc == etc) goto wait; - } if ((etc == CD1400_ETC_SENDBREAK && (com->etc == ETC_BREAK_STARTING || com->etc == ETC_BREAK_STARTED)) || (etc == CD1400_ETC_STOPBREAK && (com->etc == ETC_BREAK_ENDING || com->etc == ETC_BREAK_ENDED || com->etc == ETC_NONE))) { - enable_intr(); + COM_UNLOCK(); + restore_intr(intrsave); return; } com->etc = etc; cd_setreg(com, CD1400_SRER, com->intr_enable = (com->intr_enable & ~CD1400_SRER_TXRDY) | CD1400_SRER_TXMPTY); - enable_intr(); wait: + COM_UNLOCK(); + restore_intr(intrsave); while (com->etc == etc && tsleep(&com->etc, TTIPRI | PCATCH, "cyetc", 0) == 0) continue; @@ -2787,7 +2874,7 @@ cd_getreg(com, reg) struct com_s *basecom; u_char car; int cy_align; - u_long ef; + int intrsave; cy_addr iobase; int val; @@ -2795,14 +2882,16 @@ cd_getreg(com, reg) car = com->unit & CD1400_CAR_CHAN; cy_align = com->cy_align; iobase = com->iobase; - ef = read_eflags(); - if (ef & PSL_I) - disable_intr(); + intrsave = save_intr(); + disable_intr(); + if (intrsave & PSL_I) + COM_LOCK(); if (basecom->car != car) cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car); val = cd_inb(iobase, reg, cy_align); - if (ef & PSL_I) - enable_intr(); + if (intrsave & PSL_I) + COM_UNLOCK(); + restore_intr(intrsave); return (val); } @@ -2815,21 +2904,23 @@ cd_setreg(com, reg, val) struct com_s *basecom; u_char car; int cy_align; - u_long ef; + int intrsave; cy_addr iobase; basecom = com_addr(com->unit & ~(CD1400_NO_OF_CHANNELS - 1)); car = com->unit & CD1400_CAR_CHAN; cy_align = com->cy_align; iobase = com->iobase; - ef = read_eflags(); - if (ef & PSL_I) - disable_intr(); + intrsave = save_intr(); + disable_intr(); + if (intrsave & PSL_I) + COM_LOCK(); if (basecom->car != car) cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car); cd_outb(iobase, reg, cy_align, val); - if (ef & PSL_I) - enable_intr(); + if (intrsave & PSL_I) + COM_UNLOCK(); + restore_intr(intrsave); } #ifdef CyDebug diff --git a/sys/i386/isa/icu_ipl.s b/sys/i386/isa/icu_ipl.s index 34753583a41e..d178d5c43c45 100644 --- a/sys/i386/isa/icu_ipl.s +++ b/sys/i386/isa/icu_ipl.s @@ -55,63 +55,6 @@ _imen: .long HWI_MASK SUPERALIGN_TEXT /* - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. We have to repeat the check since if there is an - * interrupt while we're looking, _doreti processing for the - * interrupt will handle all the unmasked pending interrupts - * because we restored early. We're repeating the calculation - * of (ipending & ~cpl) anyway so that the caller doesn't have - * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx" - * is undefined when %ecx is 0 so we can't rely on the secondary - * btrl tests. - */ - movl _cpl,%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl %eax,%ecx - notl %ecx - andl _ipending,%ecx - jne splz_unpend - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi - /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. 
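cd_getreg() and cd_setreg() above take COM_LOCK() only when the saved flags show that interrupts were enabled on entry (intrsave & PSL_I), presumably because calls arriving from the interrupt path already run with the lock held and interrupts off, and the spin lock is not recursive. Stripped of the CD1400 details, the guard looks like this (cd_write_one() is a placeholder):

/*
 * Sketch of the conditional-lock guard in cd_getreg()/cd_setreg() above.
 */
static void
cd_access(struct com_s *com, int reg, int val)
{
	int intrsave;

	intrsave = save_intr();
	disable_intr();
	if (intrsave & PSL_I)		/* top half: we must take the lock */
		COM_LOCK();
	cd_write_one(com, reg, val);	/* placeholder for the CAR/register poke */
	if (intrsave & PSL_I)		/* release only what we took */
		COM_UNLOCK();
	restore_intr(intrsave);
}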
Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - */ - jmp *vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax - orl imasks(,%ecx,4),%eax - movl %eax,_cpl - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,_cpl - jmp splz_next - -/* * Fake clock interrupt(s) so that they appear to come from our caller instead * of from here, so that system profiling works. * XXX do this more generally (for all vectors; look up the C entry point). diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s index e427351ca205..d2b88bf705a3 100644 --- a/sys/i386/isa/icu_vector.s +++ b/sys/i386/isa/icu_vector.s @@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \ pushl %ecx ; \ pushl %edx ; \ pushl %ds ; \ + pushl %fs ; \ MAYBE_PUSHL_ES ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ + mov %ax,%fs ; \ MAYBE_MOVW_AX_ES ; \ FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \ pushl _intr_unit + (irq_num) * 4 ; \ @@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \ incl _cnt+V_INTR ; /* book-keeping can wait */ \ movl _intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ - movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \ +/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \ notl %eax ; \ - andl _ipending,%eax ; \ - jne 2f ; /* yes, maybe handle them */ \ + andl _spending,$SWI_MASK ; \ + jne 2f ; // yes, maybe handle them */ \ 1: ; \ MEXITCOUNT ; \ MAYBE_POPL_ES ; \ + popl %fs ; \ popl %ds ; \ popl %edx ; \ popl %ecx ; \ popl %eax ; \ iret ; \ + +#if 0 ; \ ALIGN_TEXT ; \ 2: ; \ @@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \ incb _intr_nesting_level ; /* ... really limit it ... */ \ sti ; /* ... to do this as early as possible */ \ MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ + popl %fs ; \ popl %ecx ; /* ... original %ds ... */ \ popl %edx ; \ xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \ @@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \ movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \ movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \ movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ subl $4,%esp ; /* junk for unit number */ \ MEXITCOUNT ; \ jmp _doreti +#endif +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \ pushl %ds ; /* save our data and extra segments ... */ \ pushl %es ; \ pushl %fs ; \ - mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \ - mov %ax,%ds ; /* ... early for obsolete reasons */ \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ mov %ax,%es ; \ mov %ax,%fs ; \ maybe_extra_ipending ; \ @@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \ movb %al,_imen + IRQ_BYTE(irq_num) ; \ outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ - movl _cpl,%eax ; \ - testb $IRQ_BIT(irq_num),%reg ; \ - jne 2f ; \ - incb _intr_nesting_level ; \ + incb _intr_nesting_level ; /* XXX do we need this? 
*/ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - movl _cpl,%eax ; \ - pushl %eax ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - orl _intr_mask + (irq_num) * 4,%eax ; \ - movl %eax,_cpl ; \ + pushl $irq_num; /* pass the IRQ */ \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; /* must unmask _imen and icu atomically */ \ - movb _imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,_imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - sti ; /* XXX _doreti repeats the cli/sti */ \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ MEXITCOUNT ; \ /* We could usually avoid the following jmp by inlining some of */ \ /* _doreti, but it's probably better to use less cache. */ \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - /* XXX skip mcounting here to avoid double count */ \ - orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp ; \ - iret + jmp doreti_next /* and catch up inside doreti */ + +/* + * Reenable the interrupt mask after completing an interrupt. Called + * from ithd_loop. There are two separate functions, one for each + * ICU. + */ + .globl setimask0, setimask1 +setimask0: + cli + movb _imen,%al + outb %al,$IO_ICU1 + ICU_IMR_OFFSET + sti + ret + +setimask1: + cli + movb _imen + 1,%al + outb %al,$IO_ICU2 + ICU_IMR_OFFSET + sti + ret MCOUNT_LABEL(bintr) FAST_INTR(0,fastintr0, ENABLE_ICU1) @@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr) FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) @@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr) INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + MCOUNT_LABEL(eintr) .data @@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */ .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - .text diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index 34a8c229bd6b..870760e1ce01 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -36,12 +36,6 @@ * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD$ */ -/* - * This file contains an aggregated module marked: - * Copyright (c) 1997, Stefan Esser <se@freebsd.org> - * All rights reserved. - * See the notice for details. 
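setimask0 and setimask1 above exist so that, per the comment, the interrupt thread can reopen an IRQ's mask once its handlers have run: the entry stubs mask the source in imen before calling sched_ithd(), and these routines write the current imen byte back out to the matching 8259 with interrupts briefly disabled. A small self-contained illustration of the imen bookkeeping shared by INTREN()/INTRDIS() and these routines (the 1-equals-masked convention follows the 8259 IMR and the HWI_MASK initializer above; the rest is illustrative only):

#include <stdint.h>

static uint16_t imen = 0xffff;		/* all hardware IRQs masked, as at boot */

static void intren(uint16_t bits)  { imen &= ~bits; }	/* INTREN(): unmask */
static void intrdis(uint16_t bits) { imen |=  bits; }	/* INTRDIS(): mask */

static uint8_t icu1_imr(void) { return (imen & 0xff); }	/* byte setimask0 writes */
static uint8_t icu2_imr(void) { return (imen >> 8); }	/* byte setimask1 writes */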
- */ #include "opt_auto_eoi.h" @@ -51,11 +45,14 @@ #ifndef SMP #include <machine/lock.h> #endif +#include <sys/proc.h> #include <sys/systm.h> #include <sys/syslog.h> #include <sys/kernel.h> +#include <sys/kthread.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/unistd.h> #include <sys/errno.h> #include <sys/interrupt.h> #include <machine/ipl.h> @@ -91,30 +88,14 @@ #include <i386/isa/mca_machdep.h> #endif -/* XXX should be in suitable include files */ -#ifdef PC98 -#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ -#define ICU_SLAVEID 7 -#else -#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ -#define ICU_SLAVEID 2 -#endif - -#ifdef APIC_IO /* - * This is to accommodate "mixed-mode" programming for - * motherboards that don't connect the 8254 to the IO APIC. + * Per-interrupt data. We consider the soft interrupt to be a special + * case, so these arrays have NHWI + NSWI entries, not ICU_LEN. */ -#define AUTO_EOI_1 1 -#endif - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -u_long *intr_countp[ICU_LEN]; -inthand2_t *intr_handler[ICU_LEN]; -u_int intr_mask[ICU_LEN]; -static u_int* intr_mptr[ICU_LEN]; -void *intr_unit[ICU_LEN]; +u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */ +inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */ +ithd *ithds[NHWI + NSWI]; /* real interrupt handler */ +void *intr_unit[NHWI + NSWI]; static inthand_t *fastintr[ICU_LEN] = { &IDTVEC(fastintr0), &IDTVEC(fastintr1), @@ -292,8 +273,9 @@ isa_nmi(cd) } /* - * Fill in default interrupt table (in case of spuruious interrupt - * during configuration of kernel, setup interrupt control unit + * Create a default interrupt table to avoid problems caused by + * spurious interrupts during configuration of kernel, then setup + * interrupt control unit. */ void isa_defaultirq() @@ -364,16 +346,6 @@ isa_strayintr(vcookiep) { int intr = (void **)vcookiep - &intr_unit[0]; - /* DON'T BOTHER FOR NOW! */ - /* for some reason, we get bursts of intr #7, even if not enabled! */ - /* - * Well the reason you got bursts of intr #7 is because someone - * raised an interrupt line and dropped it before the 8259 could - * prioritize it. This is documented in the intel data book. This - * means you have BAD hardware! I have changed this so that only - * the first 5 get logged, then it quits logging them, and puts - * out a special message. rgrimes 3/25/1993 - */ /* * XXX TODO print a different message for #7 if it is for a * glitch. Glitches can be distinguished from real #7's by @@ -405,36 +377,10 @@ isa_irq_pending() } #endif -int -update_intr_masks(void) -{ - int intr, n=0; - u_int mask,*maskptr; - - for (intr=0; intr < ICU_LEN; intr ++) { -#if defined(APIC_IO) - /* no 8259 SLAVE to ignore */ -#else - if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */ -#endif /* APIC_IO */ - maskptr = intr_mptr[intr]; - if (!maskptr) - continue; - *maskptr |= SWI_LOW_MASK | (1 << intr); - mask = *maskptr; - if (mask != intr_mask[intr]) { -#if 0 - printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n", - intr, intr_mask[intr], mask, maskptr); -#endif - intr_mask[intr]=mask; - n++; - } - - } - return (n); -} - +/* + * Update intrnames array with the specified name. This is used by + * vmstat(8) and the like. 
+ */ static void update_intrname(int intr, char *name) { @@ -485,7 +431,7 @@ found: } int -icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) +icu_setup(int intr, inthand2_t *handler, void *arg, int flags) { #ifdef FAST_HI int select; /* the select register is 8 bits */ @@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ u_long ef; - u_int mask = (maskptr ? *maskptr : 0); #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) ef = read_eflags(); disable_intr(); intr_handler[intr] = handler; - intr_mptr[intr] = maskptr; - intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr); intr_unit[intr] = arg; #ifdef FAST_HI if (flags & INTR_FAST) { @@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ INTREN(1 << intr); - MPINTR_UNLOCK(); write_eflags(ef); return (0); } +/* + * Dissociate an interrupt handler from an IRQ and set the handler to + * the stray interrupt handler. The 'handler' parameter is used only + * for consistency checking. + */ int icu_unset(intr, handler) int intr; @@ -567,8 +514,6 @@ icu_unset(intr, handler) disable_intr(); intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; - intr_mptr[intr] = NULL; - intr_mask[intr] = HWI_MASK | SWI_MASK; intr_unit[intr] = &intr_unit[intr]; #ifdef FAST_HI_XXX /* XXX how do I re-create dvp here? */ @@ -581,353 +526,172 @@ icu_unset(intr, handler) setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ - MPINTR_UNLOCK(); write_eflags(ef); return (0); } -/* The following notice applies beyond this point in the file */ - -/* - * Copyright (c) 1997, Stefan Esser <se@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - * - */ - -typedef struct intrec { - intrmask_t mask; - inthand2_t *handler; - void *argument; - struct intrec *next; - char *name; - int intr; - intrmask_t *maskptr; - int flags; -} intrec; - -static intrec *intreclist_head[ICU_LEN]; - -/* - * The interrupt multiplexer calls each of the handlers in turn. The - * ipl is initially quite low. It is raised as necessary for each call - * and lowered after the call. Thus out of order handling is possible - * even for interrupts of the same type. This is probably no more - * harmful than out of order handling in general (not harmful except - * for real time response which we don't support anyway). - */ -static void -intr_mux(void *arg) -{ - intrec *p; - intrmask_t oldspl; - - for (p = arg; p != NULL; p = p->next) { - oldspl = splq(p->mask); - p->handler(p->argument); - splx(oldspl); - } -} - -static intrec* -find_idesc(unsigned *maskptr, int irq) -{ - intrec *p = intreclist_head[irq]; - - while (p && p->maskptr != maskptr) - p = p->next; - - return (p); -} - -static intrec** -find_pred(intrec *idesc, int irq) +intrec * +inthand_add(const char *name, int irq, inthand2_t handler, void *arg, + int pri, int flags) { - intrec **pp = &intreclist_head[irq]; - intrec *p = *pp; - - while (p != idesc) { - if (p == NULL) - return (NULL); - pp = &p->next; - p = *pp; - } - return (pp); -} - -/* - * Both the low level handler and the shared interrupt multiplexer - * block out further interrupts as set in the handlers "mask", while - * the handler is running. In fact *maskptr should be used for this - * purpose, but since this requires one more pointer dereference on - * each interrupt, we rather bother update "mask" whenever *maskptr - * changes. The function "update_masks" should be called **after** - * all manipulation of the linked list of interrupt handlers hung - * off of intrdec_head[irq] is complete, since the chain of handlers - * will both determine the *maskptr values and the instances of mask - * that are fixed. This function should be called with the irq for - * which a new handler has been add blocked, since the masks may not - * yet know about the use of this irq for a device of a certain class. - */ + ithd *ithd = ithds[irq]; /* descriptor for the IRQ */ + intrec *head; /* chain of handlers for IRQ */ + intrec *idesc; /* descriptor for this handler */ + struct proc *p; /* interrupt thread */ + int errcode = 0; -static void -update_mux_masks(void) -{ - int irq; - for (irq = 0; irq < ICU_LEN; irq++) { - intrec *idesc = intreclist_head[irq]; - while (idesc != NULL) { - if (idesc->maskptr != NULL) { - /* our copy of *maskptr may be stale, refresh */ - idesc->mask = *idesc->maskptr; - } - idesc = idesc->next; + if (name == NULL) /* no name? */ + panic ("anonymous interrupt"); + if (ithd == NULL || ithd->it_ih == NULL) { + /* first handler for this irq. 
*/ + if (ithd == NULL) { + ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (ithd == NULL) + return (NULL); + bzero(ithd, sizeof(struct ithd)); + ithd->irq = irq; + ithds[irq] = ithd; } - } -} - -static void -update_masks(intrmask_t *maskptr, int irq) -{ - intrmask_t mask = 1 << irq; - - if (maskptr == NULL) - return; - - if (find_idesc(maskptr, irq) == NULL) { - /* no reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) == 0) - return; - /* the irq was included in the classes mask, remove it */ - *maskptr &= ~mask; - } else { - /* a reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) != 0) - return; - /* put the irq into the classes mask */ - *maskptr |= mask; - } - /* we need to update all values in the intr_mask[irq] array */ - update_intr_masks(); - /* update mask in chains of the interrupt multiplex handler as well */ - update_mux_masks(); -} - -/* - * Add interrupt handler to linked list hung off of intreclist_head[irq] - * and install shared interrupt multiplex handler, if necessary - */ - -static int -add_intrdesc(intrec *idesc) -{ - int irq = idesc->intr; - - intrec *head = intreclist_head[irq]; - - if (head == NULL) { - /* first handler for this irq, just install it */ - if (icu_setup(irq, idesc->handler, idesc->argument, - idesc->maskptr, idesc->flags) != 0) - return (-1); - - update_intrname(irq, idesc->name); - /* keep reference */ - intreclist_head[irq] = idesc; - } else { - if ((idesc->flags & INTR_EXCL) != 0 - || (head->flags & INTR_EXCL) != 0) { + /* + * If we have a fast interrupt, we need to set the + * handler address directly. Do that below. For a + * slow interrupt, we don't need to know more details, + * so do it here because it's tidier. + */ + if ((flags & INTR_FAST) == 0) { /* - * can't append new handler, if either list head or - * new handler do not allow interrupts to be shared + * Only create a kernel thread if we don't already + * have one. */ - if (bootverbose) - printf("\tdevice combination doesn't support " - "shared irq%d\n", irq); - return (-1); - } - if (head->next == NULL) { + if (ithd->it_proc == NULL) { + errcode = kthread_create(ithd_loop, NULL, &p, + RFSTOPPED | RFHIGHPID, "irq%d: %s", irq, + name); + if (errcode) + panic("inthand_add: Can't create " + "interrupt thread"); + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. */ + ithd->it_proc = p; + p->p_ithd = ithd; + } else + snprintf(ithd->it_proc->p_comm, MAXCOMLEN, + "irq%d: %s", irq, name); + p->p_rtprio.prio = pri; + /* - * second handler for this irq, replace device driver's - * handler by shared interrupt multiplexer function + * The interrupt process must be in place, but + * not necessarily schedulable, before we + * initialize the ICU, since it may cause an + * immediate interrupt. */ - icu_unset(irq, head->handler); - if (icu_setup(irq, intr_mux, head, 0, 0) != 0) - return (-1); - if (bootverbose) - printf("\tusing shared irq%d.\n", irq); - update_intrname(irq, "mux"); + if (icu_setup(irq, &sched_ithd, arg, flags) != 0) + panic("inthand_add: Can't initialize ICU"); } - /* just append to the end of the chain */ - while (head->next != NULL) - head = head->next; - head->next = idesc; - } - update_masks(idesc->maskptr, irq); - return (0); -} - -/* - * Create and activate an interrupt handler descriptor data structure. - * - * The dev_instance pointer is required for resource management, and will - * only be passed through to resource_claim(). 
- * - * There will be functions that derive a driver and unit name from a - * dev_instance variable, and those functions will be used to maintain the - * interrupt counter label array referenced by systat and vmstat to report - * device interrupt rates (->update_intrlabels). - * - * Add the interrupt handler descriptor data structure created by an - * earlier call of create_intr() to the linked list for its irq and - * adjust the interrupt masks if necessary. - * - * WARNING: This is an internal function and not to be used by device - * drivers. It is subject to change without notice. - */ - -intrec * -inthand_add(const char *name, int irq, inthand2_t handler, void *arg, - intrmask_t *maskptr, int flags) -{ - intrec *idesc; - int errcode = -1; - intrmask_t oldspl; - - if (ICU_LEN > 8 * sizeof *maskptr) { - printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n", - ICU_LEN, 8 * sizeof *maskptr); + } else if ((flags & INTR_EXCL) != 0 + || (ithd->it_ih->flags & INTR_EXCL) != 0) { + /* + * We can't append the new handler if either + * list ithd or new handler do not allow + * interrupts to be shared. + */ + if (bootverbose) + printf("\tdevice combination %s and %s " + "doesn't support shared irq%d\n", + ithd->it_ih->name, name, irq); + return(NULL); + } else if (flags & INTR_FAST) { + /* We can only have one fast interrupt by itself. */ + if (bootverbose) + printf("\tCan't add fast interrupt %s" + " to normal interrupt %s on irq%d", + name, ithd->it_ih->name, irq); return (NULL); + } else { /* update p_comm */ + p = ithd->it_proc; + if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) { + strcat(p->p_comm, " "); + strcat(p->p_comm, name); + } else if (strlen(p->p_comm) == MAXCOMLEN) + p->p_comm[MAXCOMLEN - 1] = '+'; + else + strcat(p->p_comm, "+"); } - if ((unsigned)irq >= ICU_LEN) { - printf("create_intr: requested irq%d too high, limit is %d\n", - irq, ICU_LEN -1); + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) return (NULL); - } + bzero(idesc, sizeof (struct intrec)); - idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK); - if (idesc == NULL) - return NULL; - bzero(idesc, sizeof *idesc); + idesc->handler = handler; + idesc->argument = arg; + idesc->flags = flags; + idesc->ithd = ithd; - if (name == NULL) - name = "???"; idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); if (idesc->name == NULL) { free(idesc, M_DEVBUF); - return NULL; + return (NULL); } strcpy(idesc->name, name); - idesc->handler = handler; - idesc->argument = arg; - idesc->maskptr = maskptr; - idesc->intr = irq; - idesc->flags = flags; - - /* block this irq */ - oldspl = splq(1 << irq); - - /* add irq to class selected by maskptr */ - errcode = add_intrdesc(idesc); - splx(oldspl); - - if (errcode != 0) { + /* Slow interrupts got set up above. 
*/ + if ((flags & INTR_FAST) + && (icu_setup(irq, idesc->handler, idesc->argument, + idesc->flags) != 0) ) { if (bootverbose) - printf("\tintr_connect(irq%d) failed, result=%d\n", + printf("\tinthand_add(irq%d) failed, result=%d\n", irq, errcode); free(idesc->name, M_DEVBUF); free(idesc, M_DEVBUF); - idesc = NULL; + return NULL; } - + head = ithd->it_ih; /* look at chain of handlers */ + if (head) { + while (head->next != NULL) + head = head->next; /* find the end */ + head->next = idesc; /* hook it in there */ + } else + ithd->it_ih = idesc; /* put it up front */ + update_intrname(irq, idesc->name); return (idesc); } /* - * Deactivate and remove the interrupt handler descriptor data connected - * created by an earlier call of intr_connect() from the linked list and - * adjust theinterrupt masks if necessary. + * Deactivate and remove linked list the interrupt handler descriptor + * data connected created by an earlier call of inthand_add(), then + * adjust the interrupt masks if necessary. * - * Return the memory held by the interrupt handler descriptor data structure - * to the system. Make sure, the handler is not actively used anymore, before. + * Return the memory held by the interrupt handler descriptor data + * structure to the system. First ensure the handler is not actively + * in use. */ int inthand_remove(intrec *idesc) { - intrec **hook, *head; - int irq; - int errcode = 0; - intrmask_t oldspl; + ithd *ithd; /* descriptor for the IRQ */ + intrec *ih; /* chain of handlers */ if (idesc == NULL) return (-1); + ithd = idesc->ithd; + ih = ithd->it_ih; - irq = idesc->intr; - - /* find pointer that keeps the reference to this interrupt descriptor */ - hook = find_pred(idesc, irq); - if (hook == NULL) + if (ih == idesc) /* first in the chain */ + ithd->it_ih = idesc->next; /* unhook it */ + else { + while ((ih != NULL) + && (ih->next != idesc) ) + ih = ih->next; + if (ih->next != idesc) return (-1); - - /* make copy of original list head, the line after may overwrite it */ - head = intreclist_head[irq]; - - /* unlink: make predecessor point to idesc->next instead of to idesc */ - *hook = idesc->next; - - /* now check whether the element we removed was the list head */ - if (idesc == head) { - - oldspl = splq(1 << irq); - - /* check whether the new list head is the only element on list */ - head = intreclist_head[irq]; - if (head != NULL) { - icu_unset(irq, intr_mux); - if (head->next != NULL) { - /* install the multiplex handler with new list head as argument */ - errcode = icu_setup(irq, intr_mux, head, 0, 0); - if (errcode == 0) - update_intrname(irq, NULL); - } else { - /* install the one remaining handler for this irq */ - errcode = icu_setup(irq, head->handler, - head->argument, - head->maskptr, head->flags); - if (errcode == 0) - update_intrname(irq, head->name); + ih->next = ih->next->next; } - } else { - /* revert to old handler, eg: strayintr */ - icu_unset(irq, idesc->handler); - } - splx(oldspl); - } - update_masks(idesc->maskptr, irq); + + if (ithd->it_ih == NULL) /* no handlers left, */ + icu_unset(ithd->irq, idesc->handler); free(idesc, M_DEVBUF); return (0); } diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h index 5982295b1ab4..87c97a35f5ef 100644 --- a/sys/i386/isa/intr_machdep.h +++ b/sys/i386/isa/intr_machdep.h @@ -98,7 +98,6 @@ #define TPR_BLOCK_XCPUSTOP 0xaf /* */ #define TPR_BLOCK_ALL 0xff /* all INTs */ - #ifdef TEST_TEST1 /* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */ #define XTEST1_OFFSET (ICU_OFFSET + 31) @@ -145,8 
+144,9 @@ extern u_long intrcnt[]; /* counts for for each device and stray */ extern char intrnames[]; /* string table containing device names */ extern u_long *intr_countp[]; /* pointers into intrcnt[] */ extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */ -extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */ +extern ithd *ithds[]; extern void *intr_unit[]; /* cookies to pass to intr handlers */ +extern ithd softinterrupt; /* soft interrupt thread */ inthand_t IDTVEC(fastintr0), IDTVEC(fastintr1), @@ -190,26 +190,60 @@ inthand_t #endif /** TEST_TEST1 */ #endif /* SMP || APIC_IO */ +#ifdef PC98 +#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ +#define ICU_SLAVEID 7 +#else +#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ +#define ICU_SLAVEID 2 +#endif + +#ifdef APIC_IO +/* + * This is to accommodate "mixed-mode" programming for + * motherboards that don't connect the 8254 to the IO APIC. + */ +#define AUTO_EOI_1 1 +#endif + +#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) + void isa_defaultirq __P((void)); int isa_nmi __P((int cd)); int icu_setup __P((int intr, inthand2_t *func, void *arg, - u_int *maskptr, int flags)); + int flags)); int icu_unset __P((int intr, inthand2_t *handler)); -int update_intr_masks __P((void)); intrmask_t splq __P((intrmask_t mask)); -#define INTR_FAST 0x00000001 /* fast interrupt handler */ -#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */ +/* + * Describe a hardware interrupt handler. These structures are + * accessed via the array intreclist, which contains one pointer per + * hardware interrupt. + * + * Multiple interrupt handlers for a specific IRQ can be chained + * together via the 'next' pointer. + */ +typedef struct intrec { + inthand2_t *handler; /* code address of handler */ + void *argument; /* argument to pass to handler */ + enum intr_type flags; /* flag bits (sys/bus.h) */ + char *name; /* name of handler */ + ithd *ithd; /* handler we're connected to */ + struct intrec *next; /* next handler for this irq */ +} intrec; /* * WARNING: These are internal functions and not to be used by device drivers! * They are subject to change without notice. 
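The header above now revolves around two records: the intrec handler entry defined here, and the per-IRQ ithd that the entries hang off. The ithd structure itself is not defined in this hunk; the fields below are only inferred from how ithread.c and intr_machdep.c use it. The second fragment shows how a driver's registration call changes shape, with a made-up "foo" driver standing in for any real one.

/*
 * Inferred shape of the per-IRQ descriptor; the real definition lives
 * elsewhere in the tree, and only the fields used in this diff are listed.
 */
typedef struct ithd {
	int		irq;		/* IRQ this thread services */
	struct proc	*it_proc;	/* the interrupt thread process */
	struct intrec	*it_ih;		/* chain of handler records */
	int		it_need;	/* another interrupt arrived while running */
} ithd;

/* Hypothetical driver attach, old signature: an spl mask pointer. */
ih = inthand_add("foo0", irq, foointr, sc, &tty_imask, INTR_EXCL);

/* Same attach after this commit: a thread priority (PI_*) instead. */
ih = inthand_add("foo0", irq, foointr, sc, PI_REALTIME, INTR_EXCL);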
*/ struct intrec *inthand_add(const char *name, int irq, inthand2_t handler, - void *arg, intrmask_t *maskptr, int flags); - + void *arg, int pri, int flags); int inthand_remove(struct intrec *idesc); +void sched_ithd(void *); +void ithd_loop(void *); +void start_softintr(void *); +void intr_soft(void *); #endif /* LOCORE */ diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s index 93612301fa85..1ee9ace4559e 100644 --- a/sys/i386/isa/ipl.s +++ b/sys/i386/isa/ipl.s @@ -44,7 +44,6 @@ * AT/386 * Vector interrupt control section * - * cpl - Current interrupt disable mask * *_imask - Interrupt masks for various spl*() functions * ipending - Pending interrupts (set when a masked interrupt occurs) */ @@ -53,8 +52,6 @@ ALIGN_DATA /* current priority (all off) */ - .globl _cpl -_cpl: .long HWI_MASK | SWI_MASK .globl _tty_imask _tty_imask: .long SWI_TTY_MASK @@ -71,9 +68,9 @@ _softnet_imask: .long SWI_NET_MASK .globl _softtty_imask _softtty_imask: .long SWI_TTY_MASK -/* pending interrupts blocked by splxxx() */ - .globl _ipending -_ipending: .long 0 +/* pending software interrupts */ + .globl _spending +_spending: .long 0 /* set with bits for which queue to service */ .globl _netisr @@ -100,59 +97,30 @@ _netisrs: _doreti: FAKE_MCOUNT(_bintr) /* init "from" _bintr -> _doreti */ addl $4,%esp /* discard unit number */ - popl %eax /* cpl or cml to restore */ doreti_next: - /* - * Check for pending HWIs and SWIs atomically with restoring cpl - * and exiting. The check has to be atomic with exiting to stop - * (ipending & ~cpl) changing from zero to nonzero while we're - * looking at it (this wouldn't be fatal but it would increase - * interrupt latency). Restoring cpl has to be atomic with exiting - * so that the stack cannot pile up (the nesting level of interrupt - * handlers is limited by the number of bits in cpl). - */ -#ifdef SMP - cli /* early to prevent INT deadlock */ -doreti_next2: -#endif - movl %eax,%ecx - notl %ecx /* set bit = unmasked level */ -#ifndef SMP - cli -#endif - andl _ipending,%ecx /* set bit = unmasked pending INT */ - jne doreti_unpend - movl %eax,_cpl decb _intr_nesting_level /* Check for ASTs that can be handled now. */ testl $AST_PENDING,_astpending - je doreti_exit - testb $SEL_RPL_MASK,TF_CS(%esp) - jne doreti_ast - testl $PSL_VM,TF_EFLAGS(%esp) - je doreti_exit - cmpl $1,_in_vm86call - jne doreti_ast + je doreti_exit /* no AST, exit */ + testb $SEL_RPL_MASK,TF_CS(%esp) /* are we in user mode? */ + jne doreti_ast /* yes, do it now. */ + testl $PSL_VM,TF_EFLAGS(%esp) /* kernel mode */ + je doreti_exit /* and not VM86 mode, defer */ + cmpl $1,_in_vm86call /* are we in a VM86 call? */ + jne doreti_ast /* yes, we can do it */ /* - * doreti_exit - release MP lock, pop registers, iret. + * doreti_exit: release MP lock, pop registers, iret. * - * Note that the syscall trap shotcuts to doreti_syscall_ret. + * Note that the syscall trap shortcuts to doreti_syscall_ret. * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment - * registers. The fault is handled in trap.c + * registers. The fault is handled in trap.c. */ - doreti_exit: MEXITCOUNT -#ifdef SMP - /* release the kernel lock */ - movl $_mp_lock, %edx /* GIANT_LOCK */ - call _MPrellock_edx -#endif /* SMP */ - .globl doreti_popl_fs .globl doreti_syscall_ret doreti_syscall_ret: @@ -170,6 +138,13 @@ doreti_popl_ds: doreti_iret: iret + /* + * doreti_iret_fault and friends. 
Alternative return code for + * the case where we get a fault in the doreti_exit code + * above. trap() (i386/i386/trap.c) catches this specific + * case, sends the process a signal and continues in the + * corresponding place in the code below. + */ ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: @@ -189,93 +164,11 @@ doreti_popl_fs_fault: jmp alltraps_with_regs_pushed ALIGN_TEXT -doreti_unpend: - /* - * Enabling interrupts is safe because we haven't restored cpl yet. - * %ecx contains the next probable ready interrupt (~cpl & ipending) - */ -#ifdef SMP - bsfl %ecx, %ecx /* locate the next dispatchable int */ - lock - btrl %ecx, _ipending /* is it really still pending? */ - jnc doreti_next2 /* some intr cleared memory copy */ - sti /* late to prevent INT deadlock */ -#else - sti - bsfl %ecx,%ecx /* slow, but not worth optimizing */ - btrl %ecx,_ipending - jnc doreti_next /* some intr cleared memory copy */ -#endif /* SMP */ - /* - * Execute handleable interrupt - * - * Set up JUMP to _ihandlers[%ecx] for HWIs. - * Set up CALL of _ihandlers[%ecx] for SWIs. - * This is a bit early for the SMP case - we have to push %ecx and - * %edx, but could push only %ecx and load %edx later. - */ - movl _ihandlers(,%ecx,4),%edx - cmpl $NHWI,%ecx - jae doreti_swi /* software interrupt handling */ - cli /* else hardware int handling */ -#ifdef SMP - movl %eax,_cpl /* same as non-smp case right now */ -#else - movl %eax,_cpl -#endif - MEXITCOUNT -#ifdef APIC_INTR_DIAGNOSTIC - lock - incl CNAME(apic_itrace_doreti)(,%ecx,4) -#ifdef APIC_INTR_DIAGNOSTIC_IRQ - cmpl $APIC_INTR_DIAGNOSTIC_IRQ,%ecx - jne 9f - pushl %eax - pushl %ecx - pushl %edx - pushl $APIC_ITRACE_DORETI - call log_intr_event - addl $4,%esp - popl %edx - popl %ecx - popl %eax -9: -#endif -#endif - jmp *%edx - - ALIGN_TEXT -doreti_swi: - pushl %eax - /* - * At least the SWI_CLOCK handler has to run at a possibly strictly - * lower cpl, so we have to restore - * all the h/w bits in cpl now and have to worry about stack growth. - * The worst case is currently (30 Jan 1994) 2 SWI handlers nested - * in dying interrupt frames and about 12 HWIs nested in active - * interrupt frames. There are only 4 different SWIs and the HWI - * and SWI masks limit the nesting further. - * - * The SMP case is currently the same as the non-SMP case. 
- */ -#ifdef SMP - orl imasks(,%ecx,4), %eax /* or in imasks */ - movl %eax,_cpl /* set cpl for call */ -#else - orl imasks(,%ecx,4),%eax - movl %eax,_cpl -#endif - call *%edx - popl %eax /* cpl to restore */ - jmp doreti_next - - ALIGN_TEXT doreti_ast: andl $~AST_PENDING,_astpending sti movl $T_ASTFLT,TF_TRAPNO(%esp) - call _trap - subl %eax,%eax /* recover cpl|cml */ + call _ast movb $1,_intr_nesting_level /* for doreti_next to decrement */ jmp doreti_next diff --git a/sys/i386/isa/ipl_funcs.c b/sys/i386/isa/ipl_funcs.c index d27d97fa9b1f..14eb2402eb0e 100644 --- a/sys/i386/isa/ipl_funcs.c +++ b/sys/i386/isa/ipl_funcs.c @@ -27,11 +27,13 @@ */ #include <sys/param.h> +#include <sys/bus.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/sysctl.h> #include <machine/ipl.h> -#include <machine/globals.h> +#include <sys/proc.h> +#include <i386/isa/icu.h> #include <i386/isa/intr_machdep.h> /* @@ -45,236 +47,55 @@ void name(void) \ { \ atomic_set_int(var, bits); \ + sched_ithd((void *) SOFTINTR); \ } -DO_SETBITS(setdelayed, &ipending, loadandclear(&idelayed)) +DO_SETBITS(setdelayed, &spending, loadandclear(&idelayed)) +DO_SETBITS(setsoftcamnet,&spending, SWI_CAMNET_PENDING) +DO_SETBITS(setsoftcambio,&spending, SWI_CAMBIO_PENDING) +DO_SETBITS(setsoftclock, &spending, SWI_CLOCK_PENDING) +DO_SETBITS(setsoftnet, &spending, SWI_NET_PENDING) +DO_SETBITS(setsofttty, &spending, SWI_TTY_PENDING) +DO_SETBITS(setsoftvm, &spending, SWI_VM_PENDING) +DO_SETBITS(setsofttq, &spending, SWI_TQ_PENDING) -DO_SETBITS(setsoftcamnet,&ipending, SWI_CAMNET_PENDING) -DO_SETBITS(setsoftcambio,&ipending, SWI_CAMBIO_PENDING) -DO_SETBITS(setsoftclock, &ipending, SWI_CLOCK_PENDING) -DO_SETBITS(setsoftnet, &ipending, SWI_NET_PENDING) -DO_SETBITS(setsofttty, &ipending, SWI_TTY_PENDING) -DO_SETBITS(setsoftvm, &ipending, SWI_VM_PENDING) -DO_SETBITS(setsofttq, &ipending, SWI_TQ_PENDING) - -DO_SETBITS(schedsoftcamnet, &idelayed, SWI_CAMNET_PENDING) -DO_SETBITS(schedsoftcambio, &idelayed, SWI_CAMBIO_PENDING) -DO_SETBITS(schedsoftnet, &idelayed, SWI_NET_PENDING) -DO_SETBITS(schedsofttty, &idelayed, SWI_TTY_PENDING) -DO_SETBITS(schedsoftvm, &idelayed, SWI_VM_PENDING) -DO_SETBITS(schedsofttq, &idelayed, SWI_TQ_PENDING) +/* + * We don't need to schedule soft interrupts any more, it happens + * automatically. + */ +#define schedsoftcamnet +#define schedsoftcambio +#define schedsoftnet +#define schedsofttty +#define schedsoftvm +#define schedsofttq unsigned softclockpending(void) { - return (ipending & SWI_CLOCK_PENDING); + return (spending & SWI_CLOCK_PENDING); } /* - * Support for SPL assertions. 
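With cpl gone, the setsoft*() routines above no longer just flag a bit for splx()/doreti to pick up later: each one sets its bit in spending and immediately asks for the soft interrupt thread via sched_ithd(SOFTINTR). Written out long-hand, one instance of the DO_SETBITS() macro above expands to roughly:

/* DO_SETBITS(setsofttty, &spending, SWI_TTY_PENDING), expanded. */
void
setsofttty(void)
{
	atomic_set_int(&spending, SWI_TTY_PENDING);	/* mark the SWI pending */
	sched_ithd((void *) SOFTINTR);			/* wake the soft interrupt thread */
}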
- */ - -#ifdef INVARIANT_SUPPORT - -#define SPLASSERT_IGNORE 0 -#define SPLASSERT_LOG 1 -#define SPLASSERT_PANIC 2 - -static int splassertmode = SPLASSERT_LOG; -SYSCTL_INT(_kern, OID_AUTO, splassertmode, CTLFLAG_RW, - &splassertmode, 0, "Set the mode of SPLASSERT"); - -static void -init_splassertmode(void *ignored) -{ - TUNABLE_INT_FETCH("kern.splassertmode", 0, splassertmode); -} -SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_splassertmode, NULL); - -static void -splassertfail(char *str, const char *msg, char *name, int level) -{ - switch (splassertmode) { - case SPLASSERT_IGNORE: - break; - case SPLASSERT_LOG: - printf(str, msg, name, level); - printf("\n"); - break; - case SPLASSERT_PANIC: - panic(str, msg, name, level); - break; - } -} - -#define GENSPLASSERT(NAME, MODIFIER) \ -void \ -NAME##assert(const char *msg) \ -{ \ - if ((cpl & (MODIFIER)) != (MODIFIER)) \ - splassertfail("%s: not %s, cpl == %#x", \ - msg, __XSTRING(NAME) + 3, cpl); \ -} -#else -#define GENSPLASSERT(NAME, MODIFIER) -#endif - -/************************************************************************ - * GENERAL SPL CODE * - ************************************************************************ - * - * Implement splXXX(), spl0(), splx(), and splq(). splXXX() disables a - * set of interrupts (e.g. splbio() disables interrupts relating to - * device I/O) and returns the previous interrupt mask. splx() restores - * the previous interrupt mask, spl0() is a special case which enables - * all interrupts and is typically used inside i386/i386 swtch.s and - * fork_trampoline. splq() is a generic version of splXXX(). - * - * The SPL routines mess around with the 'cpl' global, which masks - * interrupts. Interrupts are not *actually* masked. What happens is - * that if an interrupt masked by the cpl occurs, the appropriate bit - * in 'ipending' is set and the interrupt is defered. When we clear - * bits in the cpl we must check to see if any ipending interrupts have - * been unmasked and issue the synchronously, which is what the splz() - * call does. - * - * Because the cpl is often saved and restored in a nested fashion, cpl - * modifications are only allowed in the SMP case when the MP lock is held - * to prevent multiple processes from tripping over each other's masks. - * The cpl is saved when you do a context switch (mi_switch()) and restored - * when your process gets cpu again. - * - * An interrupt routine is allowed to modify the cpl as long as it restores - * it prior to returning (thus the interrupted mainline code doesn't notice - * anything amiss). For the SMP case, the interrupt routine must hold - * the MP lock for any cpl manipulation. - * - * Likewise, due to the deterministic nature of cpl modifications, we do - * NOT need to use locked instructions to modify it. + * Dummy spl calls. The only reason for these is to not break + * all the code which expects to call them. */ - -#ifndef SMP - -#define GENSPL(NAME, OP, MODIFIER, PC) \ -GENSPLASSERT(NAME, MODIFIER) \ -unsigned NAME(void) \ -{ \ - unsigned x; \ - \ - x = cpl; \ - cpl OP MODIFIER; \ - return (x); \ -} - -void -spl0(void) -{ - cpl = 0; - if (ipending) - splz(); -} - -void -splx(unsigned ipl) -{ - cpl = ipl; - if (ipending & ~ipl) - splz(); -} - -intrmask_t -splq(intrmask_t mask) -{ - intrmask_t tmp = cpl; - cpl |= mask; - return (tmp); -} - -#else /* !SMP */ - -#include <machine/smp.h> -#include <machine/smptests.h> - -/* - * SMP CASE - * - * Mostly the same as the non-SMP case now, but it didn't used to be - * this clean. 
- */ - -#define GENSPL(NAME, OP, MODIFIER, PC) \ -GENSPLASSERT(NAME, MODIFIER) \ -unsigned NAME(void) \ -{ \ - unsigned x; \ - \ - x = cpl; \ - cpl OP MODIFIER; \ - \ - return (x); \ -} - -/* - * spl0() - unmask all interrupts - * - * The MP lock must be held on entry - * This routine may only be called from mainline code. - */ -void -spl0(void) -{ - KASSERT(inside_intr == 0, ("spl0: called from interrupt")); - cpl = 0; - if (ipending) - splz(); -} - -/* - * splx() - restore previous interrupt mask - * - * The MP lock must be held on entry - */ - -void -splx(unsigned ipl) -{ - cpl = ipl; - if (inside_intr == 0 && (ipending & ~cpl) != 0) - splz(); -} - - -/* - * splq() - blocks specified interrupts - * - * The MP lock must be held on entry - */ -intrmask_t -splq(intrmask_t mask) -{ - intrmask_t tmp = cpl; - cpl |= mask; - return (tmp); -} - -#endif /* !SMP */ - -/* Finally, generate the actual spl*() functions */ - -/* NAME: OP: MODIFIER: PC: */ -GENSPL(splbio, |=, bio_imask, 2) -GENSPL(splcam, |=, cam_imask, 7) -GENSPL(splclock, =, HWI_MASK | SWI_MASK, 3) -GENSPL(splhigh, =, HWI_MASK | SWI_MASK, 4) -GENSPL(splimp, |=, net_imask, 5) -GENSPL(splnet, |=, SWI_NET_MASK, 6) -GENSPL(splsoftcam, |=, SWI_CAMBIO_MASK | SWI_CAMNET_MASK, 8) -GENSPL(splsoftcambio, |=, SWI_CAMBIO_MASK, 9) -GENSPL(splsoftcamnet, |=, SWI_CAMNET_MASK, 10) -GENSPL(splsoftclock, =, SWI_CLOCK_MASK, 11) -GENSPL(splsofttty, |=, SWI_TTY_MASK, 12) -GENSPL(splsoftvm, |=, SWI_VM_MASK, 16) -GENSPL(splsofttq, |=, SWI_TQ_MASK, 17) -GENSPL(splstatclock, |=, stat_imask, 13) -GENSPL(spltty, |=, tty_imask, 14) -GENSPL(splvm, |=, net_imask | bio_imask | cam_imask, 15) +void spl0 (void) {} +void splx (intrmask_t x) {} +intrmask_t splq(intrmask_t mask) {return 0; } +intrmask_t splbio(void) {return 0; } +intrmask_t splcam(void) {return 0; } +intrmask_t splclock(void) {return 0; } +intrmask_t splhigh(void) {return 0; } +intrmask_t splimp(void) {return 0; } +intrmask_t splnet(void) {return 0; } +intrmask_t splsoftcam(void) {return 0; } +intrmask_t splsoftcambio(void) {return 0; } +intrmask_t splsoftcamnet(void) {return 0; } +intrmask_t splsoftclock(void) {return 0; } +intrmask_t splsofttty(void) {return 0; } +intrmask_t splsoftvm(void) {return 0; } +intrmask_t splsofttq(void) {return 0; } +intrmask_t splstatclock(void) {return 0; } +intrmask_t spltty(void) {return 0; } +intrmask_t splvm(void) {return 0; } diff --git a/sys/i386/isa/ithread.c b/sys/i386/isa/ithread.c new file mode 100644 index 000000000000..4ceac4229d1c --- /dev/null +++ b/sys/i386/isa/ithread.c @@ -0,0 +1,353 @@ +/*- + * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Berkeley Software Design Inc's name may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp + * $FreeBSD$ + */ + +/* Interrupt thread code. */ + +#include "opt_auto_eoi.h" + +#include "isa.h" + +#include <sys/param.h> +#include <sys/rtprio.h> /* change this name XXX */ +#ifndef SMP +#include <machine/lock.h> +#endif +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/syslog.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/unistd.h> +#include <sys/errno.h> +#include <sys/interrupt.h> +#include <machine/ipl.h> +#include <machine/md_var.h> +#include <machine/segments.h> +#include <sys/bus.h> + +#if defined(APIC_IO) +#include <machine/smp.h> +#include <machine/smptests.h> /** FAST_HI */ +#include <machine/resource.h> +#endif /* APIC_IO */ +#ifdef PC98 +#include <pc98/pc98/pc98.h> +#include <pc98/pc98/pc98_machdep.h> +#include <pc98/pc98/epsonio.h> +#else +#include <i386/isa/isa.h> +#endif +#include <i386/isa/icu.h> + +#if NISA > 0 +#include <isa/isavar.h> +#endif +#include <i386/isa/intr_machdep.h> +#include <sys/interrupt.h> +#ifdef APIC_IO +#include <machine/clock.h> +#endif + +#include "mca.h" +#if NMCA > 0 +#include <i386/isa/mca_machdep.h> +#endif + +#include <sys/vmmeter.h> +#include <machine/mutex.h> +#include <sys/ktr.h> +#include <machine/cpu.h> +#if 0 +#include <ddb/ddb.h> +#endif + +u_long softintrcnt [NSWI]; + +SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL) + +/* + * Schedule a heavyweight interrupt process. This function is called + * from the interrupt handlers Xintr<num>. + */ +void +sched_ithd(void *cookie) +{ + int irq = (int) cookie; /* IRQ we're handling */ + ithd *ir = ithds[irq]; /* and the process that does it */ + + /* This used to be in icu_vector.s */ + /* + * We count software interrupts when we process them. The + * code here follows previous practice, but there's an + * argument for counting hardware interrupts when they're + * processed too. + */ + if (irq < NHWI) /* real interrupt, */ + atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */ + atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */ + + CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d", + ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need); + +#if 0 + /* + * If we are in the debugger, we can't use interrupt threads to + * process interrupts since the threads are scheduled. Instead, + * call the interrupt handlers directly. This should be able to + * go away once we have light-weight interrupt handlers. 
+ */ + if (db_active) { + intrec *ih; /* and our interrupt handler chain */ +#if 0 + membar_unlock(); /* push out "it_need=0" */ +#endif + for (ih = ir->it_ih; ih != NULL; ih = ih->next) { + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_enter(&Giant, MTX_DEF); + ih->handler(ih->argument); + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_exit(&Giant, MTX_DEF); + } + + INTREN (1 << ir->irq); /* reset the mask bit */ + return; + } +#endif + + /* + * Set it_need so that if the thread is already running but close + * to done, it will do another go-round. Then get the sched lock + * and see if the thread is on whichkqs yet. If not, put it on + * there. In any case, kick everyone so that if the new thread + * is higher priority than their current thread, it gets run now. + */ + ir->it_need = 1; + mtx_enter(&sched_lock, MTX_SPIN); + if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */ + CTR1(KTR_INTR, "sched_ithd: setrunqueue %d", + ir->it_proc->p_pid); +/* membar_lock(); */ + ir->it_proc->p_stat = SRUN; + setrunqueue(ir->it_proc); + aston(); + } + else { +if (irq < NHWI && (irq & 7) != 0) + CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d", + ir->it_proc->p_pid, + ir->it_need, + ir->it_proc->p_stat ); + } + mtx_exit(&sched_lock, MTX_SPIN); +#if 0 + aston(); /* ??? check priorities first? */ +#else + need_resched(); +#endif +} + +/* + * This is the main code for all interrupt threads. It gets put on + * whichkqs by setrunqueue above. + */ +void +ithd_loop(void *dummy) +{ + ithd *me; /* our thread context */ + intrec *ih; /* and our interrupt handler chain */ + + me = curproc->p_ithd; /* point to myself */ + + /* + * As long as we have interrupts outstanding, go through the + * list of handlers, giving each one a go at it. + */ + for (;;) { + CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d", + me->it_proc->p_pid, me->it_proc->p_comm, me->it_need); + while (me->it_need) { + /* + * Service interrupts. If another interrupt + * arrives while we are running, they will set + * it_need to denote that we should make + * another pass. + */ + me->it_need = 0; +#if 0 + membar_unlock(); /* push out "it_need=0" */ +#endif + for (ih = me->it_ih; ih != NULL; ih = ih->next) { + CTR5(KTR_INTR, + "ithd_loop pid %d ih=%p: %p(%p) flg=%x", + me->it_proc->p_pid, (void *)ih, + (void *)ih->handler, ih->argument, + ih->flags); + + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_enter(&Giant, MTX_DEF); + ih->handler(ih->argument); + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_exit(&Giant, MTX_DEF); + } + } + + /* + * Processed all our interrupts. Now get the sched + * lock. This may take a while and it_need may get + * set again, so we have to check it again. + */ + mtx_enter(&sched_lock, MTX_SPIN); + if (!me->it_need) { + + INTREN (1 << me->irq); /* reset the mask bit */ + me->it_proc->p_stat = SWAIT; /* we're idle */ +#ifdef APIC_IO + CTR1(KTR_INTR, "ithd_loop pid %d: done", + me->it_proc->p_pid); +#else + CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x", + me->it_proc->p_pid, imen); +#endif + mi_switch(); + CTR1(KTR_INTR, "ithd_loop pid %d: resumed", + me->it_proc->p_pid); + } + mtx_exit(&sched_lock, MTX_SPIN); + } +} + +/* + * Start soft interrupt thread. + */ +void +start_softintr(void *dummy) +{ + int error; + struct proc *p; + ithd *softintr; /* descriptor for the "IRQ" */ + intrec *idesc; /* descriptor for this handler */ + char *name = "sintr"; /* name for idesc */ + int i; + + if (ithds[SOFTINTR]) { /* we already have a thread */ + printf("start_softintr: already running"); + return; + } + /* first handler for this irq. 
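The it_need handshake above is the core of the new scheme: sched_ithd() sets the flag and puts the thread on the run queue, while ithd_loop() clears it before each pass and re-checks it under sched_lock before going idle, so a request that arrives mid-pass is never lost. A user-space analogue of the same pattern, written with pthreads purely for illustration; none of these names appear in the patch:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t sched_mtx = PTHREAD_MUTEX_INITIALIZER;  /* plays sched_lock */
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static volatile int it_need;            /* "do another pass" flag */
static volatile int nevents;            /* work delivered so far */

/* Interrupt side: what sched_ithd() does, minus the run queue. */
static void
post_event(void)
{
        pthread_mutex_lock(&sched_mtx);
        it_need = 1;                    /* thread must make another go-round */
        nevents++;
        pthread_cond_signal(&wake);     /* kick the thread if it is idle */
        pthread_mutex_unlock(&sched_mtx);
}

/* Thread side: the shape of ithd_loop(). */
static void *
ithread(void *arg)
{
        (void)arg;
        for (;;) {
                while (it_need) {
                        it_need = 0;    /* clear first, then service */
                        printf("pass: %d event(s) so far\n", nevents);
                }
                /*
                 * it_need may have been set again while we were busy, so
                 * re-check it under the lock before sleeping; this is the
                 * same reason ithd_loop() re-tests it_need after taking
                 * sched_lock and before mi_switch().
                 */
                pthread_mutex_lock(&sched_mtx);
                while (!it_need)
                        pthread_cond_wait(&wake, &sched_mtx);
                pthread_mutex_unlock(&sched_mtx);
        }
        return (NULL);
}

int
main(void)
{
        pthread_t tid;
        int i;

        pthread_create(&tid, NULL, ithread, NULL);
        for (i = 0; i < 3; i++) {
                post_event();
                usleep(100000);         /* give the thread time to drain */
        }
        return (0);
}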
*/ + softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (softintr == NULL) + panic ("Can't create soft interrupt thread"); + bzero(softintr, sizeof(struct ithd)); + softintr->irq = SOFTINTR; + ithds[SOFTINTR] = softintr; + error = kthread_create(intr_soft, NULL, &p, + RFSTOPPED | RFHIGHPID, "softinterrupt"); + if (error) + panic("start_softintr: kthread_create error %d\n", error); + + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_rtprio.prio = PI_SOFT; /* soft interrupt */ + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. */ + softintr->it_proc = p; + p->p_ithd = softintr; /* reverse link */ + + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) + panic ("Can't create soft interrupt thread"); + bzero(idesc, sizeof (struct intrec)); + + idesc->ithd = softintr; + idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); + if (idesc->name == NULL) + panic ("Can't create soft interrupt thread"); + strcpy(idesc->name, name); + for (i = NHWI; i < NHWI + NSWI; i++) + intr_countp[i] = &softintrcnt [i - NHWI]; +} + +/* + * Software interrupt process code. + */ +void +intr_soft(void *dummy) +{ + int i; + ithd *me; /* our thread context */ + + me = curproc->p_ithd; /* point to myself */ + + /* Main loop */ + for (;;) { +#if 0 + CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d", + me->it_proc->p_pid, me->it_proc->p_comm, + me->it_need); +#endif + + /* + * Service interrupts. If another interrupt arrives + * while we are running, they will set it_need to + * denote that we should make another pass. + */ + me->it_need = 0; + while ((i = ffs(spending))) { + i--; + atomic_add_long(intr_countp[i], 1); + spending &= ~ (1 << i); + mtx_enter(&Giant, MTX_DEF); + (ihandlers[i])(); + mtx_exit(&Giant, MTX_DEF); + } + /* + * Processed all our interrupts. Now get the sched + * lock. This may take a while and it_need may get + * set again, so we have to check it again. + */ + mtx_enter(&sched_lock, MTX_SPIN); + if (!me->it_need) { +#if 0 + CTR1(KTR_INTR, "intr_soft pid %d: done", + me->it_proc->p_pid); +#endif + me->it_proc->p_stat = SWAIT; /* we're idle */ + mi_switch(); +#if 0 + CTR1(KTR_INTR, "intr_soft pid %d: resumed", + me->it_proc->p_pid); +#endif + } + mtx_exit(&sched_lock, MTX_SPIN); + } +} diff --git a/sys/i386/isa/loran.c b/sys/i386/isa/loran.c index 577a608f7113..c43bf8524c24 100644 --- a/sys/i386/isa/loran.c +++ b/sys/i386/isa/loran.c @@ -620,7 +620,7 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, loran_timecounter, CTLFLAG_RD, /**********************************************************************/ struct isa_driver lorandriver = { - INTR_TYPE_TTY | INTR_TYPE_FAST, + INTR_TYPE_TTY | INTR_FAST, loranprobe, loranattach, "loran" diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c index 34a8c229bd6b..870760e1ce01 100644 --- a/sys/i386/isa/nmi.c +++ b/sys/i386/isa/nmi.c @@ -36,12 +36,6 @@ * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD$ */ -/* - * This file contains an aggregated module marked: - * Copyright (c) 1997, Stefan Esser <se@freebsd.org> - * All rights reserved. - * See the notice for details. 
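intr_soft() above drains the spending word one set bit at a time with ffs(), clearing each bit before dispatching its handler. The same loop works anywhere a word of pending flags has to become per-bit dispatch; a stand-alone illustration with a made-up mask and handler:

#include <stdio.h>
#include <strings.h>    /* ffs() */

static void
service(int bit)
{
        printf("servicing pending bit %d\n", bit);
}

int
main(void)
{
        unsigned int pending = 0x29;    /* bits 0, 3 and 5 pending (made up) */
        int i;

        /* ffs() returns the 1-based position of the lowest set bit, 0 if none. */
        while ((i = ffs(pending)) != 0) {
                i--;                    /* convert to a 0-based bit number */
                pending &= ~(1U << i);  /* clear it before servicing */
                service(i);
        }
        return (0);
}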
- */ #include "opt_auto_eoi.h" @@ -51,11 +45,14 @@ #ifndef SMP #include <machine/lock.h> #endif +#include <sys/proc.h> #include <sys/systm.h> #include <sys/syslog.h> #include <sys/kernel.h> +#include <sys/kthread.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/unistd.h> #include <sys/errno.h> #include <sys/interrupt.h> #include <machine/ipl.h> @@ -91,30 +88,14 @@ #include <i386/isa/mca_machdep.h> #endif -/* XXX should be in suitable include files */ -#ifdef PC98 -#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ -#define ICU_SLAVEID 7 -#else -#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ -#define ICU_SLAVEID 2 -#endif - -#ifdef APIC_IO /* - * This is to accommodate "mixed-mode" programming for - * motherboards that don't connect the 8254 to the IO APIC. + * Per-interrupt data. We consider the soft interrupt to be a special + * case, so these arrays have NHWI + NSWI entries, not ICU_LEN. */ -#define AUTO_EOI_1 1 -#endif - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -u_long *intr_countp[ICU_LEN]; -inthand2_t *intr_handler[ICU_LEN]; -u_int intr_mask[ICU_LEN]; -static u_int* intr_mptr[ICU_LEN]; -void *intr_unit[ICU_LEN]; +u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */ +inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */ +ithd *ithds[NHWI + NSWI]; /* real interrupt handler */ +void *intr_unit[NHWI + NSWI]; static inthand_t *fastintr[ICU_LEN] = { &IDTVEC(fastintr0), &IDTVEC(fastintr1), @@ -292,8 +273,9 @@ isa_nmi(cd) } /* - * Fill in default interrupt table (in case of spuruious interrupt - * during configuration of kernel, setup interrupt control unit + * Create a default interrupt table to avoid problems caused by + * spurious interrupts during configuration of kernel, then setup + * interrupt control unit. */ void isa_defaultirq() @@ -364,16 +346,6 @@ isa_strayintr(vcookiep) { int intr = (void **)vcookiep - &intr_unit[0]; - /* DON'T BOTHER FOR NOW! */ - /* for some reason, we get bursts of intr #7, even if not enabled! */ - /* - * Well the reason you got bursts of intr #7 is because someone - * raised an interrupt line and dropped it before the 8259 could - * prioritize it. This is documented in the intel data book. This - * means you have BAD hardware! I have changed this so that only - * the first 5 get logged, then it quits logging them, and puts - * out a special message. rgrimes 3/25/1993 - */ /* * XXX TODO print a different message for #7 if it is for a * glitch. Glitches can be distinguished from real #7's by @@ -405,36 +377,10 @@ isa_irq_pending() } #endif -int -update_intr_masks(void) -{ - int intr, n=0; - u_int mask,*maskptr; - - for (intr=0; intr < ICU_LEN; intr ++) { -#if defined(APIC_IO) - /* no 8259 SLAVE to ignore */ -#else - if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */ -#endif /* APIC_IO */ - maskptr = intr_mptr[intr]; - if (!maskptr) - continue; - *maskptr |= SWI_LOW_MASK | (1 << intr); - mask = *maskptr; - if (mask != intr_mask[intr]) { -#if 0 - printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n", - intr, intr_mask[intr], mask, maskptr); -#endif - intr_mask[intr]=mask; - n++; - } - - } - return (n); -} - +/* + * Update intrnames array with the specified name. This is used by + * vmstat(8) and the like. 
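update_intrname(), whose comment begins above, copies handler names into the fixed-width intrnames table that vmstat(8) and the like read. A toy version of a fixed-slot name update; the 16-byte slot size is an assumption taken from the .space reservation in vector.s at the end of this diff, and every identifier below is invented:

#include <string.h>

#define SLOTLEN 16                      /* assumed width of one intrnames slot */
static char name_table[32][SLOTLEN];    /* stand-in for the intrnames array */

/* Copy a handler name into its slot, truncating and NUL-terminating. */
static void
set_slot_name(int idx, const char *name)
{
        strncpy(name_table[idx], name, SLOTLEN - 1);
        name_table[idx][SLOTLEN - 1] = '\0';
}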
+ */ static void update_intrname(int intr, char *name) { @@ -485,7 +431,7 @@ found: } int -icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) +icu_setup(int intr, inthand2_t *handler, void *arg, int flags) { #ifdef FAST_HI int select; /* the select register is 8 bits */ @@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ u_long ef; - u_int mask = (maskptr ? *maskptr : 0); #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) ef = read_eflags(); disable_intr(); intr_handler[intr] = handler; - intr_mptr[intr] = maskptr; - intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr); intr_unit[intr] = arg; #ifdef FAST_HI if (flags & INTR_FAST) { @@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ INTREN(1 << intr); - MPINTR_UNLOCK(); write_eflags(ef); return (0); } +/* + * Dissociate an interrupt handler from an IRQ and set the handler to + * the stray interrupt handler. The 'handler' parameter is used only + * for consistency checking. + */ int icu_unset(intr, handler) int intr; @@ -567,8 +514,6 @@ icu_unset(intr, handler) disable_intr(); intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; - intr_mptr[intr] = NULL; - intr_mask[intr] = HWI_MASK | SWI_MASK; intr_unit[intr] = &intr_unit[intr]; #ifdef FAST_HI_XXX /* XXX how do I re-create dvp here? */ @@ -581,353 +526,172 @@ icu_unset(intr, handler) setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ - MPINTR_UNLOCK(); write_eflags(ef); return (0); } -/* The following notice applies beyond this point in the file */ - -/* - * Copyright (c) 1997, Stefan Esser <se@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - * - */ - -typedef struct intrec { - intrmask_t mask; - inthand2_t *handler; - void *argument; - struct intrec *next; - char *name; - int intr; - intrmask_t *maskptr; - int flags; -} intrec; - -static intrec *intreclist_head[ICU_LEN]; - -/* - * The interrupt multiplexer calls each of the handlers in turn. The - * ipl is initially quite low. It is raised as necessary for each call - * and lowered after the call. Thus out of order handling is possible - * even for interrupts of the same type. This is probably no more - * harmful than out of order handling in general (not harmful except - * for real time response which we don't support anyway). - */ -static void -intr_mux(void *arg) -{ - intrec *p; - intrmask_t oldspl; - - for (p = arg; p != NULL; p = p->next) { - oldspl = splq(p->mask); - p->handler(p->argument); - splx(oldspl); - } -} - -static intrec* -find_idesc(unsigned *maskptr, int irq) -{ - intrec *p = intreclist_head[irq]; - - while (p && p->maskptr != maskptr) - p = p->next; - - return (p); -} - -static intrec** -find_pred(intrec *idesc, int irq) +intrec * +inthand_add(const char *name, int irq, inthand2_t handler, void *arg, + int pri, int flags) { - intrec **pp = &intreclist_head[irq]; - intrec *p = *pp; - - while (p != idesc) { - if (p == NULL) - return (NULL); - pp = &p->next; - p = *pp; - } - return (pp); -} - -/* - * Both the low level handler and the shared interrupt multiplexer - * block out further interrupts as set in the handlers "mask", while - * the handler is running. In fact *maskptr should be used for this - * purpose, but since this requires one more pointer dereference on - * each interrupt, we rather bother update "mask" whenever *maskptr - * changes. The function "update_masks" should be called **after** - * all manipulation of the linked list of interrupt handlers hung - * off of intrdec_head[irq] is complete, since the chain of handlers - * will both determine the *maskptr values and the instances of mask - * that are fixed. This function should be called with the irq for - * which a new handler has been add blocked, since the masks may not - * yet know about the use of this irq for a device of a certain class. - */ + ithd *ithd = ithds[irq]; /* descriptor for the IRQ */ + intrec *head; /* chain of handlers for IRQ */ + intrec *idesc; /* descriptor for this handler */ + struct proc *p; /* interrupt thread */ + int errcode = 0; -static void -update_mux_masks(void) -{ - int irq; - for (irq = 0; irq < ICU_LEN; irq++) { - intrec *idesc = intreclist_head[irq]; - while (idesc != NULL) { - if (idesc->maskptr != NULL) { - /* our copy of *maskptr may be stale, refresh */ - idesc->mask = *idesc->maskptr; - } - idesc = idesc->next; + if (name == NULL) /* no name? */ + panic ("anonymous interrupt"); + if (ithd == NULL || ithd->it_ih == NULL) { + /* first handler for this irq. 
*/ + if (ithd == NULL) { + ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (ithd == NULL) + return (NULL); + bzero(ithd, sizeof(struct ithd)); + ithd->irq = irq; + ithds[irq] = ithd; } - } -} - -static void -update_masks(intrmask_t *maskptr, int irq) -{ - intrmask_t mask = 1 << irq; - - if (maskptr == NULL) - return; - - if (find_idesc(maskptr, irq) == NULL) { - /* no reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) == 0) - return; - /* the irq was included in the classes mask, remove it */ - *maskptr &= ~mask; - } else { - /* a reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) != 0) - return; - /* put the irq into the classes mask */ - *maskptr |= mask; - } - /* we need to update all values in the intr_mask[irq] array */ - update_intr_masks(); - /* update mask in chains of the interrupt multiplex handler as well */ - update_mux_masks(); -} - -/* - * Add interrupt handler to linked list hung off of intreclist_head[irq] - * and install shared interrupt multiplex handler, if necessary - */ - -static int -add_intrdesc(intrec *idesc) -{ - int irq = idesc->intr; - - intrec *head = intreclist_head[irq]; - - if (head == NULL) { - /* first handler for this irq, just install it */ - if (icu_setup(irq, idesc->handler, idesc->argument, - idesc->maskptr, idesc->flags) != 0) - return (-1); - - update_intrname(irq, idesc->name); - /* keep reference */ - intreclist_head[irq] = idesc; - } else { - if ((idesc->flags & INTR_EXCL) != 0 - || (head->flags & INTR_EXCL) != 0) { + /* + * If we have a fast interrupt, we need to set the + * handler address directly. Do that below. For a + * slow interrupt, we don't need to know more details, + * so do it here because it's tidier. + */ + if ((flags & INTR_FAST) == 0) { /* - * can't append new handler, if either list head or - * new handler do not allow interrupts to be shared + * Only create a kernel thread if we don't already + * have one. */ - if (bootverbose) - printf("\tdevice combination doesn't support " - "shared irq%d\n", irq); - return (-1); - } - if (head->next == NULL) { + if (ithd->it_proc == NULL) { + errcode = kthread_create(ithd_loop, NULL, &p, + RFSTOPPED | RFHIGHPID, "irq%d: %s", irq, + name); + if (errcode) + panic("inthand_add: Can't create " + "interrupt thread"); + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. */ + ithd->it_proc = p; + p->p_ithd = ithd; + } else + snprintf(ithd->it_proc->p_comm, MAXCOMLEN, + "irq%d: %s", irq, name); + p->p_rtprio.prio = pri; + /* - * second handler for this irq, replace device driver's - * handler by shared interrupt multiplexer function + * The interrupt process must be in place, but + * not necessarily schedulable, before we + * initialize the ICU, since it may cause an + * immediate interrupt. */ - icu_unset(irq, head->handler); - if (icu_setup(irq, intr_mux, head, 0, 0) != 0) - return (-1); - if (bootverbose) - printf("\tusing shared irq%d.\n", irq); - update_intrname(irq, "mux"); + if (icu_setup(irq, &sched_ithd, arg, flags) != 0) + panic("inthand_add: Can't initialize ICU"); } - /* just append to the end of the chain */ - while (head->next != NULL) - head = head->next; - head->next = idesc; - } - update_masks(idesc->maskptr, irq); - return (0); -} - -/* - * Create and activate an interrupt handler descriptor data structure. - * - * The dev_instance pointer is required for resource management, and will - * only be passed through to resource_claim(). 
- * - * There will be functions that derive a driver and unit name from a - * dev_instance variable, and those functions will be used to maintain the - * interrupt counter label array referenced by systat and vmstat to report - * device interrupt rates (->update_intrlabels). - * - * Add the interrupt handler descriptor data structure created by an - * earlier call of create_intr() to the linked list for its irq and - * adjust the interrupt masks if necessary. - * - * WARNING: This is an internal function and not to be used by device - * drivers. It is subject to change without notice. - */ - -intrec * -inthand_add(const char *name, int irq, inthand2_t handler, void *arg, - intrmask_t *maskptr, int flags) -{ - intrec *idesc; - int errcode = -1; - intrmask_t oldspl; - - if (ICU_LEN > 8 * sizeof *maskptr) { - printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n", - ICU_LEN, 8 * sizeof *maskptr); + } else if ((flags & INTR_EXCL) != 0 + || (ithd->it_ih->flags & INTR_EXCL) != 0) { + /* + * We can't append the new handler if either + * list ithd or new handler do not allow + * interrupts to be shared. + */ + if (bootverbose) + printf("\tdevice combination %s and %s " + "doesn't support shared irq%d\n", + ithd->it_ih->name, name, irq); + return(NULL); + } else if (flags & INTR_FAST) { + /* We can only have one fast interrupt by itself. */ + if (bootverbose) + printf("\tCan't add fast interrupt %s" + " to normal interrupt %s on irq%d", + name, ithd->it_ih->name, irq); return (NULL); + } else { /* update p_comm */ + p = ithd->it_proc; + if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) { + strcat(p->p_comm, " "); + strcat(p->p_comm, name); + } else if (strlen(p->p_comm) == MAXCOMLEN) + p->p_comm[MAXCOMLEN - 1] = '+'; + else + strcat(p->p_comm, "+"); } - if ((unsigned)irq >= ICU_LEN) { - printf("create_intr: requested irq%d too high, limit is %d\n", - irq, ICU_LEN -1); + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) return (NULL); - } + bzero(idesc, sizeof (struct intrec)); - idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK); - if (idesc == NULL) - return NULL; - bzero(idesc, sizeof *idesc); + idesc->handler = handler; + idesc->argument = arg; + idesc->flags = flags; + idesc->ithd = ithd; - if (name == NULL) - name = "???"; idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); if (idesc->name == NULL) { free(idesc, M_DEVBUF); - return NULL; + return (NULL); } strcpy(idesc->name, name); - idesc->handler = handler; - idesc->argument = arg; - idesc->maskptr = maskptr; - idesc->intr = irq; - idesc->flags = flags; - - /* block this irq */ - oldspl = splq(1 << irq); - - /* add irq to class selected by maskptr */ - errcode = add_intrdesc(idesc); - splx(oldspl); - - if (errcode != 0) { + /* Slow interrupts got set up above. 
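By this point inthand_add() covers both the first handler on an IRQ (create the ithd and its kernel thread, program the ICU to call sched_ithd) and handlers appended to an existing chain. A hedged sketch of what a caller looks like; the driver name, IRQ number, priority value and softc are invented for illustration, and only the inthand_add()/inthand_remove() signatures come from this patch:

#define FOO_IRQ  5                      /* made-up IRQ number */
#define FOO_PRI  16                     /* made-up thread priority value */

struct foo_softc {
        int     events;
};
static struct foo_softc foo_sc;
static intrec *foo_rec;

/* Runs in the IRQ's ithread; Giant is taken around it unless INTR_MPSAFE is set. */
static void
foo_intr(void *arg)
{
        struct foo_softc *sc = arg;

        sc->events++;                   /* stand-in for real device work */
}

static int
foo_attach(void)
{
        foo_rec = inthand_add("foo", FOO_IRQ, foo_intr, &foo_sc, FOO_PRI, 0);
        return (foo_rec == NULL ? -1 : 0);
}

static void
foo_detach(void)
{
        if (foo_rec != NULL)
                inthand_remove(foo_rec);
}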
*/ + if ((flags & INTR_FAST) + && (icu_setup(irq, idesc->handler, idesc->argument, + idesc->flags) != 0) ) { if (bootverbose) - printf("\tintr_connect(irq%d) failed, result=%d\n", + printf("\tinthand_add(irq%d) failed, result=%d\n", irq, errcode); free(idesc->name, M_DEVBUF); free(idesc, M_DEVBUF); - idesc = NULL; + return NULL; } - + head = ithd->it_ih; /* look at chain of handlers */ + if (head) { + while (head->next != NULL) + head = head->next; /* find the end */ + head->next = idesc; /* hook it in there */ + } else + ithd->it_ih = idesc; /* put it up front */ + update_intrname(irq, idesc->name); return (idesc); } /* - * Deactivate and remove the interrupt handler descriptor data connected - * created by an earlier call of intr_connect() from the linked list and - * adjust theinterrupt masks if necessary. + * Deactivate and remove linked list the interrupt handler descriptor + * data connected created by an earlier call of inthand_add(), then + * adjust the interrupt masks if necessary. * - * Return the memory held by the interrupt handler descriptor data structure - * to the system. Make sure, the handler is not actively used anymore, before. + * Return the memory held by the interrupt handler descriptor data + * structure to the system. First ensure the handler is not actively + * in use. */ int inthand_remove(intrec *idesc) { - intrec **hook, *head; - int irq; - int errcode = 0; - intrmask_t oldspl; + ithd *ithd; /* descriptor for the IRQ */ + intrec *ih; /* chain of handlers */ if (idesc == NULL) return (-1); + ithd = idesc->ithd; + ih = ithd->it_ih; - irq = idesc->intr; - - /* find pointer that keeps the reference to this interrupt descriptor */ - hook = find_pred(idesc, irq); - if (hook == NULL) + if (ih == idesc) /* first in the chain */ + ithd->it_ih = idesc->next; /* unhook it */ + else { + while ((ih != NULL) + && (ih->next != idesc) ) + ih = ih->next; + if (ih->next != idesc) return (-1); - - /* make copy of original list head, the line after may overwrite it */ - head = intreclist_head[irq]; - - /* unlink: make predecessor point to idesc->next instead of to idesc */ - *hook = idesc->next; - - /* now check whether the element we removed was the list head */ - if (idesc == head) { - - oldspl = splq(1 << irq); - - /* check whether the new list head is the only element on list */ - head = intreclist_head[irq]; - if (head != NULL) { - icu_unset(irq, intr_mux); - if (head->next != NULL) { - /* install the multiplex handler with new list head as argument */ - errcode = icu_setup(irq, intr_mux, head, 0, 0); - if (errcode == 0) - update_intrname(irq, NULL); - } else { - /* install the one remaining handler for this irq */ - errcode = icu_setup(irq, head->handler, - head->argument, - head->maskptr, head->flags); - if (errcode == 0) - update_intrname(irq, head->name); + ih->next = ih->next->next; } - } else { - /* revert to old handler, eg: strayintr */ - icu_unset(irq, idesc->handler); - } - splx(oldspl); - } - update_masks(idesc->maskptr, irq); + + if (ithd->it_ih == NULL) /* no handlers left, */ + icu_unset(ithd->irq, idesc->handler); free(idesc, M_DEVBUF); return (0); } diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 637853e25264..8610e35f1f11 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -245,6 +245,12 @@ npx_probe(dev) setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); npx_idt_probeintr = idt[npx_intrno]; + + /* + * XXX This looks highly bogus, but it 
appears that npc_probe1 + * needs interrupts enabled. Does this make any difference + * here? + */ enable_intr(); result = npx_probe1(dev); disable_intr(); @@ -797,7 +803,7 @@ npxdna() /* * Record new context early in case frstor causes an IRQ13. */ - npxproc = curproc; + PCPU_SET(npxproc, CURPROC); curpcb->pcb_savefpu.sv_ex_sw = 0; /* * The following frstor may cause an IRQ13 when the state being @@ -834,16 +840,18 @@ npxsave(addr) fnsave(addr); /* fnop(); */ start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); #else /* SMP */ + int intrstate; u_char icu1_mask; u_char icu2_mask; u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; + intrstate = save_intr(); disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); old_icu2_mask = inb(IO_ICU2 + 1); @@ -851,12 +859,12 @@ npxsave(addr) outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); idt[npx_intrno] = npx_idt_probeintr; - enable_intr(); + write_eflags(intrstate); stop_emulating(); fnsave(addr); fnop(); start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); disable_intr(); icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ icu2_mask = inb(IO_ICU2 + 1); @@ -866,7 +874,7 @@ npxsave(addr) (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; - enable_intr(); /* back to usual state */ + restore_intr(intrstate); /* back to previous state */ #endif /* SMP */ } diff --git a/sys/i386/isa/vector.s b/sys/i386/isa/vector.s index 5447a90126a0..79f2320e6b8e 100644 --- a/sys/i386/isa/vector.s +++ b/sys/i386/isa/vector.s @@ -16,9 +16,10 @@ #include <i386/isa/isa.h> #endif +#define FAST_INTR_HANDLER_USES_ES 1 #ifdef FAST_INTR_HANDLER_USES_ES #define ACTUALLY_PUSHED 1 -#define MAYBE_MOVW_AX_ES movl %ax,%es +#define MAYBE_MOVW_AX_ES movw %ax,%es #define MAYBE_POPL_ES popl %es #define MAYBE_PUSHL_ES pushl %es #else @@ -36,11 +37,6 @@ .data ALIGN_DATA - .globl _intr_nesting_level -_intr_nesting_level: - .byte 0 - .space 3 - /* * Interrupt counters and names for export to vmstat(8) and friends. * @@ -58,7 +54,6 @@ _eintrcnt: _intrnames: .space NR_INTRNAMES * 16 _eintrnames: - .text /* |
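The npxsave() hunk above swaps an unconditional enable_intr() for save_intr() and restore_intr(), so the routine leaves the interrupt flag exactly as the caller had it instead of forcing interrupts back on. The shape of that discipline, using the same primitives the patch itself uses; the protected work is a placeholder:

/* Placeholder for whatever must run with interrupts off. */
static void
touch_hardware(void)
{
}

static void
protected_operation(void)
{
        int intrstate;

        intrstate = save_intr();        /* remember IF as the caller left it */
        disable_intr();
        touch_hardware();               /* critical section, interrupts masked */
        restore_intr(intrstate);        /* restore IF, do not force it on */
}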