author    Jason Evans <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
committer Jason Evans <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
commit    0384fff8c5b098545c3db311b0e0aa1ec4c9ae7e (patch)
tree      bc6e36e781569f3efe04995c0b0befebb9154ef5 /sys/i386
parent    62ae6c89ad2b03770097d05590093f93b9d94e08 (diff)
Major update to the way synchronization is done in the kernel. Highlights
include:

* Mutual exclusion is used instead of spl*(). See mutex(9). (Note: The
  alpha port is still in transition and currently uses both.)
* Per-CPU idle processes.
* Interrupts are run in their own separate kernel threads and can be
  preempted (i386 only).

Partially contributed by: BSDi (BSD/OS)
Submissions by (at least): cp, dfr, dillon, grog, jake, jhb, sheldonh
Notes: svn path=/head/; revision=65557
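
The core of the change is the replacement of the spl*() interrupt-priority
scheme with mutexes. A minimal sketch of the conversion pattern, before and
after (hedged: the "softc" structure and its fields are hypothetical;
mtx_t, mtx_init(), mtx_enter(), mtx_exit() and the MTX_DEF flag are taken
from the diff below):

    #include <machine/mutex.h>

    struct softc {
            mtx_t   sc_mtx;         /* protects sc_count */
            int     sc_count;
    };

    /* Before: mask interrupts at the relevant priority level. */
    static void
    old_style(struct softc *sc)
    {
            int s;

            s = splhigh();          /* block interrupts */
            sc->sc_count++;
            splx(s);                /* restore previous level */
    }

    /* After: take a mutex; interrupt handlers are threads and can block. */
    static void
    new_style(struct softc *sc)
    {
            mtx_enter(&sc->sc_mtx, MTX_DEF);
            sc->sc_count++;
            mtx_exit(&sc->sc_mtx, MTX_DEF);
    }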
Diffstat (limited to 'sys/i386')
-rw-r--r--  sys/i386/i386/apic_vector.s   | 132
-rw-r--r--  sys/i386/i386/autoconf.c      | 8
-rw-r--r--  sys/i386/i386/exception.s     | 41
-rw-r--r--  sys/i386/i386/genassym.c      | 27
-rw-r--r--  sys/i386/i386/globals.s       | 38
-rw-r--r--  sys/i386/i386/i386-gdbstub.c  | 10
-rw-r--r--  sys/i386/i386/identcpu.c      | 3
-rw-r--r--  sys/i386/i386/initcpu.c       | 6
-rw-r--r--  sys/i386/i386/legacy.c        | 32
-rw-r--r--  sys/i386/i386/locore.s        | 3
-rw-r--r--  sys/i386/i386/machdep.c       | 37
-rw-r--r--  sys/i386/i386/mp_machdep.c    | 88
-rw-r--r--  sys/i386/i386/mpapic.c        | 3
-rw-r--r--  sys/i386/i386/mpboot.s        | 36
-rw-r--r--  sys/i386/i386/mplock.s        | 343
-rw-r--r--  sys/i386/i386/mptable.c       | 88
-rw-r--r--  sys/i386/i386/nexus.c         | 32
-rw-r--r--  sys/i386/i386/perfmon.c       | 15
-rw-r--r--  sys/i386/i386/pmap.c          | 2
-rw-r--r--  sys/i386/i386/swtch.s         | 269
-rw-r--r--  sys/i386/i386/synch_machdep.c | 559
-rw-r--r--  sys/i386/i386/trap.c          | 391
-rw-r--r--  sys/i386/i386/tsc.c           | 155
-rw-r--r--  sys/i386/i386/vm86bios.s      | 10
-rw-r--r--  sys/i386/i386/vm_machdep.c    | 51
-rw-r--r--  sys/i386/include/asnames.h    | 24
-rw-r--r--  sys/i386/include/cpu.h        | 12
-rw-r--r--  sys/i386/include/cpufunc.h    | 21
-rw-r--r--  sys/i386/include/globaldata.h | 33
-rw-r--r--  sys/i386/include/globals.h    | 42
-rw-r--r--  sys/i386/include/ipl.h        | 17
-rw-r--r--  sys/i386/include/lock.h       | 45
-rw-r--r--  sys/i386/include/mptable.h    | 88
-rw-r--r--  sys/i386/include/mutex.h      | 786
-rw-r--r--  sys/i386/include/pcb.h        | 6
-rw-r--r--  sys/i386/include/pcpu.h       | 33
-rw-r--r--  sys/i386/include/smp.h        | 38
-rw-r--r--  sys/i386/include/smptests.h   | 5
-rw-r--r--  sys/i386/isa/apic_ipl.s       | 74
-rw-r--r--  sys/i386/isa/apic_vector.s    | 132
-rw-r--r--  sys/i386/isa/atpic_vector.s   | 92
-rw-r--r--  sys/i386/isa/bs/bsif.h        | 13
-rw-r--r--  sys/i386/isa/clock.c          | 155
-rw-r--r--  sys/i386/isa/cy.c             | 197
-rw-r--r--  sys/i386/isa/icu_ipl.s        | 57
-rw-r--r--  sys/i386/isa/icu_vector.s     | 92
-rw-r--r--  sys/i386/isa/intr_machdep.c   | 524
-rw-r--r--  sys/i386/isa/intr_machdep.h   | 50
-rw-r--r--  sys/i386/isa/ipl.s            | 149
-rw-r--r--  sys/i386/isa/ipl_funcs.c      | 267
-rw-r--r--  sys/i386/isa/ithread.c        | 353
-rw-r--r--  sys/i386/isa/loran.c          | 2
-rw-r--r--  sys/i386/isa/nmi.c            | 524
-rw-r--r--  sys/i386/isa/npx.c            | 18
-rw-r--r--  sys/i386/isa/vector.s         | 9
55 files changed, 3418 insertions, 2819 deletions
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 2a7559df7f97..54bf00366c81 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
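
The INTR() macro above becomes a thin stub: it masks and EOIs the IRQ,
pushes the IRQ number, and calls sched_ithd() to wake the corresponding
interrupt thread; the old cpl/iactive/mp_lock bookkeeping is deleted. A
conceptual sketch of the scheduling side (hedged: sched_ithd(), sched_lock
and the new ithread.c come from this commit, but the per-IRQ table and
struct field names here are illustrative, not the committed code):

    /*
     * Illustrative sketch only -- the real implementation is in
     * sys/i386/isa/ithread.c (added by this commit).
     */
    void
    sched_ithd(void *cookie)
    {
            int irq = (int)cookie;          /* IRQ number pushed by the stub */
            struct ithd *it = ithds[irq];   /* hypothetical per-IRQ thread */

            it->it_need = 1;                /* mark work pending */
            mtx_enter(&sched_lock, MTX_SPIN);
            if (it->it_proc->p_stat == SWAIT)
                    setrunqueue(it->it_proc); /* wake the interrupt thread */
            mtx_exit(&sched_lock, MTX_SPIN);
    }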
diff --git a/sys/i386/i386/autoconf.c b/sys/i386/i386/autoconf.c
index b209065027d6..4edda4bdcab5 100644
--- a/sys/i386/i386/autoconf.c
+++ b/sys/i386/i386/autoconf.c
@@ -163,14 +163,6 @@ configure(dummy)
* XXX this is slightly misplaced.
*/
spl0();
-
- /*
- * Allow lowering of the ipl to the lowest kernel level if we
- * panic (or call tsleep() before clearing `cold'). No level is
- * completely safe (since a panic may occur in a critical region
- * at splhigh()), but we want at least bio interrupts to work.
- */
- safepri = cpl;
}
static void
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index acb8b40f2810..9e77114a1385 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
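
In the FPU trap path above, the old MP_LOCK/cpl juggling is replaced by
bracketing the handler with the Giant mutex; __mtx_enter_giant_def and
__mtx_exit_giant_def are the assembly-callable forms. The rough C
equivalent of that bracket (sketch only; npx_intr's argument is elided):

    mtx_enter(&Giant, MTX_DEF);     /* serialize against the rest of the kernel */
    npx_intr(dummy);                /* run the FPU interrupt handler */
    mtx_exit(&Giant, MTX_DEF);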
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 60accd19ba8e..78c607591875 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -51,6 +51,10 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
#include <machine/frame.h>
#include <machine/bootinfo.h>
#include <machine/tss.h>
@@ -73,6 +77,7 @@
#include <machine/sigframe.h>
#include <machine/globaldata.h>
#include <machine/vm86.h>
+#include <machine/mutex.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-#ifdef SMP
-ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
-#endif
+ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
ASSYM(GD_SIZEOF, sizeof(struct globaldata));
ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc));
ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
+ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
@@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
#endif
+ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check));
+
+/* XXX */
+#ifdef KTR_PERCPU
+ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
+ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
+#endif
+
#ifdef SMP
ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid));
@@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
+
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s
index 31fbfd5e98b1..f3181429cad5 100644
--- a/sys/i386/i386/globals.s
+++ b/sys/i386/i386/globals.s
@@ -61,44 +61,74 @@ globaldata:
#else
.set globaldata,0
#endif
- .globl gd_curproc, gd_curpcb, gd_npxproc, gd_astpending
- .globl gd_common_tss, gd_switchtime, gd_switchticks
+ .globl gd_curproc, gd_prevproc, gd_curpcb, gd_npxproc, gd_idleproc
+ .globl gd_astpending, gd_common_tss, gd_switchtime, gd_switchticks
+ .globl gd_intr_nesting_level
.set gd_curproc,globaldata + GD_CURPROC
+ .set gd_prevproc,globaldata + GD_PREVPROC
.set gd_astpending,globaldata + GD_ASTPENDING
.set gd_curpcb,globaldata + GD_CURPCB
.set gd_npxproc,globaldata + GD_NPXPROC
+ .set gd_idleproc,globaldata + GD_IDLEPROC
.set gd_common_tss,globaldata + GD_COMMON_TSS
.set gd_switchtime,globaldata + GD_SWITCHTIME
.set gd_switchticks,globaldata + GD_SWITCHTICKS
+ .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL
.globl gd_common_tssd, gd_tss_gdt
.set gd_common_tssd,globaldata + GD_COMMON_TSSD
.set gd_tss_gdt,globaldata + GD_TSS_GDT
+ .globl gd_witness_spin_check
+ .set gd_witness_spin_check, globaldata + GD_WITNESS_SPIN_CHECK
+
#ifdef USER_LDT
.globl gd_currentldt
.set gd_currentldt,globaldata + GD_CURRENTLDT
#endif
+/* XXX - doesn't work yet */
+#ifdef KTR_PERCPU
+ .globl gd_ktr_idx, gd_ktr_buf, gd_ktr_buf_data
+ .set gd_ktr_idx,globaldata + GD_KTR_IDX
+ .set gd_ktr_buf,globaldata + GD_KTR_BUF
+ .set gd_ktr_buf_data,globaldata + GD_KTR_BUF_DATA
+#endif
+
#ifndef SMP
- .globl _curproc, _curpcb, _npxproc, _astpending
- .globl _common_tss, _switchtime, _switchticks
+ .globl _curproc, _prevproc, _curpcb, _npxproc, _idleproc,
+ .globl _astpending, _common_tss, _switchtime, _switchticks
+ .global _intr_nesting_level
.set _curproc,globaldata + GD_CURPROC
+ .set _prevproc,globaldata + GD_PREVPROC
.set _astpending,globaldata + GD_ASTPENDING
.set _curpcb,globaldata + GD_CURPCB
.set _npxproc,globaldata + GD_NPXPROC
+ .set _idleproc,globaldata + GD_IDLEPROC
.set _common_tss,globaldata + GD_COMMON_TSS
.set _switchtime,globaldata + GD_SWITCHTIME
.set _switchticks,globaldata + GD_SWITCHTICKS
+ .set _intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL
.globl _common_tssd, _tss_gdt
.set _common_tssd,globaldata + GD_COMMON_TSSD
.set _tss_gdt,globaldata + GD_TSS_GDT
+ .globl _witness_spin_check
+ .set _witness_spin_check,globaldata + GD_WITNESS_SPIN_CHECK
+
#ifdef USER_LDT
.globl _currentldt
.set _currentldt,globaldata + GD_CURRENTLDT
#endif
+
+/* XXX - doesn't work yet */
+#ifdef KTR_PERCPU
+ .globl _ktr_idx, _ktr_buf, _ktr_buf_data
+ .set _ktr_idx,globaldata + GD_KTR_IDX
+ .set _ktr_buf,globaldata + GD_KTR_BUF
+ .set _ktr_buf_data,globaldata + GD_KTR_BUF_DATA
+#endif
#endif
#ifdef SMP
diff --git a/sys/i386/i386/i386-gdbstub.c b/sys/i386/i386/i386-gdbstub.c
index 986b8d4daa1f..b442a377c44f 100644
--- a/sys/i386/i386/i386-gdbstub.c
+++ b/sys/i386/i386/i386-gdbstub.c
@@ -188,7 +188,8 @@ getpacket (char *buffer)
unsigned char ch;
int s;
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/* wait around for the start character, ignore all other characters */
@@ -239,7 +240,7 @@ getpacket (char *buffer)
}
}
while (checksum != xmitcsum);
- splx (s);
+ write_eflags(s);
}
/* send the packet in buffer. */
@@ -253,7 +254,8 @@ putpacket (char *buffer)
int s;
/* $<packet info>#<checksum>. */
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/*
@@ -285,7 +287,7 @@ putpacket (char *buffer)
putDebugChar (hexchars[checksum & 0xf]);
}
while ((getDebugChar () & 0x7f) != '+');
- splx (s);
+ write_eflags(s);
}
static char remcomInBuffer[BUFMAX];
diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c
index 0e11e2b8eadf..71ecd63de85a 100644
--- a/sys/i386/i386/identcpu.c
+++ b/sys/i386/i386/identcpu.c
@@ -42,6 +42,7 @@
#include "opt_cpu.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,8 @@
#include <machine/specialreg.h>
#include <machine/md_var.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#define IDENTBLUE_CYRIX486 0
diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c
index be86c65cb279..b9395bfc7f85 100644
--- a/sys/i386/i386/initcpu.c
+++ b/sys/i386/i386/initcpu.c
@@ -607,12 +607,14 @@ void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
+ int intrstate;
/*
* Write allocate is supported only on models 1, 2, and 3, with
* a stepping of 4 or greater.
*/
if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ intrstate = save_intr();
disable_intr();
msr = rdmsr(0x83); /* HWCR */
wrmsr(0x83, msr & !(0x10));
@@ -645,7 +647,7 @@ enable_K5_wt_alloc(void)
msr=rdmsr(0x83);
wrmsr(0x83, msr|0x10); /* enable write allocate */
- enable_intr();
+ restore_intr(intrstate);
}
}
@@ -708,7 +710,6 @@ enable_K6_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
void
@@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
#endif /* I585_CPU && CPU_WT_ALLOC */
diff --git a/sys/i386/i386/legacy.c b/sys/i386/i386/legacy.c
index 8a3077058718..5b6cdbc85618 100644
--- a/sys/i386/i386/legacy.c
+++ b/sys/i386/i386/legacy.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
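
nexus_setup_intr() now translates the INTR_TYPE_* flags into an
interrupt-thread priority (PI_*) instead of an spl mask pointer, and hands
that priority to inthand_add(). From a driver's point of view nothing
changes; a hypothetical attach routine still does something like this
(sketch: the "foo" driver and its softc are invented, bus_setup_intr() is
the existing newbus entry point that lands in this function):

    /*
     * INTR_TYPE_NET now selects the PI_NET ithread priority in
     * nexus_setup_intr() rather than adding the handler to net_imask.
     */
    error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET,
        foo_intr, sc, &sc->intr_cookie);
    if (error)
            return (error);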
diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s
index bddd7d5be868..fa95fb0d6b53 100644
--- a/sys/i386/i386/locore.s
+++ b/sys/i386/i386/locore.s
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 6edecf04db54..875c9d5a7a8a 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -58,6 +58,7 @@
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/proc.h>
@@ -98,10 +99,12 @@
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>
+#include <machine/globals.h>
#ifdef SMP
#include <machine/smp.h>
-#include <machine/globaldata.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
@@ -110,6 +113,7 @@
#ifdef OLD_BUS_ARCH
#include <i386/isa/isa_device.h>
#endif
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#include <isa/rtc.h>
#include <machine/vm86.h>
@@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
+
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
static void
@@ -431,6 +440,11 @@ again:
bufinit();
vm_pager_bufferinit();
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
+
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -1817,11 +1831,6 @@ init386(first)
#endif
int off;
- /*
- * Prevent lowering of the ipl if we call tsleep() early.
- */
- safepri = cpl;
-
proc0.p_addr = proc0paddr;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1871,6 +1880,10 @@ init386(first)
r_gdt.rd_base = (int) gdt;
lgdt(&r_gdt);
+ /* setup curproc so that mutexes work */
+ PCPU_SET(curproc, &proc0);
+ PCPU_SET(prevproc, &proc0);
+
/* make ldt memory segments */
/*
* The data segment limit must not cover the user area because we
@@ -1953,7 +1966,7 @@ init386(first)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
- common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
+ common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
private_tss = 0;
tss_gdt = &gdt[GPROC0_SEL].sd;
@@ -1974,6 +1987,12 @@ init386(first)
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ /*
+ * We grab Giant during the vm86bios routines, so we need to ensure
+ * that it is up and running before we use vm86.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+
vm86_initialize();
getmemsize(first);
@@ -2009,9 +2028,7 @@ init386(first)
/* setup proc 0's pcb */
proc0.p_addr->u_pcb.pcb_flags = 0;
proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
-#ifdef SMP
- proc0.p_addr->u_pcb.pcb_mpnest = 1;
-#endif
+ proc0.p_addr->u_pcb.pcb_schednest = 0;
proc0.p_addr->u_pcb.pcb_ext = 0;
proc0.p_md.md_regs = &proc0_tf;
}
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 61c5ecf73205..95b5759f9e66 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
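
The mp_lock-based AP spin-up is replaced by a simplelock handshake, all of
it visible in the hunks above. A sketch of the ordering (restating the
diff, not new code):

    /*
     *   BSP: s_lock(&ap_boot_lock);     -- in mp_enable(), before start_all_aps()
     *   AP:  s_lock(&ap_boot_lock);     -- first thing in ap_init(); blocks
     *   BSP: release_aps()              -- SYSINIT at SI_SUB_SMP unlocks
     *   AP:  ... per-cpu init ...
     *        s_unlock(&ap_boot_lock);   -- admit the next AP
     *        while (smp_started == 0) ; -- spin until the last AP is up
     *        mtx_enter(&sched_lock, MTX_SPIN);
     *        cpu_throw();               -- enter the scheduler, never returns
     */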
diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c
index a3594a8ed20c..3f971d83548d 100644
--- a/sys/i386/i386/mpapic.c
+++ b/sys/i386/i386/mpapic.c
@@ -28,11 +28,14 @@
#include "opt_smp.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
+#include <sys/proc.h>
#include <machine/smptests.h> /** TEST_TEST1 */
#include <machine/smp.h>
#include <machine/mpapic.h>
+#include <machine/globaldata.h>
#include <machine/segments.h>
#include <i386/isa/intr_machdep.h> /* Xspuriousint() */
diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s
index d3602d29a2f4..9ede02c24342 100644
--- a/sys/i386/i386/mpboot.s
+++ b/sys/i386/i386/mpboot.s
@@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */
CHECKPOINT(0x39, 6)
- /* wait till we can get into the kernel */
- call _boot_get_mplock
-
- /* Now, let's prepare for some REAL WORK :-) */
+ /* Now, let's prepare for some REAL WORK :-) This doesn't return. */
call _ap_init
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,-4(%esp)
-2:
- cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */
- jz 2b
- call _get_mplock
-
- /* let her rip! (loads new stack) */
- jmp _cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
- pushl %ebp
- movl %esp, %ebp
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,0(%esp)
- movl %eax, 8(%ebp)
-1:
- cmpl $0, CNAME(smp_started)
- jnz 2f
- decl %eax
- cmpl $0, %eax
- jge 1b
-2:
- call _get_mplock
- movl %ebp, %esp
- popl %ebp
- ret
-
-
/*
* This is the embedded trampoline or bootstrap that is
* copied into 'real-mode' low memory, it is where the
diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s
deleted file mode 100644
index dc5ba01e1f05..000000000000
--- a/sys/i386/i386/mplock.s
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
- * ----------------------------------------------------------------------------
- *
- * $FreeBSD$
- *
- * Functions for locking between CPUs in a SMP system.
- *
- * This is an "exclusive counting semaphore". This means that it can be
- * free (0xffffffff) or be owned by a CPU (0xXXYYYYYY where XX is CPU-id
- * and YYYYYY is the count).
- *
- * Contrary to most implementations around, this one is entirely atomic:
- * The attempt to seize/release the semaphore and the increment/decrement
- * is done in one atomic operation. This way we are safe from all kinds
- * of weird reentrancy situations.
- */
-
-#include <machine/asmacros.h>
-#include <machine/smptests.h> /** GRAB_LOPRIO */
-#include <machine/apic.h>
-
-#define GLPROFILE_NOT
-
-#ifdef CHEAP_TPR
-
-/* we assumme that the 'reserved bits' can be written with zeros */
-
-#else /* CHEAP_TPR */
-
-#error HEADS UP: this code needs work
-/*
- * The APIC doc says that reserved bits must be written with whatever
- * value they currently contain, ie you should: read, modify, write,
- * instead of just writing new values to the TPR register. Current
- * silicon seems happy with just writing. If the behaviour of the
- * silicon changes, all code that access the lapic_tpr must be modified.
- * The last version to contain such code was:
- * Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp
- */
-
-#endif /* CHEAP_TPR */
-
-#ifdef GRAB_LOPRIO
-/*
- * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts.
- */
-
-/* after 1st acquire of lock we grab all hardware INTs */
-#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr
-
-/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */
-#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */
-
-#else /* GRAB_LOPRIO */
-
-#define GRAB_HWI /* nop */
-#define ARB_HWI /* nop */
-
-#endif /* GRAB_LOPRIO */
-
-
- .text
-
-#ifdef SMP
-
-/***********************************************************************
- * void MPgetlock_edx(unsigned int *lock : %edx)
- * ----------------------------------
- * Destroys %eax, %ecx. %edx must hold lock argument.
- *
- * Grabs hardware interrupts on first aquire.
- *
- * NOTE: Serialization is not required if we already hold the lock, since
- * we already hold the lock, nor do we need a locked instruction if we
- * already hold the lock.
- */
-
-NON_GPROF_ENTRY(MPgetlock_edx)
-1:
- movl (%edx), %eax /* Get current contents of lock */
- movl %eax, %ecx
- andl $CPU_FIELD,%ecx
- cmpl _cpu_lockid, %ecx /* Do we already own the lock? */
- jne 2f
- incl %eax /* yes, just bump the count */
- movl %eax, (%edx) /* serialization not required */
- ret
-2:
- movl $FREE_LOCK, %eax /* lock must be free */
- movl _cpu_lockid, %ecx
- incl %ecx
- lock
- cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */
-#ifdef GLPROFILE
- jne 3f
- incl _gethits2
-#else
- jne 1b
-#endif /* GLPROFILE */
- GRAB_HWI /* 1st acquire, grab hw INTs */
- ret
-#ifdef GLPROFILE
-3:
- incl _gethits3
- jmp 1b
-#endif
-
-/***********************************************************************
- * int MPtrylock(unsigned int *lock)
- * ---------------------------------
- * Destroys %eax, %ecx and %edx.
- * Returns 1 if lock was successfull
- */
-
-NON_GPROF_ENTRY(MPtrylock)
- movl 4(%esp), %edx /* Get the address of the lock */
-
- movl $FREE_LOCK, %eax /* Assume it's free */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- incl %ecx /* - new count is one */
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 1f /* ...do not collect $200 */
-#ifdef GLPROFILE
- incl _tryhits2
-#endif /* GLPROFILE */
- GRAB_HWI /* 1st acquire, grab hw INTs */
- movl $1, %eax
- ret
-1:
- movl (%edx), %eax /* Try to see if we have it already */
- andl $COUNT_FIELD, %eax /* - get count */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- orl %ecx, %eax /* - combine them */
- movl %eax, %ecx
- incl %ecx /* - new count is one more */
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 2f /* - miss */
-#ifdef GLPROFILE
- incl _tryhits
-#endif /* GLPROFILE */
- movl $1, %eax
- ret
-2:
-#ifdef GLPROFILE
- incl _tryhits3
-#endif /* GLPROFILE */
- movl $0, %eax
- ret
-
-
-/***********************************************************************
- * void MPrellock_edx(unsigned int *lock : %edx)
- * ----------------------------------
- * Destroys %ecx, argument must be in %edx
- *
- * SERIALIZATION NOTE!
- *
- * After a lot of arguing, it turns out that there is no problem with
- * not having a synchronizing instruction in the MP unlock code. There
- * are two things to keep in mind: First, Intel guarentees that writes
- * are ordered amoungst themselves. Second, the P6 is allowed to reorder
- * reads around writes. Third, the P6 maintains cache consistency (snoops
- * the bus). The second is not an issue since the one read we do is the
- * basis for the conditional which determines whether the write will be
- * made or not.
- *
- * Therefore, no synchronizing instruction is required on unlock. There are
- * three performance cases: First, if a single cpu is getting and releasing
- * the lock the removal of the synchronizing instruction saves approx
- * 200 nS (testing w/ duel cpu PIII 450). Second, if one cpu is contending
- * for the lock while the other holds it, the removal of the synchronizing
- * instruction results in a 700nS LOSS in performance. Third, if two cpu's
- * are switching off ownership of the MP lock but not contending for it (the
- * most common case), this results in a 400nS IMPROVEMENT in performance.
- *
- * Since our goal is to reduce lock contention in the first place, we have
- * decided to remove the synchronizing instruction from the unlock code.
- */
-
-NON_GPROF_ENTRY(MPrellock_edx)
- movl (%edx), %ecx /* - get the value */
- decl %ecx /* - new count is one less */
- testl $COUNT_FIELD, %ecx /* - Unless it's zero... */
- jnz 2f
- ARB_HWI /* last release, arbitrate hw INTs */
- movl $FREE_LOCK, %ecx /* - In which case we release it */
-#if 0
- lock
- addl $0,0(%esp) /* see note above */
-#endif
-2:
- movl %ecx, (%edx)
- ret
-
-/***********************************************************************
- * void get_mplock()
- * -----------------
- * All registers preserved
- *
- * Stack (after call to _MPgetlock):
- *
- * edx 4(%esp)
- * ecx 8(%esp)
- * eax 12(%esp)
- *
- * Requirements: Interrupts should be enabled on call so we can take
- * IPI's and FAST INTs while we are waiting for the lock
- * (else the system may not be able to halt).
- *
- * XXX there are still places where get_mplock() is called
- * with interrupts disabled, so we have to temporarily reenable
- * interrupts.
- *
- * Side effects: The current cpu will be given ownership of the
- * hardware interrupts when it first aquires the lock.
- *
- * Costs: Initial aquisition requires the use of a costly locked
- * instruction, but recursive aquisition is cheap. Release
- * is very cheap.
- */
-
-NON_GPROF_ENTRY(get_mplock)
- pushl %eax
- pushl %ecx
- pushl %edx
- movl $_mp_lock, %edx
- pushfl
- testl $(1<<9), (%esp)
- jz 2f
- call _MPgetlock_edx
- addl $4,%esp
-1:
- popl %edx
- popl %ecx
- popl %eax
- ret
-2:
- sti
- call _MPgetlock_edx
- popfl
- jmp 1b
-
-/*
- * Special version of get_mplock that is used during bootstrap when we can't
- * yet enable interrupts of any sort since the APIC isn't online yet. We
- * do an endrun around MPgetlock_edx to avoid enabling interrupts.
- *
- * XXX FIXME.. - APIC should be online from the start to simplify IPI's.
- */
-NON_GPROF_ENTRY(boot_get_mplock)
- pushl %eax
- pushl %ecx
- pushl %edx
-#ifdef GRAB_LOPRIO
- pushfl
- pushl lapic_tpr
- cli
-#endif
-
- movl $_mp_lock, %edx
- call _MPgetlock_edx
-
-#ifdef GRAB_LOPRIO
- popl lapic_tpr
- popfl
-#endif
- popl %edx
- popl %ecx
- popl %eax
- ret
-
-/***********************************************************************
- * void try_mplock()
- * -----------------
- * reg %eax == 1 if success
- */
-
-NON_GPROF_ENTRY(try_mplock)
- pushl %ecx
- pushl %edx
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- popl %edx
- popl %ecx
- ret
-
-/***********************************************************************
- * void rel_mplock()
- * -----------------
- * All registers preserved
- */
-
-NON_GPROF_ENTRY(rel_mplock)
- pushl %ecx
- pushl %edx
- movl $_mp_lock,%edx
- call _MPrellock_edx
- popl %edx
- popl %ecx
- ret
-
-#endif
-
-/***********************************************************************
- *
- */
- .data
- .p2align 2 /* xx_lock aligned on int boundary */
-
-#ifdef SMP
-
- .globl _mp_lock
-_mp_lock: .long 0
-
-#ifdef GLPROFILE
- .globl _gethits
-_gethits:
- .long 0
-_gethits2:
- .long 0
-_gethits3:
- .long 0
-
- .globl _tryhits
-_tryhits:
- .long 0
-_tryhits2:
- .long 0
-_tryhits3:
- .long 0
-
-msg:
- .asciz "lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n"
-#endif /* GLPROFILE */
-#endif /* SMP */
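
The deleted mplock.s implemented the "exclusive counting semaphore"
described in its header comment: a lock word that is 0xffffffff when free,
else (cpuid << 24) | count. In C, the acquire path was roughly as follows
(sketch only; cmpxchg() is a hypothetical stand-in for the lock-prefixed
cmpxchg instruction above, while CPU_FIELD, FREE_LOCK and cpu_lockid come
from the deleted source):

    static void
    mp_getlock(volatile u_int *lock)
    {
            u_int old;

            for (;;) {
                    old = *lock;
                    if ((old & CPU_FIELD) == cpu_lockid) {
                            *lock = old + 1;  /* recursion: no lock prefix needed */
                            return;
                    }
                    /* try FREE_LOCK -> (our id, count 1) atomically */
                    if (cmpxchg(lock, FREE_LOCK, cpu_lockid + 1))
                            return;
            }
    }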
diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c
index 61c5ecf73205..95b5759f9e66 100644
--- a/sys/i386/i386/mptable.c
+++ b/sys/i386/i386/mptable.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/i386/i386/nexus.c b/sys/i386/i386/nexus.c
index 8a3077058718..5b6cdbc85618 100644
--- a/sys/i386/i386/nexus.c
+++ b/sys/i386/i386/nexus.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
diff --git a/sys/i386/i386/perfmon.c b/sys/i386/i386/perfmon.c
index 574f416df2be..2efa51642d85 100644
--- a/sys/i386/i386/perfmon.c
+++ b/sys/i386/i386/perfmon.c
@@ -118,16 +118,19 @@ perfmon_avail(void)
int
perfmon_setup(int pmc, unsigned int control)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
perfmon_inuse |= (1 << pmc);
control &= ~(PMCF_SYS_FLAGS << 16);
+ intrstate = save_intr();
disable_intr();
ctl_shadow[pmc] = control;
writectl(pmc);
wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
@@ -162,15 +165,18 @@ perfmon_fini(int pmc)
int
perfmon_start(int pmc)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
if (perfmon_inuse & (1 << pmc)) {
+ intrstate = save_intr();
disable_intr();
ctl_shadow[pmc] |= (PMCF_EN << 16);
wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
writectl(pmc);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
return EBUSY;
@@ -179,15 +185,18 @@ perfmon_start(int pmc)
int
perfmon_stop(int pmc)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
if (perfmon_inuse & (1 << pmc)) {
+ intrstate = save_intr();
disable_intr();
pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
ctl_shadow[pmc] &= ~(PMCF_EN << 16);
writectl(pmc);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
return EBUSY;
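
perfmon.c (like initcpu.c above) switches from an unconditional
disable_intr()/enable_intr() pair to saving and restoring the caller's
interrupt state, so these routines no longer re-enable interrupts for a
caller that entered with them disabled. The idiom, as used in the hunks
above (sketch):

    int intrstate;

    intrstate = save_intr();        /* capture EFLAGS, including IF */
    disable_intr();                 /* cli */
    /* ... program the MSRs / counters atomically ... */
    restore_intr(intrstate);        /* restore the caller's interrupt state */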
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index edae2929fb87..7ce9120d243f 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va)
* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
cpu_invlpg(prv_PADDR1);
}
- return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1));
+ return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1)));
#else
if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index c895fefa8c15..db56a1b40af6 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
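
A rough C rendering of what the new cpu_switch() path does with sched_lock may
help when reading the assembly above: the lock is deliberately never released
across the switch.  Its recursion count is parked in the outgoing process's
PCB and ownership is simply reassigned to the incoming process.  This sketch
is illustrative only (field names are inferred from the PCB_SCHEDNEST,
MTX_RECURSE and MTX_LOCK offsets used above); it is not kernel source:

	static void
	sketch_switch_handoff(struct proc *old, struct proc *new)
	{
		/* stash the outgoing proc's sched_lock recursion depth */
		old->p_addr->u_pcb.pcb_schednest = sched_lock.mtx_recurse;

		/* ... save registers, choose new proc, switch stack/%cr3 ... */

		/* restore the depth and hand lock ownership to "new" */
		sched_lock.mtx_recurse = new->p_addr->u_pcb.pcb_schednest;
		sched_lock.mtx_lock = (u_int)new;
	}

The DIAGNOSTIC check at the end of cpu_switch() (badsw6) enforces the
corresponding invariant: interrupts must still be disabled when the switch
completes, because sched_lock is held the whole way through.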
diff --git a/sys/i386/i386/synch_machdep.c b/sys/i386/i386/synch_machdep.c
new file mode 100644
index 000000000000..029225dbf314
--- /dev/null
+++ b/sys/i386/i386/synch_machdep.c
@@ -0,0 +1,559 @@
+/*-
+ * Copyright (c) 1997, 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
+ * $FreeBSD$
+ */
+
+#define MTX_STRS /* define common strings */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <ddb/ddb.h>
+#include <machine/atomic.h>
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/mutex.h>
+
+/* All mutexes in system (used for debug/panic) */
+mtx_t all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
+ TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
+ { NULL, NULL }, &all_mtx, &all_mtx
+#ifdef SMP_DEBUG
+ , NULL, { NULL, NULL }, NULL, 0
+#endif
+};
+
+int mtx_cur_cnt;
+int mtx_max_cnt;
+
+extern void _mtx_enter_giant_def(void);
+extern void _mtx_exit_giant_def(void);
+
+static void propagate_priority(struct proc *) __unused;
+
+#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED)
+#define mtx_owner(m) (mtx_unowned(m) ? NULL \
+ : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
+
+#define RETIP(x) *(((int *)(&x)) - 1)
+#define SET_PRIO(p, pri) (p)->p_priority = (pri)
+
+/*
+ * XXX Temporary, for use from assembly language
+ */
+
+void
+_mtx_enter_giant_def(void)
+{
+
+ mtx_enter(&Giant, MTX_DEF);
+}
+
+void
+_mtx_exit_giant_def(void)
+{
+
+ mtx_exit(&Giant, MTX_DEF);
+}
+
+static void
+propagate_priority(struct proc *p)
+{
+ int pri = p->p_priority;
+ mtx_t *m = p->p_blocked;
+
+ for (;;) {
+ struct proc *p1;
+
+ p = mtx_owner(m);
+
+ if (p == NULL) {
+ /*
+			 * This really isn't quite right; we really
+			 * ought to bump the priority of the process
+			 * that next acquires the mutex.
+ */
+ MPASS(m->mtx_lock == MTX_CONTESTED);
+ return;
+ }
+ MPASS(p->p_magic == P_MAGIC);
+ if (p->p_priority <= pri)
+ return;
+ /*
+ * If lock holder is actually running, just bump priority.
+ */
+ if (TAILQ_NEXT(p, p_procq) == NULL) {
+ MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
+ SET_PRIO(p, pri);
+ return;
+ }
+ /*
+		 * If it's on a run queue, move it to the new run
+		 * queue and quit.
+ */
+#if 1
+ if (p->p_stat == SRUN) {
+#else
+ if ((m = p->p_blocked) == NULL) {
+#endif
+ MPASS(p->p_stat == SRUN);
+ remrunqueue(p);
+ SET_PRIO(p, pri);
+ setrunqueue(p);
+ return;
+ }
+
+ /*
+ * If we aren't blocked on a mutex, give up and quit.
+ */
+ if (p->p_stat != SMTX) {
+ printf(
+ "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
+ p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
+ return;
+ }
+
+ /*
+ * Pick up the mutex that p is blocked on.
+ */
+ m = p->p_blocked;
+ MPASS(m != NULL);
+
+ printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
+ p->p_comm, m->mtx_description);
+ /*
+ * Check if the proc needs to be moved up on
+ * the blocked chain
+ */
+ if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
+ p1->p_priority <= pri) {
+ if (p1)
+ printf(
+ "XXX: previous process %d(%s) has higher priority\n",
+				    p1->p_pid, p1->p_comm);
+ else
+				printf("XXX: process at head of blocked queue\n");
+ continue;
+ }
+
+ /*
+ * Remove proc from blocked chain
+ */
+ TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
+ MPASS(p1->p_magic == P_MAGIC);
+ if (p1->p_priority > pri)
+ break;
+ }
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ CTR4(KTR_LOCK,
+ "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
+ p, p1, m, m->mtx_description);
+ }
+}
+
+void
+mtx_enter_hard(mtx_t *m, int type, int flags)
+{
+ struct proc *p = CURPROC;
+
+ KASSERT(p != NULL, ("curproc is NULL in mutex"));
+
+ switch (type) {
+ case MTX_DEF:
+ if ((m->mtx_lock & MTX_FLAGMASK) == (u_int)p) {
+ m->mtx_recurse++;
+ atomic_set_int(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
+ return;
+ }
+ CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%x) [0x%x]",
+ m, m->mtx_lock, RETIP(m));
+ while (!atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED, (int)p)) {
+ int v;
+ struct proc *p1;
+
+ mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
+ /*
+			 * Check if the lock has been released while we
+			 * were waiting for sched_lock.
+ */
+ if ((v = m->mtx_lock) == MTX_UNOWNED) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+ /*
+ * The mutex was marked contested on release. This
+ * means that there are processes blocked on it.
+ */
+ if (v == MTX_CONTESTED) {
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ KASSERT(p1 != NULL, ("contested mutex has no contesters"));
+ KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
+ m->mtx_lock = (int)p | MTX_CONTESTED;
+ if (p1->p_priority < p->p_priority) {
+ SET_PRIO(p, p1->p_priority);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ }
+ /*
+			 * If the mutex isn't already contested and a
+			 * failure occurs setting the contested bit, the
+			 * mutex was either released or the state of the
+			 * RECURSION bit changed.
+ */
+ if ((v & MTX_CONTESTED) == 0 &&
+ !atomic_cmpset_int(&m->mtx_lock, v,
+ v | MTX_CONTESTED)) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+
+ /* We definitely have to sleep for this lock */
+ mtx_assert(m, MA_NOTOWNED);
+
+#ifdef notyet
+ /*
+			 * If we're borrowing an interrupted thread's VM
+			 * context, we must clean up before going to sleep.
+ */
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+ "mtx_enter: 0x%x interrupted 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+
+ /* Put us on the list of procs blocked on this mutex */
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ p1 = (struct proc *)(m->mtx_lock &
+ MTX_FLAGMASK);
+ LIST_INSERT_HEAD(&p1->p_contested, m,
+ mtx_contested);
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ } else {
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
+ if (p1->p_priority > p->p_priority)
+ break;
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
+ p_procq);
+ }
+
+ p->p_blocked = m; /* Who we're blocked on */
+ p->p_stat = SMTX;
+#if 0
+ propagate_priority(p);
+#endif
+ CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
+ p, m, m->mtx_description);
+ mi_switch();
+ CTR3(KTR_LOCK,
+ "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
+ p, m, m->mtx_description);
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ case MTX_SPIN | MTX_TOPHALF:
+ {
+ int i = 0;
+
+ if (m->mtx_lock == (u_int)p) {
+ m->mtx_recurse++;
+ return;
+ }
+ CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
+ for (;;) {
+ if (atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED,
+ (u_int)p))
+ break;
+ while (m->mtx_lock != MTX_UNOWNED) {
+ if (i++ < 1000000)
+ continue;
+ if (i++ < 6000000)
+					DELAY(1);
+#ifdef DDB
+ else if (!db_active) {
+#else
+ else {
+#endif
+#if 0
+ Debugger ("spinning");
+ panic("spin lock %s held by 0x%x for > 5 seconds",
+ m->mtx_description,
+ m->mtx_lock);
+#endif
+ }
+ }
+ }
+
+#ifdef SMP_DEBUG
+ if (type != MTX_SPIN)
+ m->mtx_savefl = 0xdeadbeef;
+ else
+#endif
+ m->mtx_savefl = flags;
+ CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
+ return;
+ }
+ }
+}
+
+void
+mtx_exit_hard(mtx_t *m, int type)
+{
+ struct proc *p, *p1;
+ mtx_t *m1;
+ int pri;
+
+ switch (type) {
+ case MTX_DEF:
+ case MTX_DEF | MTX_NOSWITCH:
+ if (m->mtx_recurse != 0) {
+ if (--(m->mtx_recurse) == 0)
+ atomic_clear_int(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
+ return;
+ }
+ mtx_enter(&sched_lock, MTX_SPIN);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
+ p = CURPROC;
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ MPASS(p->p_magic == P_MAGIC);
+ MPASS(p1->p_magic == P_MAGIC);
+ TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ LIST_REMOVE(m, mtx_contested);
+ atomic_cmpset_int(&m->mtx_lock, m->mtx_lock,
+ MTX_UNOWNED);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
+ } else
+ m->mtx_lock = MTX_CONTESTED;
+ pri = MAXPRI;
+ LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
+ int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
+ if (cp < pri)
+ pri = cp;
+ }
+ if (pri > p->p_nativepri)
+ pri = p->p_nativepri;
+ SET_PRIO(p, pri);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
+ m, p1);
+ p1->p_blocked = NULL;
+ p1->p_stat = SRUN;
+ setrunqueue(p1);
+ if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
+#ifdef notyet
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+					    "mtx_exit: 0x%x interrupted 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+ setrunqueue(p);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%x",
+ m, m->mtx_lock);
+ mi_switch();
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%x",
+ m, m->mtx_lock);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED)) {
+ if (type & MTX_FIRST) {
+ enable_intr(); /* XXX is this kosher? */
+ } else {
+ MPASS(m->mtx_savefl != 0xdeadbeef);
+ write_eflags(m->mtx_savefl);
+ }
+ return;
+ }
+		panic("unsuccessful release of spin lock");
+ case MTX_SPIN | MTX_TOPHALF:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED))
+ return;
+		panic("unsuccessful release of spin lock");
+ default:
+ panic("mtx_exit_hard: unsupported type 0x%x\n", type);
+ }
+}
+
+#define MV_DESTROY	0	/* validate before destroy */
+#define MV_INIT 1 /* validate before init */
+
+#ifdef SMP_DEBUG
+
+int mtx_validate __P((mtx_t *, int));
+
+int
+mtx_validate(mtx_t *m, int when)
+{
+ mtx_t *mp;
+ int i;
+ int retval = 0;
+
+ if (m == &all_mtx || cold)
+ return 0;
+
+ mtx_enter(&all_mtx, MTX_DEF);
+ ASS(kernacc((caddr_t)all_mtx.mtx_next, 4, 1) == 1);
+ ASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
+ for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
+ if (kernacc((caddr_t)mp->mtx_next, 4, 1) != 1) {
+ panic("mtx_validate: mp=%p mp->mtx_next=%p",
+ mp, mp->mtx_next);
+ }
+ i++;
+ if (i > mtx_cur_cnt) {
+ panic("mtx_validate: too many in chain, known=%d\n",
+ mtx_cur_cnt);
+ }
+ }
+ ASS(i == mtx_cur_cnt);
+ switch (when) {
+ case MV_DESTROY:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m)
+ break;
+ ASS(mp == m);
+ break;
+ case MV_INIT:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m) {
+ /*
+				 * Not good. This mutex already exists.
+ */
+ retval = 1;
+#if 1
+ printf("re-initing existing mutex %s\n",
+ m->mtx_description);
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ retval = 1;
+#else
+ panic("re-initing existing mutex %s",
+ m->mtx_description);
+#endif
+ }
+ }
+ mtx_exit(&all_mtx, MTX_DEF);
+ return (retval);
+}
+#endif
+
+void
+mtx_init(mtx_t *m, char *t, int flag)
+{
+
+ CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
+#ifdef SMP_DEBUG
+ if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */
+ return;
+#endif
+ bzero((void *)m, sizeof *m);
+ TAILQ_INIT(&m->mtx_blocked);
+ m->mtx_description = t;
+ m->mtx_lock = MTX_UNOWNED;
+ /* Put on all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next = &all_mtx;
+ m->mtx_prev = all_mtx.mtx_prev;
+ m->mtx_prev->mtx_next = m;
+ all_mtx.mtx_prev = m;
+ if (++mtx_cur_cnt > mtx_max_cnt)
+ mtx_max_cnt = mtx_cur_cnt;
+ mtx_exit(&all_mtx, MTX_DEF);
+ witness_init(m, flag);
+}
+
+void
+mtx_destroy(mtx_t *m)
+{
+
+ CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
+#ifdef SMP_DEBUG
+ if (m->mtx_next == NULL)
+ panic("mtx_destroy: %p (%s) already destroyed",
+ m, m->mtx_description);
+
+ if (!mtx_owned(m)) {
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ } else {
+ ASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
+ }
+ mtx_validate(m, MV_DESTROY); /* diagnostic */
+#endif
+
+#ifdef WITNESS
+ if (m->mtx_witness)
+ witness_destroy(m);
+#endif /* WITNESS */
+
+ /* Remove from the all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next->mtx_prev = m->mtx_prev;
+ m->mtx_prev->mtx_next = m->mtx_next;
+#ifdef SMP_DEBUG
+ m->mtx_next = m->mtx_prev = NULL;
+#endif
+ mtx_cur_cnt--;
+ mtx_exit(&all_mtx, MTX_DEF);
+}
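
The core of the lock-word protocol above: a mutex is free when its lock word
is MTX_UNOWNED, owned when it holds the owner's proc pointer, and the
MTX_CONTESTED flag bit records that waiters exist so release must take the
slow path.  What follows is a minimal, self-contained userland sketch of that
idea, using C11 atomics in place of atomic_cmpset_int(); blocking, recursion
and priority inheritance are deliberately stubbed out (it spins), and all
sketch_* names are invented:

	#include <stdatomic.h>
	#include <stdint.h>

	#define SK_UNOWNED	((uintptr_t)0)	/* lock word when free */
	#define SK_CONTESTED	((uintptr_t)1)	/* flag bit: waiters exist */

	struct sketch_mtx {
		_Atomic uintptr_t lock;		/* owner value or SK_UNOWNED */
	};

	/* "self" must be a nonzero, even value (low flag bit clear). */
	static void
	sketch_enter(struct sketch_mtx *m, uintptr_t self)
	{
		uintptr_t v;

		for (;;) {
			v = atomic_load(&m->lock);
			if ((v & ~SK_CONTESTED) == SK_UNOWNED) {
				/* free: take it, preserving the flag bit */
				if (atomic_compare_exchange_strong(&m->lock,
				    &v, self | (v & SK_CONTESTED)))
					return;
				continue;
			}
			/* held: mark contested so release sees waiters */
			atomic_compare_exchange_strong(&m->lock, &v,
			    v | SK_CONTESTED);
			/* the kernel sleeps here (p_stat = SMTX); we spin */
		}
	}

	static void
	sketch_exit(struct sketch_mtx *m, uintptr_t self)
	{
		uintptr_t v = self;

		/* uncontested fast path: owner -> free in one step */
		if (atomic_compare_exchange_strong(&m->lock, &v, SK_UNOWNED))
			return;
		/* contested: mtx_exit_hard() hands off to the best waiter;
		   the sketch just marks the lock free-but-contested */
		atomic_store(&m->lock, SK_UNOWNED | SK_CONTESTED);
	}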
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 51de1ac9e650..f32dfaeeddc0 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+		 * enabled later.  XXX Consider whether this is
+		 * still correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+				 * The f00f hack workaround has triggered;
+				 * treat it as an illegal instruction, not
+				 * a page fault.
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+	/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+	/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+	 * Handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+	}
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c
index 15044abbaa3b..724f3c2817ba 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+	/* Set up the PIC clk handler.  The APIC handler is set up later. */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+	/* Finally, set up the real clock handlers. */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
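
All of the clock hunks above convert the same critical-section shape.  The
old code saved EFLAGS by hand with read_eflags()/write_eflags() (or hid the
cli behind CLOCK_DISABLE_INTR() on SMP); the new code makes the three steps
explicit.  The recurring pattern, as a sketch built from the save_intr(),
restore_intr() and CLOCK_LOCK() primitives this commit introduces:

	int intrsave;

	intrsave = save_intr();		/* snapshot EFLAGS, incl. PSL_I */
	disable_intr();			/* cli */
	CLOCK_LOCK();			/* spin lock on SMP, no-op on UP */

	/* ... program or latch the 8254 timer ... */

	CLOCK_UNLOCK();
	restore_intr(intrsave);		/* popfl: IF restored, not forced on */

Using restore_intr() rather than enable_intr() matters: interrupts come back
only if they were enabled on entry, so the sequence nests safely inside
callers that already run with interrupts disabled.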
diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s
index 6a11c2685488..14b4259005bf 100644
--- a/sys/i386/i386/vm86bios.s
+++ b/sys/i386/i386/vm86bios.s
@@ -62,11 +62,9 @@ ENTRY(vm86_bioscall)
pushl %edi
pushl %gs
-#ifdef SMP
pushl %edx
- MP_LOCK /* Get global lock */
+ call __mtx_enter_giant_def /* Get global lock */
popl %edx
-#endif
#if NNPX > 0
movl _curproc,%ecx
@@ -135,13 +133,9 @@ ENTRY(vm86_bioscall)
/*
* Return via _doreti
*/
-#ifdef SMP
- pushl _cpl /* cpl to restore */
-#else
- pushl _cpl /* cpl to restore */
-#endif
subl $4,%esp /* dummy unit */
incb _intr_nesting_level
+ call __mtx_exit_giant_def
MEXITCOUNT
jmp _doreti
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index cfb6ceef44d6..831ab3b168a6 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -57,12 +57,14 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags)
* pcb2->pcb_onfault: cloned above (always NULL here?).
*/
-#ifdef SMP
- pcb2->pcb_mpnest = 1;
-#endif
+ pcb2->pcb_schednest = 0;
+
/*
* XXX don't copy the i/o pages. this should probably be fixed.
*/
@@ -256,8 +257,11 @@ cpu_exit(p)
reset_dbregs();
pcb->pcb_flags &= ~PCB_DBREGS;
}
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+ mtx_assert(&Giant, MA_NOTOWNED);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -406,17 +410,10 @@ vunmapbuf(bp)
static void
cpu_reset_proxy()
{
- u_int saved_mp_lock;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to disable interupts */
- saved_mp_lock = mp_lock;
- mp_lock = 1;
- printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");
- cpu_reset_proxy_active = 3;
- while (cpu_reset_proxy_active == 3)
- ; /* Wait for other cpu to enable interrupts */
+ ; /* Wait for other cpu to see that we've started */
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@@ -453,6 +450,7 @@ cpu_reset()
cpu_reset_proxyid = cpuid;
cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
started_cpus = (1<<0); /* Restart CPU #0 */
@@ -461,17 +459,9 @@ cpu_reset()
cnt++; /* Wait for BSP to announce restart */
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
- __asm __volatile("cli" : : : "memory");
+ enable_intr();
cpu_reset_proxy_active = 2;
- cnt = 0;
- while (cpu_reset_proxy_active == 2 && cnt < 10000000)
- cnt++; /* Do nothing */
- if (cpu_reset_proxy_active == 2) {
- printf("cpu_reset: BSP did not grab mp lock\n");
- cpu_reset_real(); /* XXX: Bogus ? */
- }
- cpu_reset_proxy_active = 4;
- __asm __volatile("sti" : : : "memory");
+
while (1);
/* NOTREACHED */
}
@@ -553,7 +543,7 @@ vm_page_zero_idle()
static int free_rover;
static int zero_state;
vm_page_t m;
- int s;
+ int s, intrsave;
/*
* Attempt to maintain approximately 1/2 of our free pages in a
@@ -569,11 +559,10 @@ vm_page_zero_idle()
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
return(0);
-#ifdef SMP
- if (try_mplock()) {
-#endif
+ if (mtx_try_enter(&Giant, MTX_DEF)) {
s = splvm();
- __asm __volatile("sti" : : : "memory");
+ intrsave = save_intr();
+ enable_intr();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
if (m != NULL && (m->flags & PG_ZERO) == 0) {
@@ -595,14 +584,10 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- __asm __volatile("cli" : : : "memory");
-#ifdef SMP
- rel_mplock();
-#endif
+ restore_intr(intrsave);
+ mtx_exit(&Giant, MTX_DEF);
return (1);
-#ifdef SMP
}
-#endif
/*
* We have to enable interrupts for a moment if the try_mplock fails
* in order to potentially take an IPI. XXX this should be in
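
vm_page_zero_idle() shows the non-blocking variant of the same conversion:
the idle loop must never sleep on Giant, so it replaces try_mplock() with
mtx_try_enter() and simply skips the work when the lock is busy.  Condensed
to its shape (a sketch; names are from the hunk above, and the page-zeroing
body is elided):

	static int
	sketch_zero_idle(void)
	{
		int intrsave;

		if (!mtx_try_enter(&Giant, MTX_DEF))
			return (0);	/* lock busy: stay idle */

		intrsave = save_intr();
		enable_intr();		/* allow IPIs while we work */
		/* ... take a page from PQ_FREE and zero it ... */
		restore_intr(intrsave);

		mtx_exit(&Giant, MTX_DEF);
		return (1);		/* did useful work */
	}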
diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h
index 3ccbee6be344..efdb0f9710a1 100644
--- a/sys/i386/include/asnames.h
+++ b/sys/i386/include/asnames.h
@@ -131,6 +131,7 @@
#define _Xintr7 Xintr7
#define _Xintr8 Xintr8
#define _Xintr9 Xintr9
+#define _Xtintr0 Xtintr0
#define _Xinvltlb Xinvltlb
#define _Xrendezvous Xrendezvous
#define _Xmchk Xmchk
@@ -155,6 +156,7 @@
#define _arith_invalid arith_invalid
#define _arith_overflow arith_overflow
#define _arith_underflow arith_underflow
+#define _ast ast
#define _bcopy bcopy
#define _bcopy_vector bcopy_vector
#define _bigJump bigJump
@@ -184,7 +186,6 @@
#define _cnt cnt
#define _copyin_vector copyin_vector
#define _copyout_vector copyout_vector
-#define _cpl cpl
#define _cpl_lock cpl_lock
#define _cpu cpu
#define _cpu0prvpage cpu0prvpage
@@ -222,6 +223,7 @@
#define _get_isrlock get_isrlock
#define _get_mplock get_mplock
#define _get_syscall_lock get_syscall_lock
+#define _Giant Giant
#define _idle idle
#define _ihandlers ihandlers
#define _imen imen
@@ -232,13 +234,11 @@
#define _intr_countp intr_countp
#define _intr_handler intr_handler
#define _intr_mask intr_mask
-#define _intr_nesting_level intr_nesting_level
#define _intr_unit intr_unit
#define _intrcnt intrcnt
#define _intrnames intrnames
#define _invltlb_ok invltlb_ok
#define _ioapic ioapic
-#define _ipending ipending
#define _isr_lock isr_lock
#define _kernelname kernelname
#define _lapic lapic
@@ -249,6 +249,8 @@
#define _mp_gdtbase mp_gdtbase
#define _mp_lock mp_lock
#define _mp_ncpus mp_ncpus
+#define __mtx_enter_giant_def _mtx_enter_giant_def
+#define __mtx_exit_giant_def _mtx_exit_giant_def
#define _mul64 mul64
#define _net_imask net_imask
#define _netisr netisr
@@ -281,6 +283,8 @@
#define _round_reg round_reg
#define _s_lock s_lock
#define _s_unlock s_unlock
+#define _sched_ithd sched_ithd
+#define _sched_lock sched_lock
#define _set_precision_flag_down set_precision_flag_down
#define _set_precision_flag_up set_precision_flag_up
#define _set_user_ldt set_user_ldt
@@ -293,6 +297,7 @@
#define _softclock softclock
#define _softnet_imask softnet_imask
#define _softtty_imask softtty_imask
+#define _spending spending
#define _spl0 spl0
#define _splz splz
#define _ss_lock ss_lock
@@ -326,9 +331,9 @@
#if defined(SMP) || defined(__ELF__)
#ifdef SMP
-#define FS(x) %fs:gd_ ## x
+#define FS(x) %fs:gd_ ## x
#else
-#define FS(x) x
+#define FS(x) x
#endif
#define _common_tss FS(common_tss)
@@ -337,6 +342,8 @@
#define _cpu_lockid FS(cpu_lockid)
#define _curpcb FS(curpcb)
#define _curproc FS(curproc)
+#define _prevproc FS(prevproc)
+#define _idleproc FS(idleproc)
#define _astpending FS(astpending)
#define _currentldt FS(currentldt)
#define _inside_intr FS(inside_intr)
@@ -353,9 +360,16 @@
#define _ss_eflags FS(ss_eflags)
#define _switchticks FS(switchticks)
#define _switchtime FS(switchtime)
+#define _intr_nesting_level FS(intr_nesting_level)
#define _tss_gdt FS(tss_gdt)
#define _idlestack FS(idlestack)
#define _idlestack_top FS(idlestack_top)
+#define _witness_spin_check FS(witness_spin_check)
+/*
+#define _ktr_idx FS(ktr_idx)
+#define _ktr_buf FS(ktr_buf)
+#define _ktr_buf_data FS(ktr_buf_data)
+*/
#endif
diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h
index ffabf7f8ed54..18822b87cc5b 100644
--- a/sys/i386/include/cpu.h
+++ b/sys/i386/include/cpu.h
@@ -46,6 +46,7 @@
#include <machine/psl.h>
#include <machine/frame.h>
#include <machine/segments.h>
+#include <machine/globals.h>
/*
* definitions of cpu-dependent requirements
@@ -86,7 +87,9 @@
 * added, we will have an atomicity problem. The type of atomicity we need is
* a non-locked orl.
*/
-#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define need_resched() do { \
+ PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \
+} while (0)
#define resched_wanted() (astpending & AST_RESCHED)
/*
@@ -109,8 +112,9 @@
* it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-
-#define aston() do { astpending |= AST_PENDING; } while (0)
+#define aston() do { \
+ PCPU_SET(astpending, astpending | AST_PENDING); \
+} while (0)
#define astoff()
/*
@@ -135,7 +139,9 @@
#ifdef _KERNEL
extern char btext[];
extern char etext[];
+#ifndef intr_nesting_level
extern u_char intr_nesting_level;
+#endif
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 9a4052fd41d1..39868df422aa 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -86,20 +86,29 @@ static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
-#ifdef SMP
- MPINTR_LOCK();
-#endif
}
static __inline void
enable_intr(void)
{
-#ifdef SMP
- MPINTR_UNLOCK();
-#endif
__asm __volatile("sti");
}
+static __inline u_int
+save_intr(void)
+{
+ u_int ef;
+
+ __asm __volatile("pushfl; popl %0" : "=r" (ef));
+ return (ef);
+}
+
+static __inline void
+restore_intr(u_int ef)
+{
+ __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" );
+}
+
#define HAVE_INLINE_FFS
static __inline int
diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h
index 58bd9cfe9416..440da60b4b83 100644
--- a/sys/i386/include/globaldata.h
+++ b/sys/i386/include/globaldata.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
diff --git a/sys/i386/include/globals.h b/sys/i386/include/globals.h
index ae05d5644e76..71bbbd580d9e 100644
--- a/sys/i386/include/globals.h
+++ b/sys/i386/include/globals.h
@@ -74,6 +74,14 @@
__asm("movl %0,%%fs:gd_" #name : : "r" (val)); \
}
+static __inline int
+_global_globaldata(void)
+{
+ int val;
+ __asm("movl %%fs:globaldata,%0" : "=r" (val));
+ return (val);
+}
+
#if defined(SMP) || defined(KLD_MODULE) || defined(ACTUALLY_LKM_NOT_KERNEL)
/*
* The following set of macros works for UP kernel as well, but for maximum
@@ -82,18 +90,21 @@
* portability between UP and SMP kernels.
*/
#define curproc GLOBAL_RVALUE_NV(curproc, struct proc *)
+#define prevproc GLOBAL_RVALUE_NV(prevproc, struct proc *)
#define curpcb GLOBAL_RVALUE_NV(curpcb, struct pcb *)
-#define npxproc GLOBAL_LVALUE(npxproc, struct proc *)
+#define npxproc GLOBAL_RVALUE_NV(npxproc, struct proc *)
+#define idleproc GLOBAL_RVALUE_NV(idleproc, struct proc *)
#define common_tss GLOBAL_LVALUE(common_tss, struct i386tss)
#define switchtime GLOBAL_LVALUE(switchtime, struct timeval)
#define switchticks GLOBAL_LVALUE(switchticks, int)
+#define intr_nesting_level GLOBAL_RVALUE(intr_nesting_level, u_char)
#define common_tssd GLOBAL_LVALUE(common_tssd, struct segment_descriptor)
#define tss_gdt GLOBAL_LVALUE(tss_gdt, struct segment_descriptor *)
-#define astpending GLOBAL_LVALUE(astpending, u_int)
+#define astpending GLOBAL_RVALUE(astpending, u_int)
#ifdef USER_LDT
-#define currentldt GLOBAL_LVALUE(currentldt, int)
+#define currentldt GLOBAL_RVALUE(currentldt, int)
#endif
#ifdef SMP
@@ -109,19 +120,32 @@
#define prv_CADDR3 GLOBAL_RVALUE(prv_CADDR3, caddr_t)
#define prv_PADDR1 GLOBAL_RVALUE(prv_PADDR1, unsigned *)
#endif
+
+#define witness_spin_check GLOBAL_RVALUE(witness_spin_check, int)
+
#endif /*UP kernel*/
GLOBAL_FUNC(curproc)
+GLOBAL_FUNC(prevproc)
GLOBAL_FUNC(astpending)
GLOBAL_FUNC(curpcb)
GLOBAL_FUNC(npxproc)
+GLOBAL_FUNC(idleproc)
GLOBAL_FUNC(common_tss)
GLOBAL_FUNC(switchtime)
GLOBAL_FUNC(switchticks)
+GLOBAL_FUNC(intr_nesting_level)
GLOBAL_FUNC(common_tssd)
GLOBAL_FUNC(tss_gdt)
+/* XXX */
+#ifdef KTR_PERCPU
+GLOBAL_FUNC(ktr_idx)
+GLOBAL_FUNC(ktr_buf)
+GLOBAL_FUNC(ktr_buf_data)
+#endif
+
#ifdef USER_LDT
GLOBAL_FUNC(currentldt)
#endif
@@ -140,7 +164,17 @@ GLOBAL_FUNC(prv_CADDR3)
GLOBAL_FUNC(prv_PADDR1)
#endif
-#define SET_CURPROC(x) (_global_curproc_set_nv((int)x))
+GLOBAL_FUNC(witness_spin_check)
+
+#ifdef SMP
+#define GLOBALDATA GLOBAL_RVALUE(globaldata, struct globaldata *)
+#else
+#define GLOBALDATA (&globaldata)
+#endif
+
+#define CURPROC curproc
+
+#define PCPU_SET(name, value) (_global_##name##_set((int)value))
#endif /* _KERNEL */
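
The macro layers above reduce to %fs-relative loads and stores: on SMP every
CPU maps its own globaldata page at the same %fs base, so one instruction
with a segment override reaches the right CPU's copy of each field.  Expanded
by hand for two fields (approximately what GLOBAL_FUNC() generates; on a UP
kernel the same names can compile down to plain globals):

	static __inline struct proc *
	_global_curproc(void)			/* reader: curproc */
	{
		struct proc *val;

		__asm("movl %%fs:gd_curproc,%0" : "=r" (val));
		return (val);
	}

	static __inline void
	_global_astpending_set(u_int val)	/* writer: PCPU_SET(astpending, ...) */
	{
		__asm("movl %0,%%fs:gd_astpending" : : "r" (val));
	}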
diff --git a/sys/i386/include/ipl.h b/sys/i386/include/ipl.h
index 54d3f4b7b4b5..08726df51d84 100644
--- a/sys/i386/include/ipl.h
+++ b/sys/i386/include/ipl.h
@@ -43,9 +43,19 @@
#endif
/*
+ * Software interrupt level. We treat the software interrupt as a
+ * single interrupt at a fictitious hardware interrupt level.
+ */
+#define SOFTINTR (NHWI + 0)
+
+/*
* Software interrupt bit numbers in priority order. The priority only
* determines which swi will be dispatched next; a higher priority swi
* may be dispatched when a nested h/w interrupt handler returns.
+ *
+ * XXX FIXME: There's no longer a relation between the SWIs and the
+ * HWIs, so it makes more sense for these values to start at 0, but
+ * there's lots of code which expects them to start at NHWI.
*/
#define SWI_TTY (NHWI + 0)
#define SWI_NET (NHWI + 1)
@@ -104,12 +114,9 @@
#ifdef notyet /* in <sys/interrupt.h> until pci drivers stop hacking on them */
extern unsigned bio_imask; /* group of interrupts masked with splbio() */
#endif
-extern unsigned cpl; /* current priority level mask */
-#ifdef SMP
-extern unsigned cil; /* current INTerrupt level mask */
-#endif
+
extern volatile unsigned idelayed; /* interrupts to become pending */
-extern volatile unsigned ipending; /* active interrupts masked by cpl */
+extern volatile unsigned spending; /* pending software interrupts */
#ifdef notyet /* in <sys/systm.h> until pci drivers stop hacking on them */
extern unsigned net_imask; /* group of interrupts masked with splimp() */
extern unsigned stat_imask; /* interrupts masked with splstatclock() */
diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h
index 534f77e8d2fb..b4af09d9c579 100644
--- a/sys/i386/include/lock.h
+++ b/sys/i386/include/lock.h
@@ -37,21 +37,6 @@
#define MPLOCKED lock ;
/*
- * Some handy macros to allow logical organization.
- */
-
-#define MP_LOCK call _get_mplock
-
-#define MP_TRYLOCK \
- pushl $_mp_lock ; /* GIANT_LOCK */ \
- call _MPtrylock ; /* try to get lock */ \
- add $4, %esp
-
-#define MP_RELLOCK \
- movl $_mp_lock,%edx ; /* GIANT_LOCK */ \
- call _MPrellock_edx
-
-/*
* Protects the IO APIC and apic_imen as a critical region.
*/
#define IMASK_LOCK \
@@ -66,7 +51,8 @@
#define MPLOCKED /* NOP */
-#define MP_LOCK /* NOP */
+#define IMASK_LOCK /* NOP */
+#define IMASK_UNLOCK /* NOP */
#endif /* SMP */
@@ -77,32 +63,15 @@
#include <machine/smptests.h> /** xxx_LOCK */
/*
- * Locks regions protected in UP kernel via cli/sti.
- */
-#ifdef USE_MPINTRLOCK
-#define MPINTR_LOCK() s_lock(&mpintr_lock)
-#define MPINTR_UNLOCK() s_unlock(&mpintr_lock)
-#else
-#define MPINTR_LOCK()
-#define MPINTR_UNLOCK()
-#endif /* USE_MPINTRLOCK */
-
-/*
* sio/cy lock.
* XXX should rc (RISCom/8) use this?
*/
#ifdef USE_COMLOCK
#define COM_LOCK() s_lock(&com_lock)
#define COM_UNLOCK() s_unlock(&com_lock)
-#define COM_DISABLE_INTR() \
- { __asm __volatile("cli" : : : "memory"); COM_LOCK(); }
-#define COM_ENABLE_INTR() \
- { COM_UNLOCK(); __asm __volatile("sti"); }
#else
#define COM_LOCK()
#define COM_UNLOCK()
-#define COM_DISABLE_INTR() disable_intr()
-#define COM_ENABLE_INTR() enable_intr()
#endif /* USE_COMLOCK */
/*
@@ -112,22 +81,13 @@
#ifdef USE_CLOCKLOCK
#define CLOCK_LOCK() s_lock(&clock_lock)
#define CLOCK_UNLOCK() s_unlock(&clock_lock)
-#define CLOCK_DISABLE_INTR() \
- { __asm __volatile("cli" : : : "memory"); CLOCK_LOCK(); }
-#define CLOCK_ENABLE_INTR() \
- { CLOCK_UNLOCK(); __asm __volatile("sti"); }
#else
#define CLOCK_LOCK()
#define CLOCK_UNLOCK()
-#define CLOCK_DISABLE_INTR() disable_intr()
-#define CLOCK_ENABLE_INTR() enable_intr()
#endif /* USE_CLOCKLOCK */
#else /* SMP */
-#define MPINTR_LOCK()
-#define MPINTR_UNLOCK()
-
#define COM_LOCK()
#define COM_UNLOCK()
#define CLOCK_LOCK()
@@ -168,6 +128,7 @@ extern struct simplelock clock_lock;
extern struct simplelock com_lock;
extern struct simplelock mpintr_lock;
extern struct simplelock mcount_lock;
+extern struct simplelock panic_lock;
#if !defined(SIMPLELOCK_DEBUG) && NCPUS > 1
/*
diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h
index 61c5ecf73205..95b5759f9e66 100644
--- a/sys/i386/include/mptable.h
+++ b/sys/i386/include/mptable.h
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
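
The AP bootstrap gate added above is a simple hold/release pattern: the BSP
takes ap_boot_lock before starting the APs, each AP serializes its own startup
through the same lock in ap_init(), and the SYSINIT finally drops the BSP's
hold once the kernel is ready. A condensed sketch of the pattern (illustrative
only; the real initialization steps are elided):

	/* BSP, in mp_enable(), before starting the APs: */
	s_lock(&ap_boot_lock);			/* hold the gate closed */
	start_all_aps(boot_addr);

	/* Each AP, at the top of ap_init(): */
	s_lock(&ap_boot_lock);			/* blocks until released */
	/* ... per-CPU setup ... */
	s_unlock(&ap_boot_lock);		/* let the next AP through */

	/* BSP, later, via SYSINIT(start_aps, SI_SUB_SMP, ...): */
	release_aps(NULL);			/* s_unlock(&ap_boot_lock) */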
diff --git a/sys/i386/include/mutex.h b/sys/i386/include/mutex.h
new file mode 100644
index 000000000000..ef0c9638fc18
--- /dev/null
+++ b/sys/i386/include/mutex.h
@@ -0,0 +1,786 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/ktr.h>
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globals.h>
+
+/*
+ * If kern_mutex.c is being built, compile non-inlined versions of various
+ * functions so that kernel modules can use them.
+ */
+#ifndef _KERN_MUTEX_C_
+#define _MTX_INLINE static __inline
+#else
+#define _MTX_INLINE
+#endif
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x0 /* Default (spin/sleep) */
+#define MTX_SPIN 0x1 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int mtx_lock; /* lock owner/gate/flags */
+ volatile u_short mtx_recurse; /* number of recursive holds */
+ u_short mtx_f1;
+ u_int mtx_savefl; /* saved flags (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int flags);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_))
+void mtx_enter(mtx_t *mtxp, int type);
+int mtx_try_enter(mtx_t *mtxp, int type);
+void mtx_exit(mtx_t *mtxp, int type);
+#endif
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
+/*
+ * Used to replace a plain return with one that exits Giant first.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
+
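+/*
+ * Illustrative usage (hypothetical example, not part of this change):
+ * drop Giant around a sleep and pick it back up afterwards:
+ *
+ *	DROP_GIANT();
+ *	tsleep(chan, PZERO, "examplewt", hz);
+ *	PICKUP_GIANT();
+ */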
+
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN)
+#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is a sleazy way of shutting up the unused parameter warning
+ * in mtx_init()
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *------------------------------------------------------------------------------
+ */
+
+#define _V(x) __STRING(x)
+
+#ifndef I386_CPU
+
+/*
+ * For 486 and newer processors.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 1f;" /* Got it */ \
+" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \
+" cmpl %%eax,%3;" /* already have it? */ \
+" je 2f;" /* yes, recurse */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \
+" incw %2;" \
+"1:" \
+"# getlock_sleep" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "+m" (mtxp->mtx_recurse) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" pushfl;" \
+" cli;" \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 2f;" /* got it */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \
+" addl $0xc,%%esp;" \
+" jmp 1f;" \
+"2: popl %2;" /* save flags */ \
+"1:" \
+"# getlock_spin_block" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "=m" (mtxp->mtx_savefl) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %2,%1;" /* Try */ \
+" jz 1f;" /* got it */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \
+" addl $8,%%esp;" \
+"1:" \
+"# getlock_norecurse" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock) /* 1 */ \
+ : "r" (tid), /* 2 (input) */ \
+ "gi" (type), /* 3 */ \
+ "g" (mtxp) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %4,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" pushl %2;" \
+" pushl %3;" \
+" call mtx_exit_hard;" \
+" addl $8,%%esp;" \
+"1:" \
+"# exitlock_norecurse" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+a" (_tid) /* 1 */ \
+ : "gi" (type), /* 2 (input) */ \
+ "g" (mtxp), /* 3 */ \
+ "r" (MTX_UNOWNED) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %5,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \
+" jnz 3f;" /* handle recursion */ \
+ /* Lock not recursed and contested: do the hard way */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+ /* lock recursed, lower recursion level */ \
+"3: decw %1;" /* one less level */ \
+" jnz 1f;" /* still recursed, done */ \
+" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \
+"1:" \
+"# exitlock" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "+a" (_tid) /* 2 */ \
+ : "gi" (type), /* 3 (input) */ \
+ "g" (mtxp), /* 4 */ \
+ "r" (MTX_UNOWNED) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a spin lock (with possible recursion).
+ *
+ * We use cmpxchgl to clear the lock (instead of a simple store) to flush
+ * posting buffers and make the change visible to other CPU's.
+ */
+#define _exitlock_spin(mtxp, inten1, inten2) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movw %1,%%ax;" \
+" decw %%ax;" \
+" js 1f;" \
+" movw %%ax,%1;" \
+" jmp 2f;" \
+"1: movl %0,%%eax;" \
+" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \
+" " inten1 ";" \
+" " MPLOCKED "" \
+" cmpxchgl %%ecx,%0;" \
+" " inten2 ";" \
+"2:" \
+"# exitlock_spin" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "=&a" (_res) /* 2 */ \
+ : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \
+ : "memory", "ecx" /* used */ ); \
+})
+
+#else /* I386_CPU */
+
+/*
+ * For 386 processors only.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } \
+} while (0)
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _mtx_fl = read_eflags(); \
+ disable_intr(); \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \
+ else \
+ (mp)->mtx_savefl = _mtx_fl; \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+} while (0)
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \
+ if ((mp)->mtx_lock & MTX_RECURSE) { \
+ if (--((mp)->mtx_recurse) == 0) \
+ atomic_clear_int(&(mp)->mtx_lock, \
+ MTX_RECURSE); \
+ } else { \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+ } \
+} while (0)
+
+/* Release a spin lock (with possible recursion). */
+#define _exitlock_spin(mp, inten1, inten2) do { \
+ if ((mp)->mtx_recurse == 0) { \
+ atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ write_eflags((mp)->mtx_savefl); \
+ } else { \
+ (mp)->mtx_recurse--; \
+ } \
+} while (0)
+
+#endif /* I386_CPU */
+
+/*
+ * Externally visible mutex functions.
+ *------------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread.
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+#ifdef KTR_EXTEND
+
+/*
+ * KTR_EXTEND saves file name and line for all entries, so we don't need them
+ * here. Theoretically we should also change the entries which refer to them
+ * (from CTR5 to CTR3), but since they're just passed to snprinf as the last
+ * parameters, it doesn't do any harm to leave them.
+ */
+char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d";
+#else
+char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d";
+#endif
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+#ifndef KLD_MODULE
+/*
+ * Get lock 'm'; the macro handles the easy (and most common) cases and
+ * leaves the slow stuff to the mtx_enter_hard() function.
+ *
+ * Note: since type is usually a constant much of this code is optimized out.
+ */
+_MTX_INLINE void
+mtx_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *_mpp = mtxp;
+
+ /* bits only valid on mtx_exit() */
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
+ STR_mtx_bad_type);
+
+ do {
+ if ((type) & MTX_SPIN) {
+ /*
+ * Easy cases of spin locks:
+ *
+ * 1) We already own the lock and will simply
+ * recurse on it (if RLIKELY)
+ *
+ * 2) The lock is free, we just get it
+ */
+ if ((type) & MTX_RLIKELY) {
+ /*
+ * Check for recursion, if we already
+ * have this lock we just bump the
+ * recursion count.
+ */
+ if (_mpp->mtx_lock == CURTHD) {
+ _mpp->mtx_recurse++;
+ break; /* Done */
+ }
+ }
+
+ if (((type) & MTX_TOPHALF) == 0) {
+ /*
+ * If an interrupt thread uses this
+ * we must block interrupts here.
+ */
+ if ((type) & MTX_FIRST) {
+ ASS_IEN;
+ disable_intr();
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ _getlock_spin_block(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ /* Sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _getlock_sleep(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } while (0);
+ WITNESS_ENTER(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+}
+
+/*
+ * Attempt to get a MTX_DEF lock; return non-zero if the lock was acquired.
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+_MTX_INLINE int
+mtx_try_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+ int _rval;
+
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);
+#ifdef SMP_DEBUG
+ if (_rval && (_mpp)->mtx_witness != NULL) {
+ ASS((_mpp)->mtx_recurse == 0);
+ witness_try_enter(_mpp, type, __FILE__, __LINE__);
+ }
+#endif
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval);
+
+ return _rval;
+}
+
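+/*
+ * Illustrative use of mtx_try_enter() (hypothetical lock, not part of
+ * this change): avoid blocking when the lock is busy:
+ *
+ *	if (mtx_try_enter(&foo_mtx, MTX_DEF)) {
+ *		... touch the data foo_mtx protects ...
+ *		mtx_exit(&foo_mtx, MTX_DEF);
+ *	}
+ */
+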
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m.
+ */
+_MTX_INLINE void
+mtx_exit(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned);
+ WITNESS_EXIT(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+ if ((type) & MTX_SPIN) {
+ if ((type) & MTX_NORECURSE) {
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse);
+ atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock,
+ MTX_UNOWNED);
+ if (((type) & MTX_TOPHALF) == 0) {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ enable_intr();
+ } else
+ write_eflags(_mpp->mtx_savefl);
+ }
+ } else {
+ if ((type) & MTX_TOPHALF)
+ _exitlock_spin(_mpp,,);
+ else {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ _exitlock_spin(_mpp,, "sti");
+ } else {
+ _exitlock_spin(_mpp,
+ "pushl %3", "popfl");
+ }
+ }
+ }
+ } else {
+ /* Handle sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS);
+ else {
+ _exitlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ }
+}
+
+#endif /* KLD_MODULE */
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+
+#if defined(I386_CPU)
+
+#define MTX_EXIT(lck, reg) \
+ movl $ MTX_UNOWNED,lck+MTX_LOCK;
+
+#else /* I386_CPU */
+
+#define MTX_ENTER(reg, lck) \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b
+
+/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */
+#define MTX_EXIT(lck,reg) \
+ movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+
+#define MTX_ENTER_WITH_RECURSION(reg, lck) \
+ movl lck+MTX_LOCK,%eax; \
+ cmpl PCPU_CURPROC,%eax; \
+ jne 9f; \
+ incw lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b; \
+8:
+
+#define MTX_EXIT_WITH_RECURSION(lck,reg) \
+ movw lck+MTX_RECURSECNT,%ax; \
+ decw %ax; \
+ js 9f; \
+ movw %ax,lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+8:
+
+#endif /* I386_CPU */
+#endif /* !LOCORE */
+#endif /* __MACHINE_MUTEX_H */
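
For readers who prefer C to inline assembly: the 486+ fast paths above are
morally equivalent to the I386_CPU C macros in the same header, namely a single
atomic compare-and-swap between the MTX_UNOWNED cookie and the current thread
ID, with contention and recursion pushed out to the hard functions. A minimal
sketch using names defined in this header:

	/* Uncontended acquire: CAS the unowned cookie to our thread ID. */
	if (atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED, CURTHD) == 0)
		mtx_enter_hard(m, type & MTX_HARDOPTS, 0);	/* slow path */

	/* Uncontended release: CAS our thread ID back to the cookie. */
	if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED) == 0)
		mtx_exit_hard(m, type & MTX_HARDOPTS);		/* slow path */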
diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h
index 08beb5a83059..1c7af8505ab1 100644
--- a/sys/i386/include/pcb.h
+++ b/sys/i386/include/pcb.h
@@ -72,11 +72,7 @@ struct pcb {
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
caddr_t pcb_onfault; /* copyin/out fault recovery */
-#ifdef SMP
- u_long pcb_mpnest;
-#else
- u_long pcb_mpnest_dontuse;
-#endif
+ int pcb_schednest;
int pcb_gs;
struct pcb_ext *pcb_ext; /* optional pcb extension */
u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index 58bd9cfe9416..440da60b4b83 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
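
The new gd_allcpu linkage and the cpuhead list make every CPU's globaldata
reachable from C; a minimal sketch of walking it (diagnostic printf assumed
for illustration, not part of this change):

	struct globaldata *gd;

	SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
		printf("cpu%d: curproc %p\n", gd->gd_cpuid, gd->gd_curproc);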
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 69b716ba8579..20d4fa3a8873 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -15,6 +15,9 @@
#ifdef _KERNEL
+#ifdef I386_CPU
+#error SMP not supported with I386_CPU
+#endif
#if defined(SMP) && !defined(APIC_IO)
# error APIC_IO required for SMP, add "options APIC_IO" to your config file.
#endif /* SMP && !APIC_IO */
@@ -57,23 +60,6 @@ extern int bootMP_size;
/* functions in mpboot.s */
void bootMP __P((void));
-/* global data in mplock.s */
-extern u_int mp_lock;
-extern u_int isr_lock;
-#ifdef RECURSIVE_MPINTRLOCK
-extern u_int mpintr_lock;
-#endif /* RECURSIVE_MPINTRLOCK */
-
-/* functions in mplock.s */
-void get_mplock __P((void));
-void rel_mplock __P((void));
-int try_mplock __P((void));
-#ifdef RECURSIVE_MPINTRLOCK
-void get_mpintrlock __P((void));
-void rel_mpintrlock __P((void));
-int try_mpintrlock __P((void));
-#endif /* RECURSIVE_MPINTRLOCK */
-
/* global data in apic_vector.s */
extern volatile u_int stopped_cpus;
extern volatile u_int started_cpus;
@@ -185,23 +171,7 @@ extern int smp_started;
extern volatile int smp_idle_loops;
#endif /* !LOCORE */
-#else /* !SMP && !APIC_IO */
-
-/*
- * Create dummy MP lock empties
- */
-
-static __inline void
-get_mplock(void)
-{
-}
-
-static __inline void
-rel_mplock(void)
-{
-}
-
-#endif
+#endif /* SMP && !APIC_IO */
#endif /* _KERNEL */
#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h
index f9ac4a36919e..304e99051295 100644
--- a/sys/i386/include/smptests.h
+++ b/sys/i386/include/smptests.h
@@ -86,7 +86,6 @@
* These defines enable critical region locking of areas that were
* protected via cli/sti in the UP kernel.
*
- * MPINTRLOCK protects all the generic areas.
* COMLOCK protects the sio/cy drivers.
* CLOCKLOCK protects clock hardware and data
* known to be incomplete:
@@ -94,7 +93,6 @@
* ?
*/
#ifdef PUSHDOWN_LEVEL_1
-#define USE_MPINTRLOCK
#define USE_COMLOCK
#define USE_CLOCKLOCK
#endif
@@ -176,9 +174,8 @@
/*
* Send CPUSTOP IPI for stop/restart of other CPUs on DDB break.
- *
-#define VERBOSE_CPUSTOP_ON_DDBBREAK
*/
+#define VERBOSE_CPUSTOP_ON_DDBBREAK
#define CPUSTOP_ON_DDBBREAK
diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s
index 94771f3eadb3..0def1de7e02d 100644
--- a/sys/i386/isa/apic_ipl.s
+++ b/sys/i386/isa/apic_ipl.s
@@ -69,78 +69,6 @@ _apic_imen:
SUPERALIGN_TEXT
/*
- * splz() - dispatch pending interrupts after cpl reduced
- *
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. However, since ipending can change at any time
- * (by an interrupt or, with SMP, by another cpu), we have to
- * repeat the check. At the moment we must own the MP lock in
- * the SMP case because the interruput handlers require it. We
- * loop until no unmasked pending interrupts remain.
- *
- * No new unmaksed pending interrupts will be added during the
- * loop because, being unmasked, the interrupt code will be able
- * to execute the interrupts.
- *
- * Interrupts come in two flavors: Hardware interrupts and software
- * interrupts. We have to detect the type of interrupt (based on the
- * position of the interrupt bit) and call the appropriate dispatch
- * routine.
- *
- * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't
- * rely on the secondary btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx /* set bit = unmasked level */
- andl _ipending,%ecx /* set bit = unmasked pending INT */
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- lock
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- *
- * The vec[] routines build the proper frame on the stack,
- * then call one of _Xintr0 thru _XintrNN.
- */
- jmp *_vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
@@ -161,8 +89,6 @@ __CONCAT(vec,irq_num): ; \
pushl $KCSEL ; \
pushl %eax ; \
cli ; \
- lock ; /* MP-safe */ \
- andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \
MEXITCOUNT ; \
APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \
jmp __CONCAT(_Xintr,irq_num)
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index 2a7559df7f97..54bf00366c81 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
	movl $0, %eax	/* Pick CPU #0 if no one has the lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
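
With the lazy masking and cpl manipulation gone, the slow-interrupt stub above
reduces to: mask and EOI the source, push the IRQ number, and let sched_ithd()
wake the corresponding interrupt thread before returning through doreti_next.
In rough C terms (a schematic of the control flow, with hypothetical helper
names standing in for the MASK_LEVEL_IRQ/EOI_IRQ asm macros, not actual source):

	/* Schematic of the new INTR() stub body: */
	mask_level_irq(irq);		/* source stays masked ... */
	eoi_irq(irq);			/* ... and is EOIed immediately */
	intr_nesting_level++;
	sched_ithd(irq);		/* schedule the interrupt thread */
	/* fall into doreti_next; the handler runs later in its thread */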
diff --git a/sys/i386/isa/atpic_vector.s b/sys/i386/isa/atpic_vector.s
index e427351ca205..d2b88bf705a3 100644
--- a/sys/i386/isa/atpic_vector.s
+++ b/sys/i386/isa/atpic_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/i386/isa/bs/bsif.h b/sys/i386/isa/bs/bsif.h
index 5a89681bcce0..6dcc2ab1b86d 100644
--- a/sys/i386/isa/bs/bsif.h
+++ b/sys/i386/isa/bs/bsif.h
@@ -208,17 +208,10 @@ static BS_INLINE void memcopy __P((void *from, void *to, register size_t len));
u_int32_t bs_adapter_info __P((int));
#define delay(y) DELAY(y)
extern int dma_init_flag;
-#ifdef SMP
-#error XXX see comments in i386/isa/bs/bsif.h for details
-/*
- * ipending is 'opaque' in SMP, and can't be accessed this way.
- * Since its my belief that this is PC98 code, and that PC98 and SMP
- * are mutually exclusive, the above compile-time error is the "fix".
- * Please inform smp@freebsd.org if this is NOT the case.
- */
-#else
+
#define softintr(y) ipending |= (1 << y)
-#endif /* SMP */
+
+#endif /* IPENDING */
static BS_INLINE void
memcopy(from, to, len)
diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c
index 15044abbaa3b..724f3c2817ba 100644
--- a/sys/i386/isa/clock.c
+++ b/sys/i386/isa/clock.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+	/* Finally, set up the real clock handlers. */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
diff --git a/sys/i386/isa/cy.c b/sys/i386/isa/cy.c
index 52a8cf36892f..5487d8fe6299 100644
--- a/sys/i386/isa/cy.c
+++ b/sys/i386/isa/cy.c
@@ -94,11 +94,6 @@
#error "The cy device requires the old isa compatibility shims"
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
/*
* Dictionary so that I can name everything *sio* or *com* to compare with
* sio.c. There is also lots of ugly formatting and unnecessary ifdefs to
@@ -366,7 +361,7 @@ static struct com_s *p_com_addr[NSIO];
#define com_addr(unit) (p_com_addr[unit])
struct isa_driver siodriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
sioprobe,
sioattach,
driver_name
@@ -604,11 +599,9 @@ cyattach_common(cy_iobase, cy_align)
com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL;
}
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
free(com, M_DEVBUF);
return (0);
}
- enable_intr();
termioschars(&com->it_in);
com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate;
com->it_out = com->it_in;
@@ -662,6 +655,7 @@ sioopen(dev, flag, mode, p)
int s;
struct tty *tp;
int unit;
+ int intrsave;
mynor = minor(dev);
unit = MINOR_TO_UNIT(mynor);
@@ -768,14 +762,17 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
= inb(com->modem_status_port);
outb(iobase + com_ier, IER_ERXRDY | IER_ETXRDY | IER_ERLS
| IER_EMSC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#else /* !0 */
/*
* Flush fifos. This requires a full channel reset which
@@ -786,13 +783,16 @@ open_top:
CD1400_CCR_CMDRESET | CD1400_CCR_CHANRESET);
cd1400_channel_cmd(com, com->channel_control);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->prev_modem_status = com->last_modem_status
= cd_getreg(com, CD1400_MSVR2);
cd_setreg(com, CD1400_SRER,
com->intr_enable
= CD1400_SRER_MDMCH | CD1400_SRER_RXDATA);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif /* 0 */
/*
* Handle initial DCD. Callout devices get a fake initial
@@ -875,6 +875,7 @@ comhardclose(com)
int s;
struct tty *tp;
int unit;
+ int intrsave;
unit = com->unit;
iobase = com->iobase;
@@ -888,10 +889,13 @@ comhardclose(com)
outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK);
#else
/* XXX */
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->etc = ETC_NONE;
cd_setreg(com, CD1400_COR2, com->cor[1] &= ~CD1400_COR2_ETC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
cd1400_channel_cmd(com, CD1400_CCR_CMDRESET | CD1400_CCR_FTF);
#endif
@@ -899,9 +903,12 @@ comhardclose(com)
#if 0
outb(iobase + com_ier, 0);
#else
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
cd_setreg(com, CD1400_SRER, com->intr_enable = 0);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
tp = com->tp;
if ((tp->t_cflag & HUPCL)
@@ -991,6 +998,11 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * This function:
+ * a) needs to be called with COM_LOCK() held, and
+ * b) needs to return with COM_LOCK() held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1000,6 +1012,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1016,7 +1029,15 @@ sioinput(com)
* slinput is reasonably fast (usually 40 instructions plus
* call overhead).
*/
+
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1038,10 +1059,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1057,7 +1086,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
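As the comment above sioinput() notes, that function inverts the usual idiom:
it is entered with COM_LOCK() held and interrupts disabled, and temporarily
drops both around the slow line-discipline work. A sketch of the inversion,
taken from the loop above:

	intrsave = save_intr();
	COM_UNLOCK();
	enable_intr();			/* allow interrupts during slow work */
	(*linesw[tp->t_line].l_rint)(recv_data, tp);
	restore_intr(intrsave);		/* back to the caller's state ... */
	COM_LOCK();			/* ... with the lock held again */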
@@ -1729,6 +1759,7 @@ static void
siopoll()
{
int unit;
+ int intrsave;
#ifdef CyDebug
++cy_timeouts;
@@ -1751,7 +1782,9 @@ repeat:
* (actually never opened devices) so that we don't
* loop.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -1759,7 +1792,8 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (incc != 0)
log(LOG_DEBUG,
"sio%d: %d events for device with no tp\n",
@@ -1767,29 +1801,39 @@ repeat:
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
+ sioinput(com);
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->extra_state & CSE_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->extra_state &= ~CSE_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)) {
tp->t_state &= ~TS_BUSY;
ttwwakeup(com->tp);
@@ -1801,10 +1845,13 @@ repeat:
}
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
(*linesw[tp->t_line].l_start)(tp);
}
if (com_events == 0)
@@ -1833,6 +1880,7 @@ comparam(tp, t)
u_char opt;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -1857,14 +1905,9 @@ comparam(tp, t)
else
(void)commctl(com, TIOCM_DTR, DMBIS);
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically.
- */
(void) siosetwater(com, t->c_ispeed);
/* XXX we don't actually change the speed atomically. */
- enable_intr();
if (idivisor != 0) {
cd_setreg(com, CD1400_RBPR, idivisor);
@@ -1985,12 +2028,15 @@ comparam(tp, t)
if (cflag & CCTS_OFLOW)
opt |= CD1400_COR2_CCTS_OFLOW;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (opt != com->cor[1]) {
cor_change |= CD1400_CCR_COR2;
cd_setreg(com, CD1400_COR2, com->cor[1] = opt);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* set channel option register 3 -
@@ -2111,7 +2157,9 @@ comparam(tp, t)
* XXX should have done this long ago, but there is too much state
* to change all atomically.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->state &= ~CS_TTGO;
if (!(tp->t_state & TS_TTSTOP))
@@ -2177,7 +2225,8 @@ comparam(tp, t)
| CD1400_SRER_TXMPTY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2196,6 +2245,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2207,7 +2257,6 @@ siosetwater(com, speed)
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
if (ibufsize == com->ibufsize) {
- disable_intr();
return (0);
}
@@ -2217,7 +2266,6 @@ siosetwater(com, speed)
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
if (ibuf == NULL) {
- disable_intr();
return (ENOMEM);
}
@@ -2235,7 +2283,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2254,6 +2304,9 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2267,6 +2320,7 @@ comstart(tp)
bool_t started;
#endif
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
@@ -2277,7 +2331,9 @@ comstart(tp)
started = FALSE;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP) {
com->state &= ~CS_TTGO;
if (com->intr_enable & CD1400_SRER_TXRDY)
@@ -2313,7 +2369,8 @@ comstart(tp)
com->mcr_image |= com->mcr_rts);
#endif
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2332,7 +2389,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2351,7 +2410,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
#ifdef CyDebug
@@ -2362,7 +2422,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2381,7 +2443,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
@@ -2390,10 +2453,13 @@ comstart(tp)
++com->start_real;
#endif
#if 0
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
ttwwakeup(tp);
splx(s);
@@ -2406,10 +2472,13 @@ comstop(tp, rw)
{
struct com_s *com;
bool_t wakeup_etc;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
wakeup_etc = FALSE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
com->obufs[0].l_queued = FALSE;
com->obufs[1].l_queued = FALSE;
@@ -2432,7 +2501,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (wakeup_etc)
wakeup(&com->etc);
if (rw & FWRITE && com->etc == ETC_NONE)
@@ -2448,6 +2518,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
if (com->channel_control & CD1400_CCR_RCVEN)
@@ -2485,7 +2556,9 @@ commctl(com, bits, how)
mcr |= com->mcr_dtr;
if (bits & TIOCM_RTS)
mcr |= com->mcr_rts;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
com->mcr_image = mcr;
@@ -2503,7 +2576,8 @@ commctl(com, bits, how)
cd_setreg(com, CD1400_MSVR2, mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2565,9 +2639,14 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ int intrsave;
+
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
#endif
@@ -2587,11 +2666,15 @@ comwakeup(chan)
for (errnum = 0; errnum < CE_NTYPES; ++errnum) {
u_int delta;
u_long total;
+ int intrsave;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
@@ -2743,6 +2826,8 @@ cd_etc(com, etc)
struct com_s *com;
int etc;
{
+ int intrsave;
+
/*
* We can't change the hardware's ETC state while there are any
* characters in the tx fifo, since those characters would be
@@ -2754,26 +2839,28 @@ cd_etc(com, etc)
* for the tx to become empty so that the command is sure to be
* executed soon after we issue it.
*/
+ intrsave = save_intr();
disable_intr();
- if (com->etc == etc) {
- enable_intr();
+ COM_LOCK();
+ if (com->etc == etc)
goto wait;
- }
if ((etc == CD1400_ETC_SENDBREAK
&& (com->etc == ETC_BREAK_STARTING
|| com->etc == ETC_BREAK_STARTED))
|| (etc == CD1400_ETC_STOPBREAK
&& (com->etc == ETC_BREAK_ENDING || com->etc == ETC_BREAK_ENDED
|| com->etc == ETC_NONE))) {
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return;
}
com->etc = etc;
cd_setreg(com, CD1400_SRER,
com->intr_enable
= (com->intr_enable & ~CD1400_SRER_TXRDY) | CD1400_SRER_TXMPTY);
- enable_intr();
wait:
+ COM_UNLOCK();
+ restore_intr(intrsave);
while (com->etc == etc
&& tsleep(&com->etc, TTIPRI | PCATCH, "cyetc", 0) == 0)
continue;
@@ -2787,7 +2874,7 @@ cd_getreg(com, reg)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
int val;
@@ -2795,14 +2882,16 @@ cd_getreg(com, reg)
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
val = cd_inb(iobase, reg, cy_align);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (val);
}
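cd_getreg() and cd_setreg() take COM_LOCK() only when the saved eflags show
that interrupts were enabled on entry (PSL_I set), presumably because a caller
already running with interrupts disabled, such as the interrupt path, holds
the lock itself. The shape of the test:

	intrsave = save_intr();
	disable_intr();
	if (intrsave & PSL_I)		/* interrupts were on: take the lock */
		COM_LOCK();
	/* ... CD1400 register window access ... */
	if (intrsave & PSL_I)
		COM_UNLOCK();
	restore_intr(intrsave);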
@@ -2815,21 +2904,23 @@ cd_setreg(com, reg, val)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
basecom = com_addr(com->unit & ~(CD1400_NO_OF_CHANNELS - 1));
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
cd_outb(iobase, reg, cy_align, val);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
#ifdef CyDebug
diff --git a/sys/i386/isa/icu_ipl.s b/sys/i386/isa/icu_ipl.s
index 34753583a41e..d178d5c43c45 100644
--- a/sys/i386/isa/icu_ipl.s
+++ b/sys/i386/isa/icu_ipl.s
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s
index e427351ca205..d2b88bf705a3 100644
--- a/sys/i386/isa/icu_vector.s
+++ b/sys/i386/isa/icu_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Restore the interrupt mask after completing an interrupt, unmasking
+ * the IRQ again.  Called from ithd_loop.  There are two separate
+ * functions, one for each ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
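A C-level view of the threaded path these stubs implement, illustrative only;
the thread body lives in ithread.c, which is not part of this file:

/*
 *   vector stub:  mask the IRQ at the ICU, EOI, then call sched_ithd(irq)
 *   sched_ithd(): wake ithds[irq]->it_proc, the interrupt thread
 *   ithd_loop():  for (ih = ithds[irq]->it_ih; ih; ih = ih->next)
 *                         ih->handler(ih->argument);
 *                 then call setimask0()/setimask1() to unmask the IRQ
 */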
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c
index 34a8c229bd6b..870760e1ce01 100644
--- a/sys/i386/isa/intr_machdep.c
+++ b/sys/i386/isa/intr_machdep.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update the intrnames array with the specified name.  This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+		} else {
+			p = ithd->it_proc;
+			snprintf(p->p_comm, MAXCOMLEN,
+			    "irq%d: %s", irq, name);
+		}
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+		 * We can't append the new handler if either the
+		 * existing handler list or the new handler does
+		 * not allow interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
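A hypothetical caller, for illustration only; the device name, handler, softc
and priority here are placeholders (PI_REALTIME is the value the clock code
above uses):

	struct intrec *cookie;

	cookie = inthand_add("foo0", irq, (inthand2_t *)foo_intr, sc,
	    PI_REALTIME, INTR_EXCL);
	if (cookie == NULL)
		return (ENXIO);		/* sharing conflict or no memory */
	/* ... and at detach time: */
	inthand_remove(cookie);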
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate an interrupt handler descriptor created by an earlier
+ * call of inthand_add(), remove it from the linked list, then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+		if (ih == NULL)		/* handler not found in chain */
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h
index 5982295b1ab4..87c97a35f5ef 100644
--- a/sys/i386/isa/intr_machdep.h
+++ b/sys/i386/isa/intr_machdep.h
@@ -98,7 +98,6 @@
#define TPR_BLOCK_XCPUSTOP 0xaf /* */
#define TPR_BLOCK_ALL 0xff /* all INTs */
-
#ifdef TEST_TEST1
/* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */
#define XTEST1_OFFSET (ICU_OFFSET + 31)
@@ -145,8 +144,9 @@ extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
extern u_long *intr_countp[]; /* pointers into intrcnt[] */
extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */
-extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */
+extern ithd *ithds[];
extern void *intr_unit[]; /* cookies to pass to intr handlers */
+extern ithd softinterrupt; /* soft interrupt thread */
inthand_t
IDTVEC(fastintr0), IDTVEC(fastintr1),
@@ -190,26 +190,60 @@ inthand_t
#endif /** TEST_TEST1 */
#endif /* SMP || APIC_IO */
+#ifdef PC98
+#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
+#define ICU_SLAVEID 7
+#else
+#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
+#define ICU_SLAVEID 2
+#endif
+
+#ifdef APIC_IO
+/*
+ * This is to accommodate "mixed-mode" programming for
+ * motherboards that don't connect the 8254 to the IO APIC.
+ */
+#define AUTO_EOI_1 1
+#endif
+
+#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
+
void isa_defaultirq __P((void));
int isa_nmi __P((int cd));
int icu_setup __P((int intr, inthand2_t *func, void *arg,
- u_int *maskptr, int flags));
+ int flags));
int icu_unset __P((int intr, inthand2_t *handler));
-int update_intr_masks __P((void));
intrmask_t splq __P((intrmask_t mask));
-#define INTR_FAST 0x00000001 /* fast interrupt handler */
-#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */
+/*
+ * Describe a hardware interrupt handler. These structures are
+ * accessed via the array intreclist, which contains one pointer per
+ * hardware interrupt.
+ *
+ * Multiple interrupt handlers for a specific IRQ can be chained
+ * together via the 'next' pointer.
+ */
+typedef struct intrec {
+ inthand2_t *handler; /* code address of handler */
+ void *argument; /* argument to pass to handler */
+ enum intr_type flags; /* flag bits (sys/bus.h) */
+ char *name; /* name of handler */
+ ithd *ithd; /* handler we're connected to */
+ struct intrec *next; /* next handler for this irq */
+} intrec;
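Given this layout, dispatching all handlers for an IRQ is a walk of the
chain; a sketch (the real loop runs in the interrupt thread):

	intrec *ih;

	for (ih = ithds[irq]->it_ih; ih != NULL; ih = ih->next)
		ih->handler(ih->argument);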
/*
* WARNING: These are internal functions and not to be used by device drivers!
* They are subject to change without notice.
*/
struct intrec *inthand_add(const char *name, int irq, inthand2_t handler,
- void *arg, intrmask_t *maskptr, int flags);
-
+ void *arg, int pri, int flags);
int inthand_remove(struct intrec *idesc);
+void sched_ithd(void *);
+void ithd_loop(void *);
+void start_softintr(void *);
+void intr_soft(void *);
#endif /* LOCORE */
diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s
index 93612301fa85..1ee9ace4559e 100644
--- a/sys/i386/isa/ipl.s
+++ b/sys/i386/isa/ipl.s
@@ -44,7 +44,6 @@
* AT/386
* Vector interrupt control section
*
- * cpl - Current interrupt disable mask
* *_imask - Interrupt masks for various spl*() functions
* ipending - Pending interrupts (set when a masked interrupt occurs)
*/
@@ -53,8 +52,6 @@
ALIGN_DATA
/* current priority (all off) */
- .globl _cpl
-_cpl: .long HWI_MASK | SWI_MASK
.globl _tty_imask
_tty_imask: .long SWI_TTY_MASK
@@ -71,9 +68,9 @@ _softnet_imask: .long SWI_NET_MASK
.globl _softtty_imask
_softtty_imask: .long SWI_TTY_MASK
-/* pending interrupts blocked by splxxx() */
- .globl _ipending
-_ipending: .long 0
+/* pending software interrupts */
+ .globl _spending
+_spending: .long 0
/* set with bits for which queue to service */
.globl _netisr
@@ -100,59 +97,30 @@ _netisrs:
_doreti:
FAKE_MCOUNT(_bintr) /* init "from" _bintr -> _doreti */
addl $4,%esp /* discard unit number */
- popl %eax /* cpl or cml to restore */
doreti_next:
- /*
- * Check for pending HWIs and SWIs atomically with restoring cpl
- * and exiting. The check has to be atomic with exiting to stop
- * (ipending & ~cpl) changing from zero to nonzero while we're
- * looking at it (this wouldn't be fatal but it would increase
- * interrupt latency). Restoring cpl has to be atomic with exiting
- * so that the stack cannot pile up (the nesting level of interrupt
- * handlers is limited by the number of bits in cpl).
- */
-#ifdef SMP
- cli /* early to prevent INT deadlock */
-doreti_next2:
-#endif
- movl %eax,%ecx
- notl %ecx /* set bit = unmasked level */
-#ifndef SMP
- cli
-#endif
- andl _ipending,%ecx /* set bit = unmasked pending INT */
- jne doreti_unpend
- movl %eax,_cpl
decb _intr_nesting_level
/* Check for ASTs that can be handled now. */
testl $AST_PENDING,_astpending
- je doreti_exit
- testb $SEL_RPL_MASK,TF_CS(%esp)
- jne doreti_ast
- testl $PSL_VM,TF_EFLAGS(%esp)
- je doreti_exit
- cmpl $1,_in_vm86call
- jne doreti_ast
+ je doreti_exit /* no AST, exit */
+ testb $SEL_RPL_MASK,TF_CS(%esp) /* are we in user mode? */
+ jne doreti_ast /* yes, do it now. */
+ testl $PSL_VM,TF_EFLAGS(%esp) /* kernel mode */
+ je doreti_exit /* and not VM86 mode, defer */
+ cmpl $1,_in_vm86call /* are we in a VM86 call? */
+ jne doreti_ast /* yes, we can do it */
/*
- * doreti_exit - release MP lock, pop registers, iret.
+ * doreti_exit: release MP lock, pop registers, iret.
*
- * Note that the syscall trap shotcuts to doreti_syscall_ret.
+ * Note that the syscall trap shortcuts to doreti_syscall_ret.
* The segment register pop is a special case, since it may
* fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c
+ * registers. The fault is handled in trap.c.
*/
-
doreti_exit:
MEXITCOUNT
-#ifdef SMP
- /* release the kernel lock */
- movl $_mp_lock, %edx /* GIANT_LOCK */
- call _MPrellock_edx
-#endif /* SMP */
-
.globl doreti_popl_fs
.globl doreti_syscall_ret
doreti_syscall_ret:
@@ -170,6 +138,13 @@ doreti_popl_ds:
doreti_iret:
iret
+ /*
+ * doreti_iret_fault and friends. Alternative return code for
+ * the case where we get a fault in the doreti_exit code
+ * above. trap() (i386/i386/trap.c) catches this specific
+ * case, sends the process a signal and continues in the
+ * corresponding place in the code below.
+ */
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
@@ -189,93 +164,11 @@ doreti_popl_fs_fault:
jmp alltraps_with_regs_pushed
ALIGN_TEXT
-doreti_unpend:
- /*
- * Enabling interrupts is safe because we haven't restored cpl yet.
- * %ecx contains the next probable ready interrupt (~cpl & ipending)
- */
-#ifdef SMP
- bsfl %ecx, %ecx /* locate the next dispatchable int */
- lock
- btrl %ecx, _ipending /* is it really still pending? */
- jnc doreti_next2 /* some intr cleared memory copy */
- sti /* late to prevent INT deadlock */
-#else
- sti
- bsfl %ecx,%ecx /* slow, but not worth optimizing */
- btrl %ecx,_ipending
- jnc doreti_next /* some intr cleared memory copy */
-#endif /* SMP */
- /*
- * Execute handleable interrupt
- *
- * Set up JUMP to _ihandlers[%ecx] for HWIs.
- * Set up CALL of _ihandlers[%ecx] for SWIs.
- * This is a bit early for the SMP case - we have to push %ecx and
- * %edx, but could push only %ecx and load %edx later.
- */
- movl _ihandlers(,%ecx,4),%edx
- cmpl $NHWI,%ecx
- jae doreti_swi /* software interrupt handling */
- cli /* else hardware int handling */
-#ifdef SMP
- movl %eax,_cpl /* same as non-smp case right now */
-#else
- movl %eax,_cpl
-#endif
- MEXITCOUNT
-#ifdef APIC_INTR_DIAGNOSTIC
- lock
- incl CNAME(apic_itrace_doreti)(,%ecx,4)
-#ifdef APIC_INTR_DIAGNOSTIC_IRQ
- cmpl $APIC_INTR_DIAGNOSTIC_IRQ,%ecx
- jne 9f
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl $APIC_ITRACE_DORETI
- call log_intr_event
- addl $4,%esp
- popl %edx
- popl %ecx
- popl %eax
-9:
-#endif
-#endif
- jmp *%edx
-
- ALIGN_TEXT
-doreti_swi:
- pushl %eax
- /*
- * At least the SWI_CLOCK handler has to run at a possibly strictly
- * lower cpl, so we have to restore
- * all the h/w bits in cpl now and have to worry about stack growth.
- * The worst case is currently (30 Jan 1994) 2 SWI handlers nested
- * in dying interrupt frames and about 12 HWIs nested in active
- * interrupt frames. There are only 4 different SWIs and the HWI
- * and SWI masks limit the nesting further.
- *
- * The SMP case is currently the same as the non-SMP case.
- */
-#ifdef SMP
- orl imasks(,%ecx,4), %eax /* or in imasks */
- movl %eax,_cpl /* set cpl for call */
-#else
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
-#endif
- call *%edx
- popl %eax /* cpl to restore */
- jmp doreti_next
-
- ALIGN_TEXT
doreti_ast:
andl $~AST_PENDING,_astpending
sti
movl $T_ASTFLT,TF_TRAPNO(%esp)
- call _trap
- subl %eax,%eax /* recover cpl|cml */
+ call _ast
movb $1,_intr_nesting_level /* for doreti_next to decrement */
jmp doreti_next
diff --git a/sys/i386/isa/ipl_funcs.c b/sys/i386/isa/ipl_funcs.c
index d27d97fa9b1f..14eb2402eb0e 100644
--- a/sys/i386/isa/ipl_funcs.c
+++ b/sys/i386/isa/ipl_funcs.c
@@ -27,11 +27,13 @@
*/
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <machine/ipl.h>
-#include <machine/globals.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
/*
@@ -45,236 +47,55 @@
void name(void) \
{ \
atomic_set_int(var, bits); \
+ sched_ithd((void *) SOFTINTR); \
}
-DO_SETBITS(setdelayed, &ipending, loadandclear(&idelayed))
+DO_SETBITS(setdelayed, &spending, loadandclear(&idelayed))
+DO_SETBITS(setsoftcamnet,&spending, SWI_CAMNET_PENDING)
+DO_SETBITS(setsoftcambio,&spending, SWI_CAMBIO_PENDING)
+DO_SETBITS(setsoftclock, &spending, SWI_CLOCK_PENDING)
+DO_SETBITS(setsoftnet, &spending, SWI_NET_PENDING)
+DO_SETBITS(setsofttty, &spending, SWI_TTY_PENDING)
+DO_SETBITS(setsoftvm, &spending, SWI_VM_PENDING)
+DO_SETBITS(setsofttq, &spending, SWI_TQ_PENDING)
-DO_SETBITS(setsoftcamnet,&ipending, SWI_CAMNET_PENDING)
-DO_SETBITS(setsoftcambio,&ipending, SWI_CAMBIO_PENDING)
-DO_SETBITS(setsoftclock, &ipending, SWI_CLOCK_PENDING)
-DO_SETBITS(setsoftnet, &ipending, SWI_NET_PENDING)
-DO_SETBITS(setsofttty, &ipending, SWI_TTY_PENDING)
-DO_SETBITS(setsoftvm, &ipending, SWI_VM_PENDING)
-DO_SETBITS(setsofttq, &ipending, SWI_TQ_PENDING)
-
-DO_SETBITS(schedsoftcamnet, &idelayed, SWI_CAMNET_PENDING)
-DO_SETBITS(schedsoftcambio, &idelayed, SWI_CAMBIO_PENDING)
-DO_SETBITS(schedsoftnet, &idelayed, SWI_NET_PENDING)
-DO_SETBITS(schedsofttty, &idelayed, SWI_TTY_PENDING)
-DO_SETBITS(schedsoftvm, &idelayed, SWI_VM_PENDING)
-DO_SETBITS(schedsofttq, &idelayed, SWI_TQ_PENDING)
+/*
+ * We don't need to schedule soft interrupts any more; it happens
+ * automatically.
+ */
+#define schedsoftcamnet
+#define schedsoftcambio
+#define schedsoftnet
+#define schedsofttty
+#define schedsoftvm
+#define schedsofttq
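For concreteness, DO_SETBITS(setsofttty, &spending, SWI_TTY_PENDING) above
now expands to:

void
setsofttty(void)
{
	atomic_set_int(&spending, SWI_TTY_PENDING);
	sched_ithd((void *) SOFTINTR);	/* wake the soft interrupt thread */
}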
unsigned
softclockpending(void)
{
- return (ipending & SWI_CLOCK_PENDING);
+ return (spending & SWI_CLOCK_PENDING);
}
/*
- * Support for SPL assertions.
- */
-
-#ifdef INVARIANT_SUPPORT
-
-#define SPLASSERT_IGNORE 0
-#define SPLASSERT_LOG 1
-#define SPLASSERT_PANIC 2
-
-static int splassertmode = SPLASSERT_LOG;
-SYSCTL_INT(_kern, OID_AUTO, splassertmode, CTLFLAG_RW,
- &splassertmode, 0, "Set the mode of SPLASSERT");
-
-static void
-init_splassertmode(void *ignored)
-{
- TUNABLE_INT_FETCH("kern.splassertmode", 0, splassertmode);
-}
-SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_splassertmode, NULL);
-
-static void
-splassertfail(char *str, const char *msg, char *name, int level)
-{
- switch (splassertmode) {
- case SPLASSERT_IGNORE:
- break;
- case SPLASSERT_LOG:
- printf(str, msg, name, level);
- printf("\n");
- break;
- case SPLASSERT_PANIC:
- panic(str, msg, name, level);
- break;
- }
-}
-
-#define GENSPLASSERT(NAME, MODIFIER) \
-void \
-NAME##assert(const char *msg) \
-{ \
- if ((cpl & (MODIFIER)) != (MODIFIER)) \
- splassertfail("%s: not %s, cpl == %#x", \
- msg, __XSTRING(NAME) + 3, cpl); \
-}
-#else
-#define GENSPLASSERT(NAME, MODIFIER)
-#endif
-
-/************************************************************************
- * GENERAL SPL CODE *
- ************************************************************************
- *
- * Implement splXXX(), spl0(), splx(), and splq(). splXXX() disables a
- * set of interrupts (e.g. splbio() disables interrupts relating to
- * device I/O) and returns the previous interrupt mask. splx() restores
- * the previous interrupt mask, spl0() is a special case which enables
- * all interrupts and is typically used inside i386/i386 swtch.s and
- * fork_trampoline. splq() is a generic version of splXXX().
- *
- * The SPL routines mess around with the 'cpl' global, which masks
- * interrupts. Interrupts are not *actually* masked. What happens is
- * that if an interrupt masked by the cpl occurs, the appropriate bit
- * in 'ipending' is set and the interrupt is defered. When we clear
- * bits in the cpl we must check to see if any ipending interrupts have
- * been unmasked and issue the synchronously, which is what the splz()
- * call does.
- *
- * Because the cpl is often saved and restored in a nested fashion, cpl
- * modifications are only allowed in the SMP case when the MP lock is held
- * to prevent multiple processes from tripping over each other's masks.
- * The cpl is saved when you do a context switch (mi_switch()) and restored
- * when your process gets cpu again.
- *
- * An interrupt routine is allowed to modify the cpl as long as it restores
- * it prior to returning (thus the interrupted mainline code doesn't notice
- * anything amiss). For the SMP case, the interrupt routine must hold
- * the MP lock for any cpl manipulation.
- *
- * Likewise, due to the deterministic nature of cpl modifications, we do
- * NOT need to use locked instructions to modify it.
+ * Dummy spl calls.  They exist only to avoid breaking all the code
+ * that expects to call them.
*/
-
-#ifndef SMP
-
-#define GENSPL(NAME, OP, MODIFIER, PC) \
-GENSPLASSERT(NAME, MODIFIER) \
-unsigned NAME(void) \
-{ \
- unsigned x; \
- \
- x = cpl; \
- cpl OP MODIFIER; \
- return (x); \
-}
-
-void
-spl0(void)
-{
- cpl = 0;
- if (ipending)
- splz();
-}
-
-void
-splx(unsigned ipl)
-{
- cpl = ipl;
- if (ipending & ~ipl)
- splz();
-}
-
-intrmask_t
-splq(intrmask_t mask)
-{
- intrmask_t tmp = cpl;
- cpl |= mask;
- return (tmp);
-}
-
-#else /* !SMP */
-
-#include <machine/smp.h>
-#include <machine/smptests.h>
-
-/*
- * SMP CASE
- *
- * Mostly the same as the non-SMP case now, but it didn't used to be
- * this clean.
- */
-
-#define GENSPL(NAME, OP, MODIFIER, PC) \
-GENSPLASSERT(NAME, MODIFIER) \
-unsigned NAME(void) \
-{ \
- unsigned x; \
- \
- x = cpl; \
- cpl OP MODIFIER; \
- \
- return (x); \
-}
-
-/*
- * spl0() - unmask all interrupts
- *
- * The MP lock must be held on entry
- * This routine may only be called from mainline code.
- */
-void
-spl0(void)
-{
- KASSERT(inside_intr == 0, ("spl0: called from interrupt"));
- cpl = 0;
- if (ipending)
- splz();
-}
-
-/*
- * splx() - restore previous interrupt mask
- *
- * The MP lock must be held on entry
- */
-
-void
-splx(unsigned ipl)
-{
- cpl = ipl;
- if (inside_intr == 0 && (ipending & ~cpl) != 0)
- splz();
-}
-
-
-/*
- * splq() - blocks specified interrupts
- *
- * The MP lock must be held on entry
- */
-intrmask_t
-splq(intrmask_t mask)
-{
- intrmask_t tmp = cpl;
- cpl |= mask;
- return (tmp);
-}
-
-#endif /* !SMP */
-
-/* Finally, generate the actual spl*() functions */
-
-/* NAME: OP: MODIFIER: PC: */
-GENSPL(splbio, |=, bio_imask, 2)
-GENSPL(splcam, |=, cam_imask, 7)
-GENSPL(splclock, =, HWI_MASK | SWI_MASK, 3)
-GENSPL(splhigh, =, HWI_MASK | SWI_MASK, 4)
-GENSPL(splimp, |=, net_imask, 5)
-GENSPL(splnet, |=, SWI_NET_MASK, 6)
-GENSPL(splsoftcam, |=, SWI_CAMBIO_MASK | SWI_CAMNET_MASK, 8)
-GENSPL(splsoftcambio, |=, SWI_CAMBIO_MASK, 9)
-GENSPL(splsoftcamnet, |=, SWI_CAMNET_MASK, 10)
-GENSPL(splsoftclock, =, SWI_CLOCK_MASK, 11)
-GENSPL(splsofttty, |=, SWI_TTY_MASK, 12)
-GENSPL(splsoftvm, |=, SWI_VM_MASK, 16)
-GENSPL(splsofttq, |=, SWI_TQ_MASK, 17)
-GENSPL(splstatclock, |=, stat_imask, 13)
-GENSPL(spltty, |=, tty_imask, 14)
-GENSPL(splvm, |=, net_imask | bio_imask | cam_imask, 15)
+void spl0(void) {}
+void splx(intrmask_t x) {}
+intrmask_t splq(intrmask_t mask) { return (0); }
+intrmask_t splbio(void) { return (0); }
+intrmask_t splcam(void) { return (0); }
+intrmask_t splclock(void) { return (0); }
+intrmask_t splhigh(void) { return (0); }
+intrmask_t splimp(void) { return (0); }
+intrmask_t splnet(void) { return (0); }
+intrmask_t splsoftcam(void) { return (0); }
+intrmask_t splsoftcambio(void) { return (0); }
+intrmask_t splsoftcamnet(void) { return (0); }
+intrmask_t splsoftclock(void) { return (0); }
+intrmask_t splsofttty(void) { return (0); }
+intrmask_t splsoftvm(void) { return (0); }
+intrmask_t splsofttq(void) { return (0); }
+intrmask_t splstatclock(void) { return (0); }
+intrmask_t spltty(void) { return (0); }
+intrmask_t splvm(void) { return (0); }
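/*
 * Editorial sketch, not part of this commit: the stubs above keep the
 * historical spl idiom compiling while serialization moves to mutexes.
 * The mutex and function names below are hypothetical.
 */
static struct mtx foo_mtx;		/* assumed to be initialized elsewhere */

static void
foo_spl_example(void)
{
	intrmask_t s;

	s = splbio();			/* now returns 0 and masks nothing */
	mtx_enter(&foo_mtx, MTX_DEF);	/* the protection that actually counts */
	/* ... touch driver state ... */
	mtx_exit(&foo_mtx, MTX_DEF);
	splx(s);			/* now a no-op */
}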
diff --git a/sys/i386/isa/ithread.c b/sys/i386/isa/ithread.c
new file mode 100644
index 000000000000..4ceac4229d1c
--- /dev/null
+++ b/sys/i386/isa/ithread.c
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp
+ * $FreeBSD$
+ */
+
+/* Interrupt thread code. */
+
+#include "opt_auto_eoi.h"
+
+#include "isa.h"
+
+#include <sys/param.h>
+#include <sys/rtprio.h> /* change this name XXX */
+#ifndef SMP
+#include <machine/lock.h>
+#endif
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/unistd.h>
+#include <sys/errno.h>
+#include <sys/interrupt.h>
+#include <machine/ipl.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
+#include <sys/bus.h>
+
+#if defined(APIC_IO)
+#include <machine/smp.h>
+#include <machine/smptests.h> /** FAST_HI */
+#include <machine/resource.h>
+#endif /* APIC_IO */
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#include <pc98/pc98/pc98_machdep.h>
+#include <pc98/pc98/epsonio.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+#include <i386/isa/icu.h>
+
+#if NISA > 0
+#include <isa/isavar.h>
+#endif
+#include <i386/isa/intr_machdep.h>
+#include <sys/interrupt.h>
+#ifdef APIC_IO
+#include <machine/clock.h>
+#endif
+
+#include "mca.h"
+#if NMCA > 0
+#include <i386/isa/mca_machdep.h>
+#endif
+
+#include <sys/vmmeter.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
+#include <machine/cpu.h>
+#if 0
+#include <ddb/ddb.h>
+#endif
+
+u_long softintrcnt [NSWI];
+
+SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL)
+
+/*
+ * Schedule a heavyweight interrupt process. This function is called
+ * from the interrupt handlers Xintr<num>.
+ */
+void
+sched_ithd(void *cookie)
+{
+ int irq = (int) cookie; /* IRQ we're handling */
+ ithd *ir = ithds[irq]; /* and the process that does it */
+
+ /* This used to be in icu_vector.s */
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ if (irq < NHWI) /* real interrupt, */
+ atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */
+ atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */
+
+ CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d",
+ ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need);
+
+#if 0
+ /*
+ * If we are in the debugger, we can't use interrupt threads to
+ * process interrupts since the threads are scheduled. Instead,
+ * call the interrupt handlers directly. This should be able to
+ * go away once we have light-weight interrupt handlers.
+ */
+ if (db_active) {
+ intrec *ih; /* and our interrupt handler chain */
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = ir->it_ih; ih != NULL; ih = ih->next) {
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+
+ INTREN (1 << ir->irq); /* reset the mask bit */
+ return;
+ }
+#endif
+
+ /*
+ * Set it_need so that if the thread is already running but close
+ * to done, it will do another go-round. Then get the sched lock
+ * and see if the thread is on whichkqs yet. If not, put it on
+ * there. In any case, kick everyone so that if the new thread
+ * is higher priority than their current thread, it gets run now.
+ */
+ ir->it_need = 1;
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */
+ CTR1(KTR_INTR, "sched_ithd: setrunqueue %d",
+ ir->it_proc->p_pid);
+/* membar_lock(); */
+ ir->it_proc->p_stat = SRUN;
+ setrunqueue(ir->it_proc);
+ aston();
+ }
+ else {
+		if (irq < NHWI && (irq & 7) != 0)
+			CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d",
+			    ir->it_proc->p_pid, ir->it_need,
+			    ir->it_proc->p_stat);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+#if 0
+ aston(); /* ??? check priorities first? */
+#else
+ need_resched();
+#endif
+}
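/*
 * Editorial sketch, not part of this commit: the it_need handshake in
 * outline.  sched_ithd() sets it_need and, under sched_lock, moves the
 * thread from SWAIT onto the run queue; ithd_loop() clears it_need and
 * runs the handlers, then re-checks it_need under the same lock before
 * marking itself SWAIT and calling mi_switch().  Because that final
 * check and the state change happen under sched_lock, a wakeup that
 * arrives while the handlers are running cannot be lost.
 */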
+
+/*
+ * This is the main code for all interrupt threads. It gets put on
+ * whichkqs by setrunqueue above.
+ */
+void
+ithd_loop(void *dummy)
+{
+ ithd *me; /* our thread context */
+ intrec *ih; /* and our interrupt handler chain */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /*
+ * As long as we have interrupts outstanding, go through the
+ * list of handlers, giving each one a go at it.
+ */
+ for (;;) {
+ CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm, me->it_need);
+ while (me->it_need) {
+ /*
+ * Service interrupts. If another interrupt
+ * arrives while we are running, they will set
+ * it_need to denote that we should make
+ * another pass.
+ */
+ me->it_need = 0;
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = me->it_ih; ih != NULL; ih = ih->next) {
+ CTR5(KTR_INTR,
+ "ithd_loop pid %d ih=%p: %p(%p) flg=%x",
+ me->it_proc->p_pid, (void *)ih,
+ (void *)ih->handler, ih->argument,
+ ih->flags);
+
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ }
+
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+
+ INTREN (1 << me->irq); /* reset the mask bit */
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+#ifdef APIC_IO
+ CTR1(KTR_INTR, "ithd_loop pid %d: done",
+ me->it_proc->p_pid);
+#else
+ CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x",
+ me->it_proc->p_pid, imen);
+#endif
+ mi_switch();
+ CTR1(KTR_INTR, "ithd_loop pid %d: resumed",
+ me->it_proc->p_pid);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
+
+/*
+ * Start soft interrupt thread.
+ */
+void
+start_softintr(void *dummy)
+{
+ int error;
+ struct proc *p;
+ ithd *softintr; /* descriptor for the "IRQ" */
+ intrec *idesc; /* descriptor for this handler */
+ char *name = "sintr"; /* name for idesc */
+ int i;
+
+ if (ithds[SOFTINTR]) { /* we already have a thread */
+		printf("start_softintr: already running\n");
+ return;
+ }
+ /* first handler for this irq. */
+ softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (softintr == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(softintr, sizeof(struct ithd));
+ softintr->irq = SOFTINTR;
+ ithds[SOFTINTR] = softintr;
+ error = kthread_create(intr_soft, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "softinterrupt");
+ if (error)
+ panic("start_softintr: kthread_create error %d\n", error);
+
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_rtprio.prio = PI_SOFT; /* soft interrupt */
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ softintr->it_proc = p;
+ p->p_ithd = softintr; /* reverse link */
+
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(idesc, sizeof (struct intrec));
+
+ idesc->ithd = softintr;
+ idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
+ if (idesc->name == NULL)
+ panic ("Can't create soft interrupt thread");
+ strcpy(idesc->name, name);
+ for (i = NHWI; i < NHWI + NSWI; i++)
+ intr_countp[i] = &softintrcnt [i - NHWI];
+}
+
+/*
+ * Software interrupt process code.
+ */
+void
+intr_soft(void *dummy)
+{
+ int i;
+ ithd *me; /* our thread context */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /* Main loop */
+ for (;;) {
+#if 0
+ CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm,
+ me->it_need);
+#endif
+
+ /*
+ * Service interrupts. If another interrupt arrives
+ * while we are running, they will set it_need to
+ * denote that we should make another pass.
+ */
+ me->it_need = 0;
+ while ((i = ffs(spending))) {
+ i--;
+ atomic_add_long(intr_countp[i], 1);
+ spending &= ~ (1 << i);
+ mtx_enter(&Giant, MTX_DEF);
+ (ihandlers[i])();
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: done",
+ me->it_proc->p_pid);
+#endif
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+ mi_switch();
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: resumed",
+ me->it_proc->p_pid);
+#endif
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
diff --git a/sys/i386/isa/loran.c b/sys/i386/isa/loran.c
index 577a608f7113..c43bf8524c24 100644
--- a/sys/i386/isa/loran.c
+++ b/sys/i386/isa/loran.c
@@ -620,7 +620,7 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, loran_timecounter, CTLFLAG_RD,
/**********************************************************************/
struct isa_driver lorandriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
loranprobe,
loranattach,
"loran"
diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c
index 34a8c229bd6b..870760e1ce01 100644
--- a/sys/i386/isa/nmi.c
+++ b/sys/i386/isa/nmi.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of kernel, then setup
+ * interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+			} else {
+				p = ithd->it_proc;
+				snprintf(p->p_comm, MAXCOMLEN,
+				    "irq%d: %s", irq, name);
+			}
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+	 * We can't append the new handler if either the
+	 * existing handler chain or the new handler does
+	 * not allow interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+			    " to normal interrupt %s on irq%d\n",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
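/*
 * Editorial sketch, not part of this commit: typical driver use of the
 * API above.  All 'foo' names are hypothetical, the priority constant
 * is illustrative, and error handling is minimal.
 */
static void	foo_intrhand(void *arg);	/* the driver's handler */
static intrec	*foo_irec;

static int
foo_attach_example(void *softc, int irq)
{
	/* Register a shareable (no INTR_EXCL), non-fast handler. */
	foo_irec = inthand_add("foo", irq, foo_intrhand, softc,
	    PI_SOFT /* assumed priority */, 0);
	if (foo_irec == NULL)
		return (ENXIO);
	return (0);
}

static int
foo_detach_example(void)
{
	return (inthand_remove(foo_irec));
}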
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+		if (ih == NULL)		/* not found in the chain */
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index 637853e25264..8610e35f1f11 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+	 * XXX This looks highly bogus, but it appears that npx_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
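/*
 * Editorial sketch, not part of this commit: the save/disable/restore
 * pattern used above preserves the caller's interrupt-enable state
 * instead of unconditionally re-enabling interrupts, which matters if
 * npxsave() is entered with interrupts already disabled.
 */
static void
foo_critical_example(void)
{
	int intrstate;

	intrstate = save_intr();	/* remember current eflags */
	disable_intr();
	/* ... state an interrupt must not see half-updated ... */
	restore_intr(intrstate);	/* back to the caller's state */
}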
diff --git a/sys/i386/isa/vector.s b/sys/i386/isa/vector.s
index 5447a90126a0..79f2320e6b8e 100644
--- a/sys/i386/isa/vector.s
+++ b/sys/i386/isa/vector.s
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*