91 files changed, 1149 insertions, 253 deletions
diff --git a/sys/amd64/amd64/busdma_machdep.c b/sys/amd64/amd64/busdma_machdep.c index 63906ddd4d52..3dc9e76db7a5 100644 --- a/sys/amd64/amd64/busdma_machdep.c +++ b/sys/amd64/amd64/busdma_machdep.c @@ -31,6 +31,8 @@ #include <sys/malloc.h> #include <sys/bus.h> #include <sys/interrupt.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <vm/vm.h> #include <vm/vm_page.h> diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index bb552a33a4f6..e02569c157ec 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -264,6 +264,7 @@ cpu_startup(dummy) /* * Good {morning,afternoon,evening,night}. */ + mtx_lock(&vm_mtx); earlysetcpuclass(); startrtclock(); printcpuinfo(); @@ -397,6 +398,7 @@ again: exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); + mtx_unlock(&vm_mtx); /* * XXX: Mbuf system machine-specific initializations should * go here, if anywhere. @@ -2075,9 +2077,11 @@ f00f_hack(void *unused) { r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; + mtx_lock(&vm_mtx); if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); + mtx_unlock(&vm_mtx); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index a5a913506527..8671530e367e 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -50,6 +50,8 @@ #include <sys/fcntl.h> #include <sys/ioccom.h> #include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <sys/malloc.h> #include <sys/memrange.h> #include <sys/proc.h> @@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags) case 0: v = uio->uio_offset; v &= ~PAGE_MASK; + mtx_lock(&vm_mtx); pmap_kenter((vm_offset_t)ptvmmap, v); + mtx_unlock(&vm_mtx); o = (int)uio->uio_offset & PAGE_MASK; c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK)); c = min(c, (u_int)(PAGE_SIZE - o)); c = min(c, (u_int)iov->iov_len); error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio); + mtx_lock(&vm_mtx); pmap_kremove((vm_offset_t)ptvmmap); + mtx_unlock(&vm_mtx); continue; /* minor device 1 is kernel memory */ @@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags) return EFAULT; if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0)) return EFAULT; + mtx_lock(&vm_mtx); for (; addr < eaddr; addr += PAGE_SIZE) - if (pmap_extract(kernel_pmap, addr) == 0) + if (pmap_extract(kernel_pmap, addr) == 0) { + mtx_unlock(&vm_mtx); return EFAULT; - + } + if (!kernacc((caddr_t)(int)uio->uio_offset, c, uio->uio_rw == UIO_READ ? 
- VM_PROT_READ : VM_PROT_WRITE)) + VM_PROT_READ : VM_PROT_WRITE)) { + mtx_unlock(&vm_mtx); return (EFAULT); + } + mtx_unlock(&vm_mtx); error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio); continue; } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 93807eedea65..488a8a57534a 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -75,6 +75,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/mman.h> #include <sys/msgbuf.h> #include <sys/proc.h> diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index af7bfc1c577b..8924fa29d79a 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -330,9 +330,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); i = trap_pfault(&frame, TRUE, eva); - mtx_unlock(&Giant); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* @@ -443,9 +441,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); (void) trap_pfault(&frame, FALSE, eva); - mtx_unlock(&Giant); goto out; case T_DNA: @@ -887,7 +883,9 @@ nogo: frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; return (0); } + mtx_lock(&Giant); trap_fatal(frame, eva); + mtx_unlock(&Giant); return (-1); } @@ -1147,14 +1145,17 @@ syscall(frame) /* * Try to run the syscall without the MP lock if the syscall - * is MP safe. We have to obtain the MP lock no matter what if - * we are ktracing + * is MP safe. */ if ((callp->sy_narg & SYF_MPSAFE) == 0) { mtx_lock(&Giant); } #ifdef KTRACE + /* + * We have to obtain the MP lock no matter what if + * we are ktracing + */ if (KTRPOINT(p, KTR_SYSCALL)) { if (!mtx_owned(&Giant)) mtx_lock(&Giant); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index fd626a32e804..eda238643a7a 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -290,11 +290,14 @@ void cpu_wait(p) struct proc *p; { + + mtx_lock(&vm_mtx); /* drop per-process resources */ pmap_dispose_proc(p); /* and clean-out the vmspace */ vmspace_free(p->p_vmspace); + mtx_unlock(&vm_mtx); } /* @@ -376,6 +379,7 @@ vmapbuf(bp) if ((bp->b_flags & B_PHYS) == 0) panic("vmapbuf"); + mtx_lock(&vm_mtx); for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data); addr < bp->b_data + bp->b_bufsize; addr += PAGE_SIZE, v += PAGE_SIZE) { @@ -391,6 +395,7 @@ vmapbuf(bp) vm_page_hold(PHYS_TO_VM_PAGE(pa)); pmap_kenter((vm_offset_t) v, pa); } + mtx_unlock(&vm_mtx); kva = bp->b_saveaddr; bp->b_saveaddr = bp->b_data; @@ -411,6 +416,7 @@ vunmapbuf(bp) if ((bp->b_flags & B_PHYS) == 0) panic("vunmapbuf"); + mtx_lock(&vm_mtx); for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data); addr < bp->b_data + bp->b_bufsize; addr += PAGE_SIZE) { @@ -418,6 +424,7 @@ vunmapbuf(bp) pmap_kremove((vm_offset_t) addr); vm_page_unhold(PHYS_TO_VM_PAGE(pa)); } + mtx_unlock(&vm_mtx); bp->b_data = bp->b_saveaddr; } @@ -574,12 +581,17 @@ vm_page_zero_idle() * pages because doing so may flush our L1 and L2 caches too much. 
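The mem.c hunks above show the basic bracketing discipline of the new vm_mtx: every early-return error path inside the locked region must drop the mutex itself before returning. A minimal sketch of that shape, not buildable outside a kernel tree; checked_vm_op() is a hypothetical wrapper standing in for the pmap_extract()/kernacc() probes:

static int
checked_vm_op(vm_offset_t addr)
{

	mtx_lock(&vm_mtx);
	if (pmap_extract(kernel_pmap, addr) == 0) {
		mtx_unlock(&vm_mtx);	/* unlock on the failure path too */
		return (EFAULT);
	}
	/* ... further vm_mtx-protected checks ... */
	mtx_unlock(&vm_mtx);
	return (0);
}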
*/ - if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) + if (mtx_trylock(&vm_mtx) == 0) + return (0); + if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) { + mtx_unlock(&vm_mtx); return(0); - if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) + } + if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) { + mtx_unlock(&vm_mtx); return(0); + } - if (mtx_trylock(&Giant)) { s = splvm(); zero_state = 0; m = vm_page_list_find(PQ_FREE, free_rover, FALSE); @@ -602,10 +614,8 @@ vm_page_zero_idle() } free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK; splx(s); - mtx_unlock(&Giant); + mtx_unlock(&vm_mtx); return (1); - } - return (0); } /* diff --git a/sys/coda/coda_namecache.c b/sys/coda/coda_namecache.c index 3b73a679824e..9dfaf19750b6 100644 --- a/sys/coda/coda_namecache.c +++ b/sys/coda/coda_namecache.c @@ -81,6 +81,7 @@ #include <sys/errno.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/ucred.h> #include <vm/vm.h> diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 004ec23bfa5a..1a8e07832894 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -50,6 +50,7 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/sbuf.h> diff --git a/sys/compat/linprocfs/linprocfs_misc.c b/sys/compat/linprocfs/linprocfs_misc.c index 004ec23bfa5a..1a8e07832894 100644 --- a/sys/compat/linprocfs/linprocfs_misc.c +++ b/sys/compat/linprocfs/linprocfs_misc.c @@ -50,6 +50,7 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/sbuf.h> diff --git a/sys/compat/pecoff/imgact_pecoff.c b/sys/compat/pecoff/imgact_pecoff.c index 36b428872088..f5cbfa8ff019 100644 --- a/sys/compat/pecoff/imgact_pecoff.c +++ b/sys/compat/pecoff/imgact_pecoff.c @@ -49,6 +49,7 @@ #include <sys/imgact.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/mman.h> #include <sys/namei.h> #include <sys/proc.h> diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c index 6419635a3e98..333c4c82797e 100644 --- a/sys/dev/agp/agp.c +++ b/sys/dev/agp/agp.c @@ -38,6 +38,7 @@ #include <sys/ioccom.h> #include <sys/agpio.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <pci/pcivar.h> diff --git a/sys/dev/agp/agp_ali.c b/sys/dev/agp/agp_ali.c index 86e070e8d2ae..aa805e107f91 100644 --- a/sys/dev/agp/agp_ali.c +++ b/sys/dev/agp/agp_ali.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/agp/agp_amd.c b/sys/dev/agp/agp_amd.c index 0a498f72ae99..4aaf4e9bc16d 100644 --- a/sys/dev/agp/agp_amd.c +++ b/sys/dev/agp/agp_amd.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 79fd56683214..5c4049383e1f 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -36,6 +36,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/agp/agp_intel.c b/sys/dev/agp/agp_intel.c index a4b9a43b0045..dc1ef4da50b8 100644 --- a/sys/dev/agp/agp_intel.c +++ 
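vm_page_zero_idle() switches from mtx_trylock(&Giant) to mtx_trylock(&vm_mtx) but keeps the same opportunistic shape: idle-time zeroing is optional work, so a contended lock means "do nothing now" rather than "block". A reduced sketch of the pattern; zero_one_page() is a hypothetical placeholder, and mtx_trylock(9) returns nonzero on success:

static int
idle_work(void)
{

	if (mtx_trylock(&vm_mtx) == 0)
		return (0);		/* lock busy; retry from the idle loop */
	zero_one_page();
	mtx_unlock(&vm_mtx);
	return (1);			/* did some work */
}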
b/sys/dev/agp/agp_intel.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/agp/agp_sis.c b/sys/dev/agp/agp_sis.c index 1f1a50b7eceb..a6a20a45a8fb 100644 --- a/sys/dev/agp/agp_sis.c +++ b/sys/dev/agp/agp_sis.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/agp/agp_via.c b/sys/dev/agp/agp_via.c index 983348ea25d4..086b0276d135 100644 --- a/sys/dev/agp/agp_via.c +++ b/sys/dev/agp/agp_via.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index edf28909e18a..ae783ddb3e6b 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -71,6 +71,7 @@ #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/mdioctl.h> #include <sys/namei.h> #include <sys/proc.h> diff --git a/sys/fs/coda/coda_namecache.c b/sys/fs/coda/coda_namecache.c index 3b73a679824e..9dfaf19750b6 100644 --- a/sys/fs/coda/coda_namecache.c +++ b/sys/fs/coda/coda_namecache.c @@ -81,6 +81,7 @@ #include <sys/errno.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/ucred.h> #include <vm/vm.h> diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c index 5e4a30c78c22..5c21993f50ca 100644 --- a/sys/fs/procfs/procfs_map.c +++ b/sys/fs/procfs/procfs_map.c @@ -42,6 +42,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/vnode.h> diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c index 3a2f8d2e7932..1e28870675e5 100644 --- a/sys/fs/procfs/procfs_mem.c +++ b/sys/fs/procfs/procfs_mem.c @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/ptrace.h> #include <sys/user.h> @@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio) * usage in that process can be messed up. */ vm = p->p_vmspace; - if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1)) + if ((p->p_flag & P_WEXIT)) return EFAULT; + + mtx_lock(&vm_mtx); + if (vm->vm_refcnt < 1) { + mtx_unlock(&vm_mtx); + return EFAULT; + } ++vm->vm_refcnt; /* * The map we want... @@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio) /* * Now do the i/o move. */ + mtx_unlock(&vm_mtx); error = uiomove((caddr_t)(kva + page_offset), len, uio); + mtx_lock(&vm_mtx); pmap_kremove(kva); @@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio) kmem_free(kernel_map, kva, PAGE_SIZE); vmspace_free(vm); + mtx_unlock(&vm_mtx); return (error); } diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index 2940f404b779..ba812293e4ac 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -731,6 +731,8 @@ spec_getpages(ap) cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); /* Do the input. 
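procfs_rwmem() above demonstrates the drop-and-reacquire idiom used whenever a vm_mtx holder must call something that can fault or sleep, such as uiomove(). A sketch under those assumptions; the caller is expected to revalidate any vm_mtx-protected state once the lock is retaken, which this sketch elides:

static int
copy_unlocked(caddr_t kva, int len, struct uio *uio)
{
	int error;

	mtx_assert(&vm_mtx, MA_OWNED);
	mtx_unlock(&vm_mtx);		/* uiomove() may fault and sleep */
	error = uiomove(kva, len, uio);
	mtx_lock(&vm_mtx);		/* caller revalidates state here */
	return (error);
}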
*/ BUF_STRATEGY(bp); @@ -741,6 +743,8 @@ spec_getpages(ap) tsleep(bp, PVM, "spread", 0); splx(s); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); if ((bp->b_ioflags & BIO_ERROR) != 0) { if (bp->b_error) diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index 869818f28067..3ac98bf508bb 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -45,6 +45,7 @@ #include <sys/filedesc.h> #include <sys/kernel.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mount.h> diff --git a/sys/i386/i386/busdma_machdep.c b/sys/i386/i386/busdma_machdep.c index 63906ddd4d52..3dc9e76db7a5 100644 --- a/sys/i386/i386/busdma_machdep.c +++ b/sys/i386/i386/busdma_machdep.c @@ -31,6 +31,8 @@ #include <sys/malloc.h> #include <sys/bus.h> #include <sys/interrupt.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <vm/vm.h> #include <vm/vm_page.h> diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index bb552a33a4f6..e02569c157ec 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -264,6 +264,7 @@ cpu_startup(dummy) /* * Good {morning,afternoon,evening,night}. */ + mtx_lock(&vm_mtx); earlysetcpuclass(); startrtclock(); printcpuinfo(); @@ -397,6 +398,7 @@ again: exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); + mtx_unlock(&vm_mtx); /* * XXX: Mbuf system machine-specific initializations should * go here, if anywhere. @@ -2075,9 +2077,11 @@ f00f_hack(void *unused) { r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; + mtx_lock(&vm_mtx); if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); + mtx_unlock(&vm_mtx); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ diff --git a/sys/i386/i386/mem.c b/sys/i386/i386/mem.c index a5a913506527..8671530e367e 100644 --- a/sys/i386/i386/mem.c +++ b/sys/i386/i386/mem.c @@ -50,6 +50,8 @@ #include <sys/fcntl.h> #include <sys/ioccom.h> #include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <sys/malloc.h> #include <sys/memrange.h> #include <sys/proc.h> @@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags) case 0: v = uio->uio_offset; v &= ~PAGE_MASK; + mtx_lock(&vm_mtx); pmap_kenter((vm_offset_t)ptvmmap, v); + mtx_unlock(&vm_mtx); o = (int)uio->uio_offset & PAGE_MASK; c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK)); c = min(c, (u_int)(PAGE_SIZE - o)); c = min(c, (u_int)iov->iov_len); error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio); + mtx_lock(&vm_mtx); pmap_kremove((vm_offset_t)ptvmmap); + mtx_unlock(&vm_mtx); continue; /* minor device 1 is kernel memory */ @@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags) return EFAULT; if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0)) return EFAULT; + mtx_lock(&vm_mtx); for (; addr < eaddr; addr += PAGE_SIZE) - if (pmap_extract(kernel_pmap, addr) == 0) + if (pmap_extract(kernel_pmap, addr) == 0) { + mtx_unlock(&vm_mtx); return EFAULT; - + } + if (!kernacc((caddr_t)(int)uio->uio_offset, c, uio->uio_rw == UIO_READ ? 
- VM_PROT_READ : VM_PROT_WRITE)) + VM_PROT_READ : VM_PROT_WRITE)) { + mtx_unlock(&vm_mtx); return (EFAULT); + } + mtx_unlock(&vm_mtx); error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio); continue; } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 93807eedea65..488a8a57534a 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -75,6 +75,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/mman.h> #include <sys/msgbuf.h> #include <sys/proc.h> diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index af7bfc1c577b..8924fa29d79a 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -330,9 +330,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); i = trap_pfault(&frame, TRUE, eva); - mtx_unlock(&Giant); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* @@ -443,9 +441,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); (void) trap_pfault(&frame, FALSE, eva); - mtx_unlock(&Giant); goto out; case T_DNA: @@ -887,7 +883,9 @@ nogo: frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; return (0); } + mtx_lock(&Giant); trap_fatal(frame, eva); + mtx_unlock(&Giant); return (-1); } @@ -1147,14 +1145,17 @@ syscall(frame) /* * Try to run the syscall without the MP lock if the syscall - * is MP safe. We have to obtain the MP lock no matter what if - * we are ktracing + * is MP safe. */ if ((callp->sy_narg & SYF_MPSAFE) == 0) { mtx_lock(&Giant); } #ifdef KTRACE + /* + * We have to obtain the MP lock no matter what if + * we are ktracing + */ if (KTRPOINT(p, KTR_SYSCALL)) { if (!mtx_owned(&Giant)) mtx_lock(&Giant); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index fd626a32e804..eda238643a7a 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -290,11 +290,14 @@ void cpu_wait(p) struct proc *p; { + + mtx_lock(&vm_mtx); /* drop per-process resources */ pmap_dispose_proc(p); /* and clean-out the vmspace */ vmspace_free(p->p_vmspace); + mtx_unlock(&vm_mtx); } /* @@ -376,6 +379,7 @@ vmapbuf(bp) if ((bp->b_flags & B_PHYS) == 0) panic("vmapbuf"); + mtx_lock(&vm_mtx); for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data); addr < bp->b_data + bp->b_bufsize; addr += PAGE_SIZE, v += PAGE_SIZE) { @@ -391,6 +395,7 @@ vmapbuf(bp) vm_page_hold(PHYS_TO_VM_PAGE(pa)); pmap_kenter((vm_offset_t) v, pa); } + mtx_unlock(&vm_mtx); kva = bp->b_saveaddr; bp->b_saveaddr = bp->b_data; @@ -411,6 +416,7 @@ vunmapbuf(bp) if ((bp->b_flags & B_PHYS) == 0) panic("vunmapbuf"); + mtx_lock(&vm_mtx); for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data); addr < bp->b_data + bp->b_bufsize; addr += PAGE_SIZE) { @@ -418,6 +424,7 @@ vunmapbuf(bp) pmap_kremove((vm_offset_t) addr); vm_page_unhold(PHYS_TO_VM_PAGE(pa)); } + mtx_unlock(&vm_mtx); bp->b_data = bp->b_saveaddr; } @@ -574,12 +581,17 @@ vm_page_zero_idle() * pages because doing so may flush our L1 and L2 caches too much. 
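The syscall() hunk and the MPSAFE flags added to syscalls.master below work as a pair: Giant is acquired only for syscalls not marked SYF_MPSAFE (with ktrace as the carve-out that always needs it), and released only if this invocation took it. A simplified sketch of that dispatch, omitting the ktrace and error handling:

static int
dispatch(struct sysent *callp, struct proc *p, void *args)
{
	int error;

	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);
	error = (*callp->sy_call)(p, args);
	if (mtx_owned(&Giant))
		mtx_unlock(&Giant);
	return (error);
}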
*/ - if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) + if (mtx_trylock(&vm_mtx) == 0) + return (0); + if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) { + mtx_unlock(&vm_mtx); return(0); - if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) + } + if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) { + mtx_unlock(&vm_mtx); return(0); + } - if (mtx_trylock(&Giant)) { s = splvm(); zero_state = 0; m = vm_page_list_find(PQ_FREE, free_rover, FALSE); @@ -602,10 +614,8 @@ vm_page_zero_idle() } free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK; splx(s); - mtx_unlock(&Giant); + mtx_unlock(&vm_mtx); return (1); - } - return (0); } /* diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 13c29f8e4e10..0734ba443536 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -41,6 +41,7 @@ #include <sys/imgact_aout.h> #include <sys/imgact_elf.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/signalvar.h> #include <sys/sysent.h> diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 9478eb39b4e7..8becda31eb5c 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -171,6 +171,7 @@ exec_aout_imgact(imgp) if (error) return (error); + mtx_lock(&vm_mtx); /* * Destroy old process VM and create a new one (with a new stack) */ @@ -184,7 +185,9 @@ exec_aout_imgact(imgp) vp = imgp->vp; map = &vmspace->vm_map; vm_map_lock(map); + mtx_unlock(&vm_mtx); VOP_GETVOBJECT(vp, &object); + mtx_lock(&vm_mtx); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; @@ -195,6 +198,7 @@ exec_aout_imgact(imgp) MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); + mtx_unlock(&vm_mtx); return (error); } data_end = text_end + a_out->a_data; @@ -207,6 +211,7 @@ exec_aout_imgact(imgp) MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); + mtx_unlock(&vm_mtx); return (error); } } @@ -217,6 +222,7 @@ exec_aout_imgact(imgp) VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); + mtx_unlock(&vm_mtx); return (error); } } @@ -229,6 +235,8 @@ exec_aout_imgact(imgp) vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); + mtx_unlock(&vm_mtx); + /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index da7b9cb522ef..2a15e9c98e9a 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o else map_len = round_page(offset+filsz) - file_addr; + mtx_lock(&vm_mtx); if (map_len != 0) { vm_object_reference(object); vm_map_lock(&vmspace->vm_map); @@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o vm_map_unlock(&vmspace->vm_map); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); + mtx_unlock(&vm_mtx); return EINVAL; } /* we can stop now if we've covered it all */ - if (memsz == filsz) + if (memsz == filsz) { + mtx_unlock(&vm_mtx); return 0; + } } @@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o map_addr, map_addr + map_len, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(&vmspace->vm_map); - if (rv != KERN_SUCCESS) + if (rv != KERN_SUCCESS) { + mtx_unlock(&vm_mtx); return EINVAL; + } } if (copy_len != 0) { @@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o MAP_COPY_ON_WRITE | 
MAP_PREFAULT_PARTIAL); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); + mtx_unlock(&vm_mtx); return EINVAL; } /* send the page fragment to user space */ + mtx_unlock(&vm_mtx); error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len); + mtx_lock(&vm_mtx); vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE); - if (error) + if (error) { + mtx_unlock(&vm_mtx); return (error); + } } /* @@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot, FALSE); + mtx_unlock(&vm_mtx); return error; } @@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp) if ((error = exec_extract_strings(imgp)) != 0) goto fail; + mtx_lock(&vm_mtx); exec_new_vmspace(imgp); vmspace = imgp->proc->p_vmspace; + mtx_unlock(&vm_mtx); for (i = 0; i < hdr->e_phnum; i++) { switch(phdr[i].p_type) { @@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp) } } + /* XXX: lock the vm_mtx when twiddling vmspace? */ vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index f1a6a0bfd745..6f5c653c2ab3 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -485,11 +485,15 @@ start_init(void *dummy) * Need just enough stack to hold the faked-up "execve()" arguments. */ addr = trunc_page(USRSTACK - PAGE_SIZE); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) panic("init: couldn't allocate argument space"); p->p_vmspace->vm_maxsaddr = (caddr_t)addr; p->p_vmspace->vm_ssize = 1; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); if ((var = getenv("init_path")) != NULL) { strncpy(init_path, var, sizeof init_path); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 0b1b29e2517c..8f49538f038b 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -412,6 +412,7 @@ exec_map_first_page(imgp) VOP_GETVOBJECT(imgp->vp, &object); s = splvm(); + mtx_lock(&vm_mtx); ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); @@ -443,6 +444,7 @@ exec_map_first_page(imgp) vm_page_free(ma[0]); } splx(s); + mtx_unlock(&vm_mtx); return EIO; } } @@ -454,6 +456,7 @@ exec_map_first_page(imgp) pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0])); imgp->firstpage = ma[0]; + mtx_unlock(&vm_mtx); return 0; } @@ -461,9 +464,12 @@ void exec_unmap_first_page(imgp) struct image_params *imgp; { + if (imgp->firstpage) { + mtx_lock(&vm_mtx); pmap_kremove((vm_offset_t) imgp->image_header); vm_page_unwire(imgp->firstpage, 1); + mtx_unlock(&vm_mtx); imgp->firstpage = NULL; } } @@ -482,6 +488,7 @@ exec_new_vmspace(imgp) caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ); vm_map_t map = &vmspace->vm_map; + mtx_assert(&vm_mtx, MA_OWNED); imgp->vmspace_destroyed = 1; /* diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index d5dccab4e914..1af27d253d48 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -222,6 +222,7 @@ exit1(p, rv) * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. 
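exec_new_vmspace() now asserts that its caller holds vm_mtx, while the vm_fork() call sites in kern_fork.c below assert the opposite. mtx_assert(9) turns a locking contract from a comment into a runtime check (compiled away without INVARIANTS). A sketch of both directions of the contract:

static void
needs_vm_lock(void)
{

	mtx_assert(&vm_mtx, MA_OWNED);		/* caller supplies the lock */
	/* ... touch vm_mtx-protected state ... */
}

static void
forbids_vm_lock(void)
{

	mtx_assert(&vm_mtx, MA_NOTOWNED);	/* we may block below */
	/* ... potentially sleeping work ... */
}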
*/ + mtx_lock(&vm_mtx); if (vm->vm_refcnt == 1) { if (vm->vm_shm) shmexit(p); @@ -230,6 +231,7 @@ exit1(p, rv) (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); } + mtx_unlock(&vm_mtx); PROC_LOCK(p); if (SESS_LEADER(p)) { diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index d3b991df85ad..62dcc061a5cd 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -220,6 +220,7 @@ fork1(p1, flags, procp) if ((flags & RFPROC) == 0) { vm_fork(p1, 0, flags); + mtx_assert(&vm_mtx, MA_NOTOWNED); /* * Close all file descriptors. @@ -567,6 +568,7 @@ again: * execution path later. (ie: directly into user mode) */ vm_fork(p1, p2, flags); + mtx_assert(&vm_mtx, MA_NOTOWNED); if (flags == (RFFDG | RFPROC)) { cnt.v_forks++; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 27431ab4de09..f46313c0f4ad 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -498,8 +498,10 @@ dosetrlimit(p, which, limp) } addr = trunc_page(addr); size = round_page(size); + mtx_lock(&vm_mtx); (void) vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot, FALSE); + mtx_unlock(&vm_mtx); } break; diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 7d793de4ee2b..e09a3772c8b5 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo) int rval = 0; WITNESS_SAVE_DECL(mtx); + KASSERT(ident == &proc0 || /* XXX: swapper */ + timo != 0 || /* XXX: we might still miss a wakeup */ + mtx_owned(&Giant) || mtx != NULL, + ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p", + wmesg, ident)); + if (mtx_owned(&vm_mtx) && mtx != &vm_mtx) + panic("sleeping with vm_mtx held."); #ifdef KTRACE if (p && KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 344f163ec343..613d1e4e79ab 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu ef = (elf_file_t) lf; #ifdef SPARSE_MAPPING + mtx_lock(&vm_mtx); ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT); if (ef->object == NULL) { + mtx_unlock(&vm_mtx); free(ef, M_LINKER); error = ENOMEM; goto out; @@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_object_deallocate(ef->object); + mtx_unlock(&vm_mtx); ef->object = 0; goto out; } + mtx_unlock(&vm_mtx); #else ef->address = malloc(mapsize, M_LINKER, M_WAITOK); if (!ef->address) { @@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu /* * Wire down the pages */ + mtx_lock(&vm_mtx); vm_map_pageable(kernel_map, (vm_offset_t) segbase, (vm_offset_t) segbase + segs[i]->p_memsz, FALSE); + mtx_unlock(&vm_mtx); #endif } @@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file) } #ifdef SPARSE_MAPPING if (ef->object) { + mtx_lock(&vm_mtx); vm_map_remove(kernel_map, (vm_offset_t) ef->address, (vm_offset_t) ef->address + (ef->object->size << PAGE_SHIFT)); vm_object_deallocate(ef->object); + mtx_unlock(&vm_mtx); } #else if (ef->address) diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index 344f163ec343..613d1e4e79ab 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu ef = (elf_file_t) lf; #ifdef SPARSE_MAPPING + mtx_lock(&vm_mtx); ef->object = 
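The check added to msleep() in kern_synch.c enforces the companion rule: never sleep while holding vm_mtx unless vm_mtx is the interlock passed to msleep(9), which releases it atomically with the sleep and reacquires it before returning. A sketch of the safe form; event_ready() is a hypothetical predicate:

static void
wait_for_event(void *ident)
{

	mtx_lock(&vm_mtx);
	while (!event_ready())
		msleep(ident, &vm_mtx, PVM, "vmwait", 0);
	/* vm_mtx is held again here */
	mtx_unlock(&vm_mtx);
}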
vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT); if (ef->object == NULL) { + mtx_unlock(&vm_mtx); free(ef, M_LINKER); error = ENOMEM; goto out; @@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_object_deallocate(ef->object); + mtx_unlock(&vm_mtx); ef->object = 0; goto out; } + mtx_unlock(&vm_mtx); #else ef->address = malloc(mapsize, M_LINKER, M_WAITOK); if (!ef->address) { @@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu /* * Wire down the pages */ + mtx_lock(&vm_mtx); vm_map_pageable(kernel_map, (vm_offset_t) segbase, (vm_offset_t) segbase + segs[i]->p_memsz, FALSE); + mtx_unlock(&vm_mtx); #endif } @@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file) } #ifdef SPARSE_MAPPING if (ef->object) { + mtx_lock(&vm_mtx); vm_map_remove(kernel_map, (vm_offset_t) ef->address, (vm_offset_t) ef->address + (ef->object->size << PAGE_SHIFT)); vm_object_deallocate(ef->object); + mtx_unlock(&vm_mtx); } #else if (ef->address) diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c index 9ac4338c4a1c..061d151d6eaf 100644 --- a/sys/kern/subr_blist.c +++ b/sys/kern/subr_blist.c @@ -71,6 +71,7 @@ #include <sys/kernel.h> #include <sys/blist.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_kern.h> diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index af7bfc1c577b..8924fa29d79a 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -330,9 +330,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); i = trap_pfault(&frame, TRUE, eva); - mtx_unlock(&Giant); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* @@ -443,9 +441,7 @@ restart: */ eva = rcr2(); enable_intr(); - mtx_lock(&Giant); (void) trap_pfault(&frame, FALSE, eva); - mtx_unlock(&Giant); goto out; case T_DNA: @@ -887,7 +883,9 @@ nogo: frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; return (0); } + mtx_lock(&Giant); trap_fatal(frame, eva); + mtx_unlock(&Giant); return (-1); } @@ -1147,14 +1145,17 @@ syscall(frame) /* * Try to run the syscall without the MP lock if the syscall - * is MP safe. We have to obtain the MP lock no matter what if - * we are ktracing + * is MP safe. */ if ((callp->sy_narg & SYF_MPSAFE) == 0) { mtx_lock(&Giant); } #ifdef KTRACE + /* + * We have to obtain the MP lock no matter what if + * we are ktracing + */ if (KTRPOINT(p, KTR_SYSCALL)) { if (!mtx_owned(&Giant)) mtx_lock(&Giant); diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 0c3240226e26..a78844898909 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -56,6 +56,7 @@ #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/ttycom.h> #include <sys/stat.h> #include <sys/poll.h> @@ -253,6 +254,7 @@ pipespace(cpipe, size) * kernel_object. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
*/ + mtx_lock(&vm_mtx); object = vm_object_allocate(OBJT_DEFAULT, npages); buffer = (caddr_t) vm_map_min(kernel_map); @@ -264,6 +266,7 @@ pipespace(cpipe, size) error = vm_map_find(kernel_map, object, 0, (vm_offset_t *) &buffer, size, 1, VM_PROT_ALL, VM_PROT_ALL, 0); + mtx_unlock(&vm_mtx); if (error != KERN_SUCCESS) { vm_object_deallocate(object); @@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio) size = wpipe->pipe_buffer.size; endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); + mtx_lock(&vm_mtx); addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { vm_page_t m; @@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio) for (j = 0; j < i; j++) vm_page_unwire(wpipe->pipe_map.ms[j], 1); + mtx_unlock(&vm_mtx); return (EFAULT); } @@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio) pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, wpipe->pipe_map.npages); + mtx_unlock(&vm_mtx); /* * and update the uio data */ @@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe) amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; } } + mtx_lock(&vm_mtx); for (i = 0; i < wpipe->pipe_map.npages; i++) vm_page_unwire(wpipe->pipe_map.ms[i], 1); + mtx_unlock(&vm_mtx); } /* @@ -1199,12 +1207,13 @@ pipeclose(cpipe) wakeup(ppipe); ppipe->pipe_peer = NULL; } - /* * free resources */ + mtx_lock(&vm_mtx); pipe_free_kmem(cpipe); zfree(pipe_zone, cpipe); + mtx_unlock(&vm_mtx); } } diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 32255bcb1ede..269814c9089d 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -61,7 +61,7 @@ 14 STD POSIX { int mknod(char *path, int mode, int dev); } 15 STD POSIX { int chmod(char *path, int mode); } 16 STD POSIX { int chown(char *path, int uid, int gid); } -17 STD BSD { int obreak(char *nsize); } break obreak_args int +17 MPSAFE STD BSD { int obreak(char *nsize); } break obreak_args int 18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \ int flags); } 19 COMPAT POSIX { long lseek(int fd, long offset, int whence); } @@ -121,23 +121,23 @@ 62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); } 63 COMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \ int arg); } getkerninfo getkerninfo_args int -64 COMPAT BSD { int getpagesize(void); } \ +64 MPSAFE COMPAT BSD { int getpagesize(void); } \ getpagesize getpagesize_args int 65 STD BSD { int msync(void *addr, size_t len, int flags); } 66 STD BSD { int vfork(void); } 67 OBSOL NOHIDE vread 68 OBSOL NOHIDE vwrite -69 STD BSD { int sbrk(int incr); } -70 STD BSD { int sstk(int incr); } -71 COMPAT BSD { int mmap(void *addr, int len, int prot, \ +69 MPSAFE STD BSD { int sbrk(int incr); } +70 MPSAFE STD BSD { int sstk(int incr); } +71 MPSAFE COMPAT BSD { int mmap(void *addr, int len, int prot, \ int flags, int fd, long pos); } -72 STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int -73 STD BSD { int munmap(void *addr, size_t len); } -74 STD BSD { int mprotect(const void *addr, size_t len, int prot); } -75 STD BSD { int madvise(void *addr, size_t len, int behav); } +72 MPSAFE STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int +73 MPSAFE STD BSD { int munmap(void *addr, size_t len); } +74 MPSAFE STD BSD { int mprotect(const void *addr, size_t len, int prot); } +75 MPSAFE STD BSD { int madvise(void *addr, size_t len, int behav); } 76 OBSOL NOHIDE vhangup 77 OBSOL NOHIDE vlimit -78 STD BSD { int mincore(const void *addr, size_t len, \ +78 MPSAFE STD BSD { int mincore(const void *addr, size_t len, 
\ char *vec); } 79 STD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); } 80 STD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); } @@ -306,7 +306,7 @@ setrlimit __setrlimit_args int 196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \ long *basep); } -197 STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \ +197 MPSAFE STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \ int flags, int fd, int pad, off_t pos); } 198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int 199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \ @@ -318,8 +318,8 @@ __sysctl sysctl_args int ; properly, __sysctl should be a NOHIDE, but making an exception ; here allows to avoid one in libc/sys/Makefile.inc. -203 STD BSD { int mlock(const void *addr, size_t len); } -204 STD BSD { int munlock(const void *addr, size_t len); } +203 MPSAFE STD BSD { int mlock(const void *addr, size_t len); } +204 MPSAFE STD BSD { int munlock(const void *addr, size_t len); } 205 STD BSD { int undelete(char *path); } 206 STD BSD { int futimes(int fd, struct timeval *tptr); } 207 STD BSD { int getpgid(pid_t pid); } @@ -386,7 +386,7 @@ 248 UNIMPL NOHIDE nosys 249 UNIMPL NOHIDE nosys ; syscall numbers initially used in OpenBSD -250 STD BSD { int minherit(void *addr, size_t len, int inherit); } +250 MPSAFE STD BSD { int minherit(void *addr, size_t len, int inherit); } 251 STD BSD { int rfork(int flags); } 252 STD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \ int timeout); } @@ -414,7 +414,7 @@ 274 STD BSD { int lchmod(char *path, mode_t mode); } 275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int 276 STD BSD { int lutimes(char *path, struct timeval *tptr); } -277 NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int +277 MPSAFE NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int 278 STD BSD { int nstat(char *path, struct nstat *ub); } 279 STD BSD { int nfstat(int fd, struct nstat *sb); } 280 STD BSD { int nlstat(char *path, struct nstat *ub); } @@ -463,8 +463,8 @@ 321 STD BSD { int yield(void); } 322 OBSOL NOHIDE thr_sleep 323 OBSOL NOHIDE thr_wakeup -324 STD BSD { int mlockall(int how); } -325 STD BSD { int munlockall(void); } +324 MPSAFE STD BSD { int mlockall(int how); } +325 MPSAFE STD BSD { int munlockall(void); } 326 STD BSD { int __getcwd(u_char *buf, u_int buflen); } 327 STD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); } diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index fab53a83325b..0a9abda70bf9 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -43,6 +43,7 @@ #include <sys/shm.h> #include <sys/proc.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/syscall.h> @@ -314,14 +315,17 @@ shmat(p, uap) } shm_handle = shmseg->shm_internal; + mtx_lock(&vm_mtx); vm_object_reference(shm_handle->shm_object); rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object, 0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0); if (rv != KERN_SUCCESS) { + mtx_unlock(&vm_mtx); return ENOMEM; } vm_map_inherit(&p->p_vmspace->vm_map, attach_va, attach_va + size, VM_INHERIT_SHARE); + mtx_unlock(&vm_mtx); shmmap_s->va = attach_va; shmmap_s->shmid = uap->shmid; @@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode) * We make sure that we have allocated a pager before we need * to. 
*/ + mtx_lock(&vm_mtx); if (shm_use_phys) { shm_handle->shm_object = vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0); @@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode) } vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING); vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT); + mtx_unlock(&vm_mtx); shmseg->shm_internal = shm_handle; shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid; diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index c1b53d8c76e2..a980330a9b5c 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -281,6 +281,8 @@ waitrunningbufspace(void) * Called when a buffer is extended. This function clears the B_CACHE * bit if the newly extended portion of the buffer does not contain * valid data. + * + * must be called with vm_mtx held */ static __inline__ void @@ -426,11 +428,13 @@ bufinit(void) * from buf_daemon. */ + mtx_lock(&vm_mtx); bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); bogus_page = vm_page_alloc(kernel_object, ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); cnt.v_wire_count++; + mtx_unlock(&vm_mtx); } @@ -441,17 +445,27 @@ bufinit(void) * buffer_map. * * Since this call frees up buffer space, we call bufspacewakeup(). + * + * Can be called with or without the vm_mtx. */ static void bfreekva(struct buf * bp) { + if (bp->b_kvasize) { + int hadvmlock; + ++buffreekvacnt; bufspace -= bp->b_kvasize; + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, (vm_offset_t) bp->b_kvabase + bp->b_kvasize ); + if (!hadvmlock) + mtx_unlock(&vm_mtx); bp->b_kvasize = 0; bufspacewakeup(); } @@ -807,6 +821,7 @@ bdwrite(struct buf * bp) VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } + mtx_lock(&vm_mtx); /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ @@ -820,6 +835,7 @@ bdwrite(struct buf * bp) * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); + mtx_unlock(&vm_mtx); bqrelse(bp); /* @@ -973,12 +989,15 @@ buf_dirty_count_severe(void) * Release a busy buffer and, if requested, free its resources. The * buffer will be stashed in the appropriate bufqueue[] allowing it * to be accessed later as a cache entity or reused for other purposes. + * + * vm_mtx must be not be held. */ void brelse(struct buf * bp) { int s; + mtx_assert(&vm_mtx, MA_NOTOWNED); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); s = splbio(); @@ -1088,6 +1107,7 @@ brelse(struct buf * bp) resid = bp->b_bufsize; foff = bp->b_offset; + mtx_lock(&vm_mtx); for (i = 0; i < bp->b_npages; i++) { int had_bogus = 0; @@ -1099,10 +1119,12 @@ brelse(struct buf * bp) * now. */ if (m == bogus_page) { + mtx_unlock(&vm_mtx); VOP_GETVOBJECT(vp, &obj); poff = OFF_TO_IDX(bp->b_offset); had_bogus = 1; + mtx_lock(&vm_mtx); for (j = i; j < bp->b_npages; j++) { vm_page_t mtmp; mtmp = bp->b_pages[j]; @@ -1136,11 +1158,15 @@ brelse(struct buf * bp) if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); + mtx_unlock(&vm_mtx); } else if (bp->b_flags & B_VMIO) { - if (bp->b_flags & (B_INVAL | B_RELBUF)) + if (bp->b_flags & (B_INVAL | B_RELBUF)) { + mtx_lock(&vm_mtx); vfs_vmio_release(bp); + mtx_unlock(&vm_mtx); + } } @@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp) splx(s); } +/* + * Must be called with vm_mtx held. 
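bfreekva() is documented as callable "with or without the vm_mtx" and implements that with an ownership probe; vfs_setdirty() and vm_hold_free_pages() below use the same shape. Since these mutexes are not recursive, the idiom lets one function serve both locked and unlocked callers. A minimal sketch:

static void
maybe_locked_op(void)
{
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);
	if (!hadvmlock)
		mtx_lock(&vm_mtx);
	/* ... work that needs vm_mtx ... */
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);
}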
+ */ static void vfs_vmio_release(bp) struct buf *bp; @@ -1310,6 +1339,7 @@ vfs_vmio_release(bp) vm_page_t m; s = splvm(); + mtx_assert(&vm_mtx, MA_OWNED); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; @@ -1343,6 +1373,9 @@ vfs_vmio_release(bp) } splx(s); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); + + /* could drop vm_mtx here */ + if (bp->b_bufsize) { bufspacewakeup(); bp->b_bufsize = 0; @@ -1614,7 +1647,9 @@ restart: if (qindex == QUEUE_CLEAN) { if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; + mtx_lock(&vm_mtx); vfs_vmio_release(bp); + mtx_unlock(&vm_mtx); } if (bp->b_vp) brelvp(bp); @@ -1735,6 +1770,8 @@ restart: if (maxsize != bp->b_kvasize) { vm_offset_t addr = 0; + /* we'll hold the lock over some vm ops */ + mtx_lock(&vm_mtx); bfreekva(bp); if (vm_map_findspace(buffer_map, @@ -1743,6 +1780,7 @@ restart: * Uh oh. Buffer map is to fragmented. We * must defragment the map. */ + mtx_unlock(&vm_mtx); ++bufdefragcnt; defrag = 1; bp->b_flags |= B_INVAL; @@ -1759,6 +1797,7 @@ restart: bufspace += bp->b_kvasize; ++bufreusecnt; } + mtx_unlock(&vm_mtx); } bp->b_data = bp->b_kvabase; } @@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno) size = vp->v_mount->mnt_stat.f_iosize; off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize; + mtx_lock(&vm_mtx); for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) - return 0; + goto notinmem; tinc = size; if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK)) tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK); if (vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0) - return 0; + goto notinmem; } + mtx_unlock(&vm_mtx); return 1; + +notinmem: + mtx_unlock(&vm_mtx); + return (0); } /* @@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno) * * This routine is primarily used by NFS, but is generalized for the * B_VMIO case. + * + * Can be called with or without vm_mtx */ static void vfs_setdirty(struct buf *bp) { int i; + int hadvmlock; vm_object_t object; /* @@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp) if ((bp->b_flags & B_VMIO) == 0) return; + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); + object = bp->b_pages[0]->object; if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY)) @@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp) bp->b_dirtyend = eoffset; } } + if (!hadvmlock) + mtx_unlock(&vm_mtx); } /* @@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size) * DEV_BSIZE aligned existing buffer size. Figure out * if we have to remove any pages. 
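inmem() is reworked from multiple bare "return 0" statements into a single notinmem: label, so the function drops vm_mtx in exactly one failure path. A sketch of that single-exit shape; page_present() is a hypothetical check:

static int
all_present(int npages)
{
	int i;

	mtx_lock(&vm_mtx);
	for (i = 0; i < npages; i++)
		if (!page_present(i))
			goto notinmem;
	mtx_unlock(&vm_mtx);
	return (1);
notinmem:
	mtx_unlock(&vm_mtx);
	return (0);
}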
*/ + mtx_lock(&vm_mtx); if (desiredpages < bp->b_npages) { for (i = desiredpages; i < bp->b_npages; i++) { /* @@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size) (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } + mtx_unlock(&vm_mtx); } else if (size > bp->b_bcount) { /* * We are growing the buffer, possibly in a @@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size) vp = bp->b_vp; VOP_GETVOBJECT(vp, &obj); + mtx_lock(&vm_mtx); while (bp->b_npages < desiredpages) { vm_page_t m; vm_pindex_t pi; @@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size) bp->b_pages, bp->b_npages ); + + mtx_unlock(&vm_mtx); + bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | (vm_offset_t)(bp->b_offset & PAGE_MASK)); } @@ -2726,6 +2786,7 @@ bufdone(struct buf *bp) if (error) { panic("biodone: no object"); } + mtx_lock(&vm_mtx); #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", @@ -2814,6 +2875,7 @@ bufdone(struct buf *bp) } if (obj) vm_object_pip_wakeupn(obj, 0); + mtx_unlock(&vm_mtx); } /* @@ -2837,12 +2899,15 @@ bufdone(struct buf *bp) * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistant. + * + * vm_mtx should not be held */ void vfs_unbusy_pages(struct buf * bp) { int i; + mtx_assert(&vm_mtx, MA_NOTOWNED); runningbufwakeup(bp); if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; @@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp) VOP_GETVOBJECT(vp, &obj); + mtx_lock(&vm_mtx); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; @@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp) vm_page_io_finish(m); } vm_object_pip_wakeupn(obj, 0); + mtx_unlock(&vm_mtx); } } @@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp) * range is restricted to the buffer's size. * * This routine is typically called after a read completes. + * + * vm_mtx should be held */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) { vm_ooffset_t soff, eoff; + mtx_assert(&vm_mtx, MA_OWNED); /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundry or cross the end of the buffer. The end of the @@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) * Since I/O has not been initiated yet, certain buffer flags * such as BIO_ERROR or B_INVAL may be in an inconsistant state * and should be ignored. + * + * vm_mtx should not be held */ void vfs_busy_pages(struct buf * bp, int clear_modify) { int i, bogus; + mtx_assert(&vm_mtx, MA_NOTOWNED); if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj; @@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify) foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_busy_pages: no buffer offset")); + mtx_lock(&vm_mtx); vfs_setdirty(bp); retry: @@ -2979,6 +3053,7 @@ retry: } if (bogus) pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); + mtx_unlock(&vm_mtx); } } @@ -2989,12 +3064,15 @@ retry: * * Note that while we only really need to clean through to b_bcount, we * just go ahead and clean through to b_bufsize. + * + * should be called with vm_mtx held */ static void vfs_clean_pages(struct buf * bp) { int i; + mtx_assert(&vm_mtx, MA_OWNED); if (bp->b_flags & B_VMIO) { vm_ooffset_t foff; @@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp) * Set the range within the buffer to valid and clean. 
The range is * relative to the beginning of the buffer, b_offset. Note that b_offset * itself may be offset from the beginning of the first page. + * */ void @@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size) * * Note that while we only theoretically need to clear through b_bcount, * we go ahead and clear through b_bufsize. + * + * We'll get vm_mtx here for safety if processing a VMIO buffer. + * I don't think vm_mtx is needed, but we're twiddling vm_page flags. */ void vfs_bio_clrbuf(struct buf *bp) { int i, mask = 0; caddr_t sa, ea; + if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) { + mtx_lock(&vm_mtx); bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && @@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) { } bp->b_pages[0]->valid |= mask; bp->b_resid = 0; + mtx_unlock(&vm_mtx); return; } ea = sa = bp->b_data; @@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) { vm_page_flag_clear(bp->b_pages[i], PG_ZERO); } bp->b_resid = 0; + mtx_unlock(&vm_mtx); } else { clrbuf(bp); } @@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) { * vm_hold_load_pages and vm_hold_unload pages get pages into * a buffers address space. The pages are anonymous and are * not associated with a file object. + * + * vm_mtx should not be held */ -void +static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; + mtx_assert(&vm_mtx, MA_NOTOWNED); to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; + mtx_lock(&vm_mtx); for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: @@ -3152,6 +3242,7 @@ tryagain: vm_page_wakeup(p); } bp->b_npages = index; + mtx_unlock(&vm_mtx); } void @@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) vm_offset_t pg; vm_page_t p; int index, newnpages; + int hadvmlock; from = round_page(from); to = round_page(to); newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { @@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) } } bp->b_npages = newnpages; + if (!hadvmlock) + mtx_unlock(&vm_mtx); } diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 8a6e0452813d..0eb47bde0a46 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); + mtx_lock(&vm_mtx); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; @@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } + mtx_unlock(&vm_mtx); bp->b_bcount += tbp->b_bcount; bp->b_bufsize += tbp->b_bufsize; } + mtx_lock(&vm_mtx); for(j=0;j<bp->b_npages;j++) { if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) @@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); + mtx_unlock(&vm_mtx); return (bp); } @@ -484,7 +488,9 @@ cluster_callback(bp) if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; + 
mtx_lock(&vm_mtx); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); + mtx_unlock(&vm_mtx); /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. @@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len) } } + mtx_lock(&vm_mtx); for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; vm_page_io_start(m); @@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len) bp->b_npages++; } } + mtx_unlock(&vm_mtx); } bp->b_bcount += size; bp->b_bufsize += size; @@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len) tbp, b_cluster.cluster_entry); } finishcluster: + mtx_lock(&vm_mtx); pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *) bp->b_pages, bp->b_npages); + mtx_unlock(&vm_mtx); if (bp->b_bufsize > bp->b_kvasize) panic( "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n", diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 328a9b1526d3..d17e9348bda8 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -535,14 +535,18 @@ retry: if (vp->v_type == VREG || vp->v_type == VDIR) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; + mtx_lock(&vm_mtx); object = vnode_pager_alloc(vp, vat.va_size, 0, 0); + mtx_unlock(&vm_mtx); } else if (devsw(vp->v_rdev) != NULL) { /* * This simply allocates the biggest object possible * for a disk vnode. This should be fixed, but doesn't * cause any problems (yet). */ + mtx_lock(&vm_mtx); object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); + mtx_unlock(&vm_mtx); } else { goto retn; } @@ -550,15 +554,23 @@ retry: * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ + mtx_lock(&vm_mtx); object->ref_count--; + mtx_unlock(&vm_mtx); vp->v_usecount--; } else { + /* + * XXX: safe to hold vm mutex through VOP_UNLOCK? + */ + mtx_lock(&vm_mtx); if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); - tsleep(object, PVM, "vodead", 0); + msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0); + mtx_unlock(&vm_mtx); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } + mtx_unlock(&vm_mtx); } KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); @@ -580,6 +592,7 @@ vop_stddestroyvobject(ap) if (vp->v_object == NULL) return (0); + mtx_lock(&vm_mtx); if (obj->ref_count == 0) { /* * vclean() may be called twice. 
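In vop_stdcreatevobject() below, the plain tsleep() on a dead object becomes msleep() with the object's mutex (VM_OBJECT_MTX(), which at this stage resolves to vm_mtx) as the interlock: the lock is dropped atomically with the sleep, closing the window where the wakeup could fire between a separate unlock and the tsleep(). A before/after sketch of the pattern, not the exact committed code:

	/* Racy: the wakeup can land between the unlock and the tsleep. */
	mtx_unlock(&vm_mtx);
	tsleep(object, PVM, "vodead", 0);

	/*
	 * Safe: msleep() releases vm_mtx atomically with the sleep and
	 * holds it again on return.
	 */
	msleep(object, &vm_mtx, PVM, "vodead", 0);
	mtx_unlock(&vm_mtx);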
The first time @@ -594,6 +607,7 @@ vop_stddestroyvobject(ap) */ vm_pager_deallocate(obj); } + mtx_unlock(&vm_mtx); return (0); } diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 6b73258f0684..3f975516162d 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -2770,8 +2770,13 @@ fsync(p, uap) if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (VOP_GETVOBJECT(vp, &obj) == 0) + if (VOP_GETVOBJECT(vp, &obj) == 0) { + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); vm_object_page_clean(obj, 0, 0, 0); + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); + } error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); #ifdef SOFTUPDATES if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 2f4dc8d95326..6c050ba16d84 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) int s, error; vm_object_t object; + mtx_assert(&vm_mtx, MA_NOTOWNED); + if (flags & V_SAVE) { s = splbio(); while (vp->v_numoutput) { @@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) */ mtx_lock(&vp->v_interlock); if (VOP_GETVOBJECT(vp, &object) == 0) { + mtx_lock(&vm_mtx); vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); + mtx_unlock(&vm_mtx); } mtx_unlock(&vp->v_interlock); @@ -1132,6 +1136,8 @@ speedup_syncer() * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. + * + * Doesn't block, only vnode seems to need a lock. */ void pbgetvp(vp, bp) @@ -1554,6 +1560,7 @@ vput(vp) { struct proc *p = curproc; /* XXX */ + mtx_assert(&Giant, MA_OWNED); KASSERT(vp != NULL, ("vput: null vp")); mtx_lock(&vp->v_interlock); /* Skip this v_writecount check if we're going to panic below. */ @@ -2382,7 +2389,11 @@ loop: if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (VOP_GETVOBJECT(vp, &obj) == 0) { - vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); + mtx_lock(&vm_mtx); + vm_object_page_clean(obj, 0, 0, + flags == MNT_WAIT ? 
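The fsync() hunk below swaps Giant out for vm_mtx around vm_object_page_clean(), and spec_getpages() earlier does the reverse swap around BUF_STRATEGY(), so the two locks are never held simultaneously across these calls. A sketch of the swap pulled into a helper; the function is hypothetical, as the committed code inlines it:

static void
clean_object(vm_object_t obj)
{

	mtx_assert(&Giant, MA_OWNED);
	mtx_unlock(&Giant);		/* never hold both locks at once */
	mtx_lock(&vm_mtx);
	vm_object_page_clean(obj, 0, 0, 0);
	mtx_unlock(&vm_mtx);
	mtx_lock(&Giant);		/* restore the caller's Giant */
}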
+ OBJPC_SYNC : OBJPC_NOSYNC); + mtx_unlock(&vm_mtx); anyio = 1; } vput(vp); @@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred) struct proc *p; struct ucred *cred; { + + mtx_assert(&vm_mtx, MA_NOTOWNED); return (VOP_CREATEVOBJECT(vp, cred, p)); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 6b73258f0684..3f975516162d 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2770,8 +2770,13 @@ fsync(p, uap) if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (VOP_GETVOBJECT(vp, &obj) == 0) + if (VOP_GETVOBJECT(vp, &obj) == 0) { + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); vm_object_page_clean(obj, 0, 0, 0); + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); + } error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); #ifdef SOFTUPDATES if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) diff --git a/sys/miscfs/procfs/procfs_map.c b/sys/miscfs/procfs/procfs_map.c index 5e4a30c78c22..5c21993f50ca 100644 --- a/sys/miscfs/procfs/procfs_map.c +++ b/sys/miscfs/procfs/procfs_map.c @@ -42,6 +42,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/vnode.h> diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c index 3a2f8d2e7932..1e28870675e5 100644 --- a/sys/miscfs/procfs/procfs_mem.c +++ b/sys/miscfs/procfs/procfs_mem.c @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <sys/ptrace.h> #include <sys/user.h> @@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio) * usage in that process can be messed up. */ vm = p->p_vmspace; - if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1)) + if ((p->p_flag & P_WEXIT)) return EFAULT; + + mtx_lock(&vm_mtx); + if (vm->vm_refcnt < 1) { + mtx_unlock(&vm_mtx); + return EFAULT; + } ++vm->vm_refcnt; /* * The map we want... @@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio) /* * Now do the i/o move. */ + mtx_unlock(&vm_mtx); error = uiomove((caddr_t)(kva + page_offset), len, uio); + mtx_lock(&vm_mtx); pmap_kremove(kva); @@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio) kmem_free(kernel_map, kva, PAGE_SIZE); vmspace_free(vm); + mtx_unlock(&vm_mtx); return (error); } diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 2940f404b779..ba812293e4ac 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -731,6 +731,8 @@ spec_getpages(ap) cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); /* Do the input. 
*/ BUF_STRATEGY(bp); @@ -741,6 +743,8 @@ spec_getpages(ap) tsleep(bp, PVM, "spread", 0); splx(s); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); if ((bp->b_ioflags & BIO_ERROR) != 0) { if (bp->b_error) diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c index 869818f28067..3ac98bf508bb 100644 --- a/sys/miscfs/union/union_subr.c +++ b/sys/miscfs/union/union_subr.c @@ -45,6 +45,7 @@ #include <sys/filedesc.h> #include <sys/kernel.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mount.h> diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index cb7297f46bfe..234815c925c0 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -124,8 +124,13 @@ nfs_getpages(ap) } if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); (void)nfs_fsinfo(nmp, vp, cred, p); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } npages = btoc(count); @@ -168,7 +173,11 @@ nfs_getpages(ap) uio.uio_rw = UIO_READ; uio.uio_procp = p; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); error = nfs_readrpc(vp, &uio, cred); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -280,8 +289,13 @@ nfs_putpages(ap) offset = IDX_TO_OFF(pages[0]->pindex); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); (void)nfs_fsinfo(nmp, vp, cred, p); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; @@ -321,7 +335,11 @@ nfs_putpages(ap) else iomode = NFSV3WRITE_FILESYNC; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -332,8 +350,13 @@ nfs_putpages(ap) rtvals[i] = VM_PAGER_OK; vm_page_undirty(pages[i]); } - if (must_commit) + if (must_commit) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); nfs_clearcommit(vp->v_mount); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } } return rtvals[0]; } @@ -1076,7 +1099,9 @@ again: bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } + mtx_lock(&vm_mtx); vfs_bio_set_validclean(bp, on, n); + mtx_unlock(&vm_mtx); } /* diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index c7e6917e87dc..18cb8a9a6f22 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -2139,6 +2139,8 @@ nfs_clearcommit(mp) int s; s = splbio(); + mtx_assert(&Giant, MA_OWNED); + mtx_assert(&vm_mtx, MA_NOTOWNED); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index c7e6917e87dc..18cb8a9a6f22 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -2139,6 +2139,8 @@ nfs_clearcommit(mp) int s; s = splbio(); + mtx_assert(&Giant, MA_OWNED); + mtx_assert(&vm_mtx, MA_NOTOWNED); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index cb7297f46bfe..234815c925c0 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -124,8 +124,13 @@ nfs_getpages(ap) } if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); (void)nfs_fsinfo(nmp, vp, cred, 
p); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } npages = btoc(count); @@ -168,7 +173,11 @@ nfs_getpages(ap) uio.uio_rw = UIO_READ; uio.uio_procp = p; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); error = nfs_readrpc(vp, &uio, cred); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -280,8 +289,13 @@ nfs_putpages(ap) offset = IDX_TO_OFF(pages[0]->pindex); if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && - (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) + (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); (void)nfs_fsinfo(nmp, vp, cred, p); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; @@ -321,7 +335,11 @@ nfs_putpages(ap) else iomode = NFSV3WRITE_FILESYNC; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); pmap_qremove(kva, npages); relpbuf(bp, &nfs_pbuf_freecnt); @@ -332,8 +350,13 @@ nfs_putpages(ap) rtvals[i] = VM_PAGER_OK; vm_page_undirty(pages[i]); } - if (must_commit) + if (must_commit) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); nfs_clearcommit(vp->v_mount); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); + } } return rtvals[0]; } @@ -1076,7 +1099,9 @@ again: bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } + mtx_lock(&vm_mtx); vfs_bio_set_validclean(bp, on, n); + mtx_unlock(&vm_mtx); } /* diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index c7e6917e87dc..18cb8a9a6f22 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -2139,6 +2139,8 @@ nfs_clearcommit(mp) int s; s = splbio(); + mtx_assert(&Giant, MA_OWNED); + mtx_assert(&vm_mtx, MA_NOTOWNED); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index c7e6917e87dc..18cb8a9a6f22 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -2139,6 +2139,8 @@ nfs_clearcommit(mp) int s; s = splbio(); + mtx_assert(&Giant, MA_OWNED); + mtx_assert(&vm_mtx, MA_NOTOWNED); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ diff --git a/sys/pci/agp.c b/sys/pci/agp.c index 6419635a3e98..333c4c82797e 100644 --- a/sys/pci/agp.c +++ b/sys/pci/agp.c @@ -38,6 +38,7 @@ #include <sys/ioccom.h> #include <sys/agpio.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/proc.h> #include <pci/pcivar.h> diff --git a/sys/pci/agp_ali.c b/sys/pci/agp_ali.c index 86e070e8d2ae..aa805e107f91 100644 --- a/sys/pci/agp_ali.c +++ b/sys/pci/agp_ali.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/pci/agp_amd.c b/sys/pci/agp_amd.c index 0a498f72ae99..4aaf4e9bc16d 100644 --- a/sys/pci/agp_amd.c +++ b/sys/pci/agp_amd.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/pci/agp_i810.c b/sys/pci/agp_i810.c index 79fd56683214..5c4049383e1f 100644 --- a/sys/pci/agp_i810.c +++ b/sys/pci/agp_i810.c @@ -36,6 +36,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/pci/agp_intel.c b/sys/pci/agp_intel.c index a4b9a43b0045..dc1ef4da50b8 100644 --- a/sys/pci/agp_intel.c +++ 
b/sys/pci/agp_intel.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/pci/agp_sis.c b/sys/pci/agp_sis.c index 1f1a50b7eceb..a6a20a45a8fb 100644 --- a/sys/pci/agp_sis.c +++ b/sys/pci/agp_sis.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/pci/agp_via.c b/sys/pci/agp_via.c index 983348ea25d4..086b0276d135 100644 --- a/sys/pci/agp_via.c +++ b/sys/pci/agp_via.c @@ -35,6 +35,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <pci/pcivar.h> #include <pci/pcireg.h> diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index db9e239c8a5a..672d0a0bb6a4 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -114,8 +114,11 @@ READ(ap) return 0; } - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_reference(object); + mtx_unlock(&vm_mtx); + } #ifdef ENABLE_VFS_IOOPT /* @@ -147,8 +150,11 @@ READ(ap) (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) ip->i_flag |= IN_ACCESS; - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return error; } } @@ -192,8 +198,11 @@ READ(ap) (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) ip->i_flag |= IN_ACCESS; - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return error; } /* @@ -355,8 +364,11 @@ READ(ap) } } - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } if ((error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) ip->i_flag |= IN_ACCESS; @@ -395,8 +407,11 @@ WRITE(ap) ip = VTOI(vp); object = vp->v_object; - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_reference(object); + mtx_unlock(&vm_mtx); + } #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -408,8 +423,11 @@ WRITE(ap) if (ioflag & IO_APPEND) uio->uio_offset = ip->i_size; if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) { - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return (EPERM); } /* FALLTHROUGH */ @@ -428,8 +446,11 @@ WRITE(ap) fs = ip->I_FS; if (uio->uio_offset < 0 || (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) { - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return (EFBIG); } /* @@ -443,8 +464,11 @@ WRITE(ap) PROC_LOCK(p); psignal(p, SIGXFSZ); PROC_UNLOCK(p); - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return (EFBIG); } @@ -455,9 +479,11 @@ WRITE(ap) flags = B_SYNC; if (object && (object->flags & OBJ_OPT)) { + mtx_lock(&vm_mtx); vm_freeze_copyopts(object, OFF_TO_IDX(uio->uio_offset), OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK)); + mtx_unlock(&vm_mtx); } for (error = 0; uio->uio_resid > 0;) { @@ -546,8 +572,11 @@ WRITE(ap) } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) error = UFS_UPDATE(vp, 1); - if (object) + if (object) { + mtx_lock(&vm_mtx); vm_object_vndeallocate(object); + mtx_unlock(&vm_mtx); + } return (error); } diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index f5d88a50db8f..0fb4896d0ed3 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -41,6 +41,8 
@@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <vm/vm.h> #include <vm/vm_object.h> diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 1f00ea0c9661..d34672b29e53 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -34,7 +34,6 @@ #include <sys/mutex.h> #include <sys/mman.h> #include <sys/sysctl.h> -#include <sys/sx.h> #include <vm/vm.h> #include <vm/vm_object.h> @@ -43,7 +42,7 @@ #include <vm/vm_zone.h> /* prevent concurrant creation races */ -static struct sx phys_pager_sx; +static int phys_pager_alloc_lock; /* list of device pager objects */ static struct pagerlst phys_pager_object_list; /* protect access to phys_pager_object_list */ @@ -54,7 +53,6 @@ phys_pager_init(void) { TAILQ_INIT(&phys_pager_object_list); - sx_init(&phys_pager_sx, "phys_pager create"); mtx_init(&phys_pager_mtx, "phys_pager list", MTX_DEF); } @@ -76,8 +74,11 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, /* * Lock to prevent object creation race condition. */ - sx_xlock(&phys_pager_sx); - + while (phys_pager_alloc_lock) { + phys_pager_alloc_lock = -1; + msleep(&phys_pager_alloc_lock, &vm_mtx, PVM, "swpalc", 0); + } + /* * Look up pager, creating as necessary. */ @@ -101,7 +102,10 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, if (OFF_TO_IDX(foff + size) > object->size) object->size = OFF_TO_IDX(foff + size); } - sx_xunlock(&phys_pager_sx); + if (phys_pager_alloc_lock) + wakeup(&phys_pager_alloc_lock); + phys_pager_alloc_lock = 0; + } else { object = vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(foff + size)); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 8d343f4177e1..44f44652916a 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -80,7 +80,6 @@ #include <sys/sysctl.h> #include <sys/blist.h> #include <sys/lock.h> -#include <sys/sx.h> #include <sys/vmmeter.h> #ifndef MAX_PAGEOUT_CLUSTER @@ -119,6 +118,7 @@ static int nsw_wcount_sync; /* limit write buffers / synchronous */ static int nsw_wcount_async; /* limit write buffers / asynchronous */ static int nsw_wcount_async_max;/* assigned maximum */ static int nsw_cluster_max; /* maximum VOP I/O allowed */ +static int sw_alloc_interlock; /* swap pager allocation interlock */ struct blist *swapblist; static struct swblock **swhash; @@ -145,7 +145,6 @@ SYSCTL_INT(_vm, OID_AUTO, swap_async_max, #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) -static struct sx sw_alloc_sx; /* prevent concurrant creation */ static struct mtx sw_alloc_mtx; /* protect list manipulation */ static struct pagerlst swap_pager_object_list[NOBJLISTS]; struct pagerlst swap_pager_un_object_list; @@ -233,6 +232,8 @@ static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int)); static __inline void swp_sizecheck() { + + mtx_assert(&vm_mtx, MA_OWNED); if (vm_swap_size < nswap_lowat) { if (swap_pager_almost_full == 0) { printf("swap_pager: out of swap space\n"); @@ -264,7 +265,6 @@ swap_pager_init() for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); TAILQ_INIT(&swap_pager_un_object_list); - sx_init(&sw_alloc_sx, "swap_pager create"); mtx_init(&sw_alloc_mtx, "swap_pager list", MTX_DEF); /* @@ -389,7 +389,10 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, * of the handle. 
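The phys_pager hunks above replace an sx lock with a plain integer interlock protected by vm_mtx, so the allocation path can sleep via msleep() (which hands vm_mtx back while asleep) instead of blocking on a lock that cannot be taken with a mutex held. A minimal sketch of the idiom, assuming hypothetical names (alloc_interlock and the enter/exit helpers are illustrative, not part of the patch); note that a general-purpose version also marks the interlock held on entry, whereas the hunks above rely on vm_mtx covering the whole section:

	/* Sketch only; assumes <sys/mutex.h> and the global vm_mtx. */
	static int alloc_interlock;	/* 0 = free, -1 = has waiters */

	static void
	alloc_interlock_enter(void)
	{
		mtx_assert(&vm_mtx, MA_OWNED);
		while (alloc_interlock != 0) {
			alloc_interlock = -1;	/* request a wakeup */
			/* msleep() drops vm_mtx while sleeping */
			msleep(&alloc_interlock, &vm_mtx, PVM, "ilock", 0);
		}
		alloc_interlock = 1;	/* mark held (see note above) */
	}

	static void
	alloc_interlock_exit(void)
	{
		mtx_assert(&vm_mtx, MA_OWNED);
		if (alloc_interlock < 0)
			wakeup(&alloc_interlock);
		alloc_interlock = 0;
	}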
*/ - sx_xlock(&sw_alloc_sx); + while (sw_alloc_interlock) { + sw_alloc_interlock = -1; + msleep(&sw_alloc_interlock, &vm_mtx, PVM, "swpalc", 0); + } object = vm_pager_object_lookup(NOBJLIST(handle), handle); @@ -403,7 +406,9 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, swp_pager_meta_build(object, 0, SWAPBLK_NONE); } - sx_xunlock(&sw_alloc_sx); + if (sw_alloc_interlock < 0) + wakeup(&sw_alloc_interlock); + sw_alloc_interlock = 0; } else { object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(offset + PAGE_MASK + size)); @@ -478,6 +483,7 @@ swap_pager_dealloc(object) * * This routine may not block * This routine must be called at splvm(). + * vm_mtx should be held */ static __inline daddr_t @@ -486,6 +492,7 @@ swp_pager_getswapspace(npages) { daddr_t blk; + mtx_assert(&vm_mtx, MA_OWNED); if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) { if (swap_pager_full != 2) { printf("swap_pager_getswapspace: failed\n"); @@ -514,6 +521,7 @@ swp_pager_getswapspace(npages) * * This routine may not block * This routine must be called at splvm(). + * vm_mtx should be held */ static __inline void @@ -521,6 +529,8 @@ swp_pager_freeswapspace(blk, npages) daddr_t blk; int npages; { + + mtx_assert(&vm_mtx, MA_OWNED); blist_free(swapblist, blk, npages); vm_swap_size += npages; /* per-swap area stats */ @@ -551,6 +561,9 @@ swap_pager_freespace(object, start, size) vm_size_t size; { int s = splvm(); + + mtx_assert(&vm_mtx, MA_OWNED); + swp_pager_meta_free(object, start, size); splx(s); } @@ -635,6 +648,8 @@ swap_pager_copy(srcobject, dstobject, offset, destroysource) s = splvm(); + mtx_assert(&vm_mtx, MA_OWNED); + /* * If destroysource is set, we remove the source object from the * swap_pager internal queue now. @@ -881,7 +896,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp) * FREE PAGE(s) - destroy underlying swap that is no longer * needed. */ + mtx_lock(&vm_mtx); swp_pager_meta_free(object, start, count); + mtx_unlock(&vm_mtx); splx(s); bp->bio_resid = 0; biodone(bp); @@ -892,6 +909,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp) * Execute read or write */ + mtx_lock(&vm_mtx); while (count > 0) { daddr_t blk; @@ -954,7 +972,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp) bp->bio_resid -= PAGE_SIZE; } else { if (nbp == NULL) { + mtx_unlock(&vm_mtx); nbp = getchainbuf(bp, swapdev_vp, B_ASYNC); + mtx_lock(&vm_mtx); nbp->b_blkno = blk; nbp->b_bcount = 0; nbp->b_data = data; @@ -985,6 +1005,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp) /* nbp = NULL; */ } + mtx_unlock(&vm_mtx); /* * Wait for completion. */ @@ -1281,6 +1302,7 @@ swap_pager_putpages(object, m, count, sync, rtvals) * at this time. 
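The swp_pager_* hunks above pair each comment about a routine's locking contract with an mtx_assert() at function entry, turning the documentation into a runtime check under INVARIANTS. A hedged sketch of the convention (both function names hypothetical):

	static void
	vm_meta_locked(void)
	{
		/* contract: caller holds vm_mtx */
		mtx_assert(&vm_mtx, MA_OWNED);
		/* ... manipulate swap metadata ... */
	}

	static void
	vm_meta_entry(void)
	{
		/* contract: caller must NOT hold vm_mtx */
		mtx_assert(&vm_mtx, MA_NOTOWNED);
		mtx_lock(&vm_mtx);
		vm_meta_locked();
		mtx_unlock(&vm_mtx);
	}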
*/ s = splvm(); + mtx_unlock(&vm_mtx); mtx_lock(&pbuf_mtx); n -= nsw_wcount_async_max; if (nsw_wcount_async + n >= 0) { @@ -1289,6 +1311,7 @@ swap_pager_putpages(object, m, count, sync, rtvals) wakeup(&nsw_wcount_async); } mtx_unlock(&pbuf_mtx); + mtx_lock(&vm_mtx); splx(s); } @@ -1399,6 +1422,8 @@ swap_pager_putpages(object, m, count, sync, rtvals) swapdev_vp->v_numoutput++; splx(s); + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); /* * asynchronous @@ -1410,9 +1435,12 @@ swap_pager_putpages(object, m, count, sync, rtvals) bp->b_iodone = swp_pager_async_iodone; BUF_KERNPROC(bp); BUF_STRATEGY(bp); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); for (j = 0; j < n; ++j) rtvals[i+j] = VM_PAGER_PEND; + /* restart outer loop */ continue; } @@ -1445,6 +1473,8 @@ swap_pager_putpages(object, m, count, sync, rtvals) * normal async completion, which frees everything up. */ + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); swp_pager_async_iodone(bp); splx(s); @@ -1732,7 +1762,8 @@ swp_pager_hash(vm_object_t object, vm_pindex_t index) * * This routine must be called at splvm(), except when used to convert * an OBJT_DEFAULT object into an OBJT_SWAP object. - + * + * Requires vm_mtx. */ static void @@ -1744,6 +1775,7 @@ swp_pager_meta_build( struct swblock *swap; struct swblock **pswap; + mtx_assert(&vm_mtx, MA_OWNED); /* * Convert default object to swap object if necessary */ @@ -1830,12 +1862,16 @@ retry: * out. This routine does *NOT* operate on swap metadata associated * with resident pages. * + * vm_mtx must be held * This routine must be called at splvm() */ static void swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count) { + + mtx_assert(&vm_mtx, MA_OWNED); + if (object->type != OBJT_SWAP) return; @@ -1875,6 +1911,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count) * an object. * * This routine must be called at splvm() + * Requires vm_mtx. */ static void @@ -1882,6 +1919,8 @@ swp_pager_meta_free_all(vm_object_t object) { daddr_t index = 0; + mtx_assert(&vm_mtx, MA_OWNED); + if (object->type != OBJT_SWAP) return; @@ -1930,6 +1969,7 @@ swp_pager_meta_free_all(vm_object_t object) * busy page. * * This routine must be called at splvm(). + * Requires vm_mtx. * * SWM_FREE remove and free swap block from metadata * SWM_POP remove from meta data but do not free.. pop it out @@ -2032,18 +2072,24 @@ vm_pager_chain_iodone(struct buf *nbp) * Obtain a physical buffer and chain it to its parent buffer. When * I/O completes, the parent buffer will be B_SIGNAL'd.
Errors are * automatically propagated to the parent + * + * vm_mtx can't be held */ struct buf * getchainbuf(struct bio *bp, struct vnode *vp, int flags) { - struct buf *nbp = getpbuf(NULL); - u_int *count = (u_int *)&(bp->bio_caller1); + struct buf *nbp; + u_int *count; + + mtx_assert(&vm_mtx, MA_NOTOWNED); + nbp = getpbuf(NULL); + count = (u_int *)&(bp->bio_caller1); nbp->b_caller1 = bp; ++(*count); - if (*count > 4) + if (*count > 4) waitchainbuf(bp, 4, 0); nbp->b_iocmd = bp->bio_cmd; @@ -2063,6 +2109,9 @@ getchainbuf(struct bio *bp, struct vnode *vp, int flags) void flushchainbuf(struct buf *nbp) { + + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); if (nbp->b_bcount) { nbp->b_bufsize = nbp->b_bcount; if (nbp->b_iocmd == BIO_WRITE) @@ -2072,14 +2121,19 @@ flushchainbuf(struct buf *nbp) } else { bufdone(nbp); } + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); } -void +static void waitchainbuf(struct bio *bp, int limit, int done) { int s; - u_int *count = (u_int *)&(bp->bio_caller1); + u_int *count; + mtx_assert(&vm_mtx, MA_NOTOWNED); + mtx_lock(&Giant); + count = (u_int *)&(bp->bio_caller1); s = splbio(); while (*count > limit) { bp->bio_flags |= BIO_FLAG1; @@ -2092,6 +2146,7 @@ waitchainbuf(struct bio *bp, int limit, int done) } biodone(bp); } + mtx_unlock(&Giant); splx(s); } diff --git a/sys/vm/vm.h b/sys/vm/vm.h index 38f04aced890..5915b29972f7 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -95,6 +95,10 @@ typedef struct vm_map *vm_map_t; struct vm_object; typedef struct vm_object *vm_object_t; +#ifdef _KERNEL +extern struct mtx vm_mtx; +#endif + #ifndef _KERNEL /* * This is defined in <sys/types.h> for the kernel so that non-vm kernel diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index a1bad69dadae..f31f12b64774 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -81,6 +81,8 @@ #include <sys/vnode.h> #include <sys/resourcevar.h> #include <sys/vmmeter.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -134,6 +136,8 @@ unlock_map(struct faultstate *fs) static void _unlock_things(struct faultstate *fs, int dealloc) { + + mtx_assert(&vm_mtx, MA_OWNED); vm_object_pip_wakeup(fs->object); if (fs->object != fs->first_object) { vm_page_free(fs->first_m); @@ -145,8 +149,15 @@ _unlock_things(struct faultstate *fs, int dealloc) } unlock_map(fs); if (fs->vp != NULL) { - vput(fs->vp); + struct vnode *vp; + + vp = fs->vp; fs->vp = NULL; + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); + vput(vp); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); } } @@ -179,10 +190,41 @@ _unlock_things(struct faultstate *fs, int dealloc) * * * The map in question must be referenced, and remains so. - * Caller may hold no locks. + * Caller may hold no locks except the vm_mtx which will be + * locked if needed. 
*/ +static int vm_fault1 __P((vm_map_t, vm_offset_t, vm_prot_t, int)); + +static int vm_faults_no_vm_mtx; +SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_vm_mtx, CTLFLAG_RW, + &vm_faults_no_vm_mtx, 0, ""); + +static int vm_faults_no_giant; +SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_giant, CTLFLAG_RW, + &vm_faults_no_giant, 0, ""); + int -vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) +vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags) +{ + int hadvmlock, ret; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) { + mtx_lock(&vm_mtx); + vm_faults_no_vm_mtx++; + if (!mtx_owned(&Giant)) + vm_faults_no_giant++; + } + ret = vm_fault1(map, vaddr, fault_type, fault_flags); + if (!hadvmlock) + mtx_unlock(&vm_mtx); + return (ret); +} + +static int +vm_fault1(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags) { vm_prot_t prot; int result; @@ -194,7 +236,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) int faultcount; struct faultstate fs; - cnt.v_vm_faults++; /* needs lock XXX */ + mtx_assert(&vm_mtx, MA_OWNED); + cnt.v_vm_faults++; hardfault = 0; RetryFault:; @@ -251,7 +294,11 @@ RetryFault:; vm_object_reference(fs.first_object); vm_object_pip_add(fs.first_object, 1); + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); fs.vp = vnode_pager_lock(fs.first_object); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); if ((fault_type & VM_PROT_WRITE) && (fs.first_object->type == OBJT_VNODE)) { vm_freeze_copyopts(fs.first_object, @@ -723,7 +770,11 @@ readrest: */ if (fs.vp != NULL) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); vput(fs.vp); + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); fs.vp = NULL; } @@ -940,6 +991,7 @@ vm_fault_user_wire(map, start, end) register pmap_t pmap; int rv; + mtx_assert(&vm_mtx, MA_OWNED); pmap = vm_map_pmap(map); /* @@ -1112,6 +1164,9 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) * * Return value: * number of pages in marray + * + * This routine can't block. + * vm_mtx must be held. 
*/ static int vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) @@ -1127,6 +1182,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) vm_page_t rtm; int cbehind, cahead; + mtx_assert(&vm_mtx, MA_OWNED); + object = m->object; pindex = m->pindex; diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index a180ae344bb2..37c580acfd90 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -161,6 +161,7 @@ useracc(addr, len, rw) || (vm_offset_t) addr + len < (vm_offset_t) addr) { return (FALSE); } + mtx_lock(&vm_mtx); map = &curproc->p_vmspace->vm_map; vm_map_lock_read(map); /* @@ -172,6 +173,7 @@ useracc(addr, len, rw) trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot); map->hint = save_hint; vm_map_unlock_read(map); + mtx_unlock(&vm_mtx); return (rv == TRUE); } @@ -181,8 +183,12 @@ vslock(addr, len) caddr_t addr; u_int len; { - vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), + + mtx_lock(&vm_mtx); + vm_map_pageable(&curproc->p_vmspace->vm_map, + trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), FALSE); + mtx_unlock(&vm_mtx); } void @@ -190,8 +196,12 @@ vsunlock(addr, len) caddr_t addr; u_int len; { - vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), + + mtx_lock(&vm_mtx); + vm_map_pageable(&curproc->p_vmspace->vm_map, + trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), TRUE); + mtx_unlock(&vm_mtx); } /* @@ -201,6 +211,8 @@ vsunlock(addr, len) * machine-dependent layer to fill those in and make the new process * ready to run. The new process is set up so that it returns directly * to user mode to avoid stack copying and relocation problems. + * + * Called without vm_mtx. */ void vm_fork(p1, p2, flags) @@ -209,6 +221,7 @@ vm_fork(p1, p2, flags) { register struct user *up; + mtx_lock(&vm_mtx); if ((flags & RFPROC) == 0) { /* * Divorce the memory, if it is shared, essentially @@ -221,6 +234,7 @@ vm_fork(p1, p2, flags) } } cpu_fork(p1, p2, flags); + mtx_unlock(&vm_mtx); return; } @@ -275,6 +289,7 @@ vm_fork(p1, p2, flags) * and make the child ready to run. */ cpu_fork(p1, p2, flags); + mtx_unlock(&vm_mtx); } /* @@ -360,10 +375,13 @@ scheduler(dummy) mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); loop: + mtx_lock(&vm_mtx); if (vm_page_count_min()) { VM_WAIT; + mtx_unlock(&vm_mtx); goto loop; } + mtx_unlock(&vm_mtx); mtx_unlock(&Giant); pp = NULL; @@ -442,6 +460,9 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, * If any procs have been sleeping/stopped for at least maxslp seconds, * they are swapped. Else, we swap the longest-sleeping or stopped process, * if any, otherwise the longest-resident process. + * + * Can block + * must be called with vm_mtx */ void swapout_procs(action) @@ -452,6 +473,8 @@ int action; int outpri, outpri2; int didswap = 0; + mtx_assert(&vm_mtx, MA_OWNED); + mtx_unlock(&vm_mtx); outp = outp2 = NULL; outpri = outpri2 = INT_MIN; sx_slock(&allproc_lock); @@ -465,6 +488,11 @@ retry: PROC_UNLOCK(p); continue; } + /* + * only aiod changes vmspace, however it will be + * skipped because of the if statement above checking + * for P_SYSTEM + */ vm = p->p_vmspace; mtx_lock_spin(&sched_lock); if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) { @@ -516,6 +544,7 @@ retry: } mtx_unlock_spin(&sched_lock); + mtx_lock(&vm_mtx); #if 0 /* * XXX: This is broken. 
We release the lock we @@ -531,7 +560,7 @@ retry: */ if (lockmgr(&vm->vm_map.lock, LK_EXCLUSIVE | LK_NOWAIT, - (void *)0, curproc)) { + NULL, curproc)) { vmspace_free(vm); PROC_UNLOCK(p); continue; @@ -548,8 +577,10 @@ retry: swapout(p); vmspace_free(vm); didswap++; + mtx_unlock(&vm_mtx); goto retry; } + mtx_unlock(&vm_mtx); PROC_UNLOCK(p); } } @@ -558,6 +589,7 @@ retry: * If we swapped something out, and another process needed memory, * then wakeup the sched process. */ + mtx_lock(&vm_mtx); if (didswap) wakeup(&proc0); } diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index ae336e1b930b..35e4676665f4 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -73,6 +73,7 @@ #include <sys/lock.h> #include <sys/proc.h> #include <sys/systm.h> +#include <sys/mutex.h> #include <vm/vm.h> #include <vm/vm_object.h> @@ -96,16 +97,20 @@ SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL) * The start and end address of physical memory is passed in. */ +struct mtx vm_mtx; + /* ARGSUSED*/ static void vm_mem_init(dummy) void *dummy; { + /* * Initializes resident memory structures. From here on, all physical * memory is accounted for, and we use only virtual addresses. */ - + mtx_init(&vm_mtx, "vm", MTX_DEF); + mtx_lock(&vm_mtx); vm_set_page_size(); virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail); @@ -118,4 +123,5 @@ vm_mem_init(dummy) kmem_init(virtual_avail, virtual_end); pmap_init(avail_start, avail_end); vm_pager_init(); + mtx_unlock(&vm_mtx); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 14e4867d8fa3..08ee4863fca7 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -106,11 +106,17 @@ kmem_alloc_pageable(map, size) { vm_offset_t addr; int result; + int hadvmlock; + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); size = round_page(size); addr = vm_map_min(map); result = vm_map_find(map, NULL, (vm_offset_t) 0, &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); + if (!hadvmlock) + mtx_unlock(&vm_mtx); if (result != KERN_SUCCESS) { return (0); } @@ -131,10 +137,17 @@ kmem_alloc_nofault(map, size) vm_offset_t addr; int result; + int hadvmlock; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); size = round_page(size); addr = vm_map_min(map); result = vm_map_find(map, NULL, (vm_offset_t) 0, &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); + if (!hadvmlock) + mtx_unlock(&vm_mtx); if (result != KERN_SUCCESS) { return (0); } @@ -153,8 +166,11 @@ kmem_alloc(map, size) vm_offset_t addr; vm_offset_t offset; vm_offset_t i; + int hadvmlock; - mtx_assert(&Giant, MA_OWNED); + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); size = round_page(size); /* @@ -170,6 +186,8 @@ kmem_alloc(map, size) vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { vm_map_unlock(map); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (0); } offset = addr - VM_MIN_KERNEL_ADDRESS; @@ -214,6 +232,8 @@ kmem_alloc(map, size) (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (addr); } @@ -232,9 +252,16 @@ kmem_free(map, addr, size) vm_offset_t addr; vm_size_t size; { + int hadvmlock; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); - mtx_assert(&Giant, MA_OWNED); (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); + + if (!hadvmlock) + mtx_unlock(&vm_mtx); } /* @@ -257,6 +284,11 @@ kmem_suballoc(parent, min, max, size) { int ret; vm_map_t result; + int hadvmlock; + + hadvmlock = 
mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); size = round_page(size); @@ -274,6 +306,8 @@ kmem_suballoc(parent, min, max, size) panic("kmem_suballoc: cannot create submap"); if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (result); } @@ -308,10 +342,15 @@ kmem_malloc(map, size, flags) vm_map_entry_t entry; vm_offset_t addr; vm_page_t m; + int hadvmlock; if (map != kmem_map && map != mb_map) panic("kmem_malloc: map != {kmem,mb}_map"); + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); + size = round_page(size); addr = vm_map_min(map); @@ -326,12 +365,12 @@ kmem_malloc(map, size, flags) if (map == mb_map) { mb_map_full = TRUE; printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n"); - return (0); + goto bad; } if ((flags & M_NOWAIT) == 0) panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated", (long)size, (long)map->size); - return (0); + goto bad; } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kmem_object); @@ -370,7 +409,7 @@ retry: if (flags & M_ASLEEP) { VM_AWAIT; } - return (0); + goto bad; } vm_page_flag_clear(m, PG_ZERO); m->valid = VM_PAGE_BITS_ALL; @@ -407,7 +446,14 @@ retry: } vm_map_unlock(map); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (addr); + +bad: + if (!hadvmlock) + mtx_unlock(&vm_mtx); + return (0); } /* @@ -425,6 +471,11 @@ kmem_alloc_wait(map, size) vm_size_t size; { vm_offset_t addr; + int hadvmlock; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); size = round_page(size); @@ -439,13 +490,17 @@ kmem_alloc_wait(map, size) /* no space now; see if we can ever get space */ if (vm_map_max(map) - vm_map_min(map) < size) { vm_map_unlock(map); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (0); } vm_map_unlock(map); - tsleep(map, PVM, "kmaw", 0); + msleep(map, &vm_mtx, PVM, "kmaw", 0); } vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); + if (!hadvmlock) + mtx_unlock(&vm_mtx); return (addr); } @@ -461,10 +516,17 @@ kmem_free_wakeup(map, addr, size) vm_offset_t addr; vm_size_t size; { + int hadvmlock; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); wakeup(map); vm_map_unlock(map); + if (!hadvmlock) + mtx_unlock(&vm_mtx); } /* diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index b33e9e4a56ff..d07d35bad336 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -200,6 +200,7 @@ vmspace_free(vm) struct vmspace *vm; { + mtx_assert(&vm_mtx, MA_OWNED); if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); @@ -350,6 +351,8 @@ vm_map_entry_unlink(vm_map_t map, * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. + * + * Doesn't block. */ boolean_t vm_map_lookup_entry(map, address, entry) @@ -439,6 +442,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_map_entry_t temp_entry; vm_eflags_t protoeflags; + mtx_assert(&vm_mtx, MA_OWNED); /* * Check that the start and end points are not bogus. */ @@ -1705,7 +1709,9 @@ vm_map_clean(map, start, end, syncio, invalidate) int flags; vm_object_reference(object); + mtx_unlock(&vm_mtx); vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); + mtx_lock(&vm_mtx); flags = (syncio || invalidate) ? 
OBJPC_SYNC : 0; flags |= invalidate ? OBJPC_INVAL : 0; vm_object_page_clean(object, @@ -2296,6 +2302,8 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, * the stack. Also returns KERN_SUCCESS if addr is outside the * stack range (this is strange, but preserves compatibility with * the grow function in vm_machdep.c). + * + * Will grab vm_mtx if needed */ int vm_map_growstack (struct proc *p, vm_offset_t addr) @@ -2309,18 +2317,29 @@ vm_map_growstack (struct proc *p, vm_offset_t addr) int grow_amount; int rv; int is_procstack; + int hadvmlock; + + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); +#define myreturn(rval) do { \ + if (!hadvmlock) \ + mtx_unlock(&vm_mtx); \ + return (rval); \ +} while (0) + Retry: vm_map_lock_read(map); /* If addr is already in the entry range, no need to grow.*/ if (vm_map_lookup_entry(map, addr, &prev_entry)) { vm_map_unlock_read(map); - return (KERN_SUCCESS); + myreturn (KERN_SUCCESS); } if ((stack_entry = prev_entry->next) == &map->header) { vm_map_unlock_read(map); - return (KERN_SUCCESS); + myreturn (KERN_SUCCESS); } if (prev_entry == &map->header) end = stack_entry->start - stack_entry->avail_ssize; @@ -2338,14 +2357,14 @@ Retry: addr >= stack_entry->start || addr < stack_entry->start - stack_entry->avail_ssize) { vm_map_unlock_read(map); - return (KERN_SUCCESS); + myreturn (KERN_SUCCESS); } /* Find the minimum grow amount */ grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); if (grow_amount > stack_entry->avail_ssize) { vm_map_unlock_read(map); - return (KERN_NO_SPACE); + myreturn (KERN_NO_SPACE); } /* If there is no longer enough space between the entries @@ -2364,7 +2383,7 @@ Retry: stack_entry->avail_ssize = stack_entry->start - end; vm_map_unlock(map); - return (KERN_NO_SPACE); + myreturn (KERN_NO_SPACE); } is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; @@ -2375,7 +2394,7 @@ Retry: if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > p->p_rlimit[RLIMIT_STACK].rlim_cur)) { vm_map_unlock_read(map); - return (KERN_NO_SPACE); + myreturn (KERN_NO_SPACE); } /* Round up the grow amount modulo SGROWSIZ */ @@ -2427,8 +2446,8 @@ Retry: } vm_map_unlock(map); - return (rv); - + myreturn (rv); +#undef myreturn } /* @@ -2501,6 +2520,9 @@ vmspace_unshare(struct proc *p) { * specified, the map may be changed to perform virtual * copying operations, although the data referenced will * remain the same. + * + * Can block while locking maps and while calling vm_object_shadow(). + * Will drop/reacquire the vm_mtx. */ int vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ @@ -2928,6 +2950,8 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) * Performs the copy_on_write operations necessary to allow the virtual copies * into user space to work. This has to be called for write(2) system calls * from other processes, file unlinking, and file size shrinkage. + * + * Requires that the vm_mtx is held */ void vm_freeze_copyopts(object, froma, toa) @@ -2938,6 +2962,7 @@ vm_freeze_copyopts(object, froma, toa) vm_object_t robject; vm_pindex_t idx; + mtx_assert(&vm_mtx, MA_OWNED); if ((object == NULL) || ((object->flags & OBJ_OPT) == 0)) return; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index daf2b6ea4cab..241a80cefaa5 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -196,6 +196,7 @@ struct vmspace { caddr_t vm_minsaddr; /* user VA at max stack growth */ }; +#ifdef _KERNEL /* * Macros: vm_map_lock, etc.
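vm_map_growstack() above shows the commit's recursion-avoidance idiom in its most explicit form: remember whether the caller already owned vm_mtx, lock only if not, and release only what was taken, with the local myreturn macro keeping every early exit balanced. A condensed sketch, assuming a hypothetical vm_do_work():

	int
	vm_do_work(void)
	{
		int hadvmlock, rv;

		hadvmlock = mtx_owned(&vm_mtx);
		if (!hadvmlock)
			mtx_lock(&vm_mtx);
		rv = KERN_SUCCESS;
		/* ... work that requires vm_mtx ... */
		if (!hadvmlock)
			mtx_unlock(&vm_mtx);
		return (rv);
	}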
* Function: @@ -211,6 +212,7 @@ struct vmspace { do { \ lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \ &(map)->ref_lock, curproc); \ + mtx_lock(&vm_mtx); \ (map)->timestamp++; \ } while(0) @@ -225,27 +227,33 @@ struct vmspace { #define vm_map_lock(map) \ do { \ vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map); \ - if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) \ + mtx_assert(&vm_mtx, MA_OWNED); \ + if (lockmgr(&(map)->lock, LK_EXCLUSIVE | LK_INTERLOCK, \ + &vm_mtx, curproc) != 0) \ panic("vm_map_lock: failed to get lock"); \ + mtx_lock(&vm_mtx); \ (map)->timestamp++; \ } while(0) #define vm_map_unlock(map) \ do { \ vm_map_printf("locking map LK_RELEASE: %p\n", map); \ - lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \ + lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \ } while (0) #define vm_map_lock_read(map) \ do { \ vm_map_printf("locking map LK_SHARED: %p\n", map); \ - lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc); \ + mtx_assert(&vm_mtx, MA_OWNED); \ + lockmgr(&(map)->lock, LK_SHARED | LK_INTERLOCK, \ + &vm_mtx, curproc); \ + mtx_lock(&vm_mtx); \ } while (0) #define vm_map_unlock_read(map) \ do { \ vm_map_printf("locking map LK_RELEASE: %p\n", map); \ - lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \ + lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \ } while (0) static __inline__ int @@ -253,7 +261,8 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) { int error; vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map); - error = lockmgr(&map->lock, LK_EXCLUPGRADE, (void *)0, p); + error = lockmgr(&map->lock, LK_EXCLUPGRADE | LK_INTERLOCK, &vm_mtx, p); + mtx_lock(&vm_mtx); if (error == 0) map->timestamp++; return error; @@ -264,7 +273,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) { #define vm_map_lock_downgrade(map) \ do { \ vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); \ - lockmgr(&(map)->lock, LK_DOWNGRADE, (void *)0, curproc); \ + lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc); \ } while (0) #define vm_map_set_recursive(map) \ @@ -287,6 +296,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) { #define vm_map_min(map) ((map)->min_offset) #define vm_map_max(map) ((map)->max_offset) #define vm_map_pmap(map) ((map)->pmap) +#endif /* _KERNEL */ static __inline struct pmap * vmspace_pmap(struct vmspace *vmspace) diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 8dcb9067bbb8..0f4e1072368f 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -145,8 +145,10 @@ vmtotal(SYSCTL_HANDLER_ARGS) /* * Mark all objects as inactive. */ + mtx_lock(&vm_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) vm_object_clear_flag(object, OBJ_ACTIVE); + mtx_unlock(&vm_mtx); /* * Calculate process statistics. */ @@ -197,6 +199,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) * Note active objects. */ paging = 0; + mtx_lock(&vm_mtx); for (map = &p->p_vmspace->vm_map, entry = map->header.next; entry != &map->header; entry = entry->next) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || @@ -205,6 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE); paging |= entry->object.vm_object->paging_in_progress; } + mtx_unlock(&vm_mtx); if (paging) totalp->t_pw++; } @@ -212,6 +216,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) /* * Calculate object memory usage statistics. 
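The vm_map.h macros above hand vm_mtx to lockmgr() as an interlock: with LK_INTERLOCK, lockmgr() releases the mutex before it can sleep on the map lock, and the macro reacquires it afterwards, so the caller's vm_mtx invariant holds on both sides of a contended acquisition. An open-coded approximation (a sketch only; the real macros also carry the vm_map_printf() debug hook):

	void
	map_lock_sketch(vm_map_t map)
	{
		mtx_assert(&vm_mtx, MA_OWNED);
		/* lockmgr() drops the interlock (vm_mtx) before sleeping */
		if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_INTERLOCK,
		    &vm_mtx, curproc) != 0)
			panic("map_lock_sketch: failed to get lock");
		mtx_lock(&vm_mtx);	/* restore the caller's invariant */
		map->timestamp++;
	}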
*/ + mtx_lock(&vm_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) { /* * devices, like /dev/mem, will badly skew our totals @@ -235,6 +240,7 @@ vmtotal(SYSCTL_HANDLER_ARGS) } } totalp->t_free = cnt.v_free_count + cnt.v_cache_count; + mtx_unlock(&vm_mtx); return (sysctl_handle_opaque(oidp, totalp, sizeof total, req)); } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 14307b366543..5de25d913aa9 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -52,6 +52,7 @@ #include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/sysproto.h> #include <sys/filedesc.h> #include <sys/proc.h> @@ -515,14 +516,17 @@ msync(p, uap) * the range of the map entry containing addr. This can be incorrect * if the region splits or is coalesced with a neighbor. */ + mtx_lock(&vm_mtx); if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = vm_map_lookup_entry(map, addr, &entry); vm_map_unlock_read(map); - if (rv == FALSE) + if (rv == FALSE) { + mtx_unlock(&vm_mtx); return (EINVAL); + } addr = entry->start; size = entry->end - entry->start; } @@ -533,6 +537,7 @@ msync(p, uap) rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); + mtx_unlock(&vm_mtx); switch (rv) { case KERN_SUCCESS: break; @@ -589,10 +594,14 @@ munmap(p, uap) /* * Make sure entire range is allocated. */ - if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) + mtx_lock(&vm_mtx); + if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) { + mtx_unlock(&vm_mtx); return (EINVAL); + } /* returns nothing but KERN_SUCCESS anyway */ (void) vm_map_remove(map, addr, addr + size); + mtx_unlock(&vm_mtx); return (0); } @@ -624,6 +633,7 @@ mprotect(p, uap) vm_offset_t addr; vm_size_t size, pageoff; register vm_prot_t prot; + int ret; addr = (vm_offset_t) uap->addr; size = uap->len; @@ -640,8 +650,11 @@ mprotect(p, uap) if (addr + size < addr) return(EINVAL); - switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, - FALSE)) { + mtx_lock(&vm_mtx); + ret = vm_map_protect(&p->p_vmspace->vm_map, addr, + addr + size, prot, FALSE); + mtx_unlock(&vm_mtx); + switch (ret) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: @@ -665,6 +678,7 @@ minherit(p, uap) vm_offset_t addr; vm_size_t size, pageoff; register vm_inherit_t inherit; + int ret; addr = (vm_offset_t)uap->addr; size = uap->len; @@ -677,8 +691,12 @@ minherit(p, uap) if (addr + size < addr) return(EINVAL); - switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, - inherit)) { + mtx_lock(&vm_mtx); + ret = vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, + inherit); + mtx_unlock(&vm_mtx); + + switch (ret) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: @@ -702,6 +720,7 @@ madvise(p, uap) struct madvise_args *uap; { vm_offset_t start, end; + int ret; /* * Check for illegal behavior @@ -729,9 +748,10 @@ madvise(p, uap) start = trunc_page((vm_offset_t) uap->addr); end = round_page((vm_offset_t) uap->addr + uap->len); - if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav)) - return (EINVAL); - return (0); + mtx_lock(&vm_mtx); + ret = vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav); + mtx_unlock(&vm_mtx); + return (ret ? EINVAL : 0); } #ifndef _SYS_SYSPROTO_H_ @@ -777,6 +797,7 @@ mincore(p, uap) vec = uap->vec; map = &p->p_vmspace->vm_map; + mtx_lock(&vm_mtx); pmap = vmspace_pmap(p->p_vmspace); vm_map_lock_read(map); @@ -856,6 +877,7 @@ RestartScan: * the map, we release the lock. 
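mincore() above has to drop both vm_mtx and the map read lock before each subyte(), because the copyout can fault and re-enter the fault handler; it then revalidates against map->timestamp and rescans if the map changed underneath it. A condensed sketch of that retry shape (helper name hypothetical, entry walk elided):

	static int
	mincore_scan_sketch(vm_map_t map, char *vec)
	{
		unsigned int timestamp;
		char residency;

	RestartScan:
		mtx_lock(&vm_mtx);
		vm_map_lock_read(map);
		timestamp = map->timestamp;
		residency = 0;	/* ... walk entries, compute the byte ... */
		vm_map_unlock_read(map);
		mtx_unlock(&vm_mtx);

		/* may fault, sleep, and modify the map */
		if (subyte(vec, residency) != 0)
			return (EFAULT);

		mtx_lock(&vm_mtx);
		vm_map_lock_read(map);
		if (timestamp != map->timestamp) {
			vm_map_unlock_read(map);
			mtx_unlock(&vm_mtx);
			goto RestartScan;
		}
		vm_map_unlock_read(map);
		mtx_unlock(&vm_mtx);
		return (0);
	}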
*/ vm_map_unlock_read(map); + mtx_unlock(&vm_mtx); /* * calculate index into user supplied byte vector @@ -886,6 +908,7 @@ RestartScan: * If the map has changed, due to the subyte, the previous * output may be invalid. */ + mtx_lock(&vm_mtx); vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; @@ -900,6 +923,7 @@ RestartScan: * the map, we release the lock. */ vm_map_unlock_read(map); + mtx_unlock(&vm_mtx); /* * Zero the last entries in the byte vector. @@ -917,10 +941,12 @@ RestartScan: * If the map has changed, due to the subyte, the previous * output may be invalid. */ + mtx_lock(&vm_mtx); vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); + mtx_unlock(&vm_mtx); return (0); } @@ -965,7 +991,10 @@ mlock(p, uap) return (error); #endif - error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE); + mtx_lock(&vm_mtx); + error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, + addr + size, FALSE); + mtx_unlock(&vm_mtx); return (error == KERN_SUCCESS ? 0 : ENOMEM); } @@ -1030,7 +1059,10 @@ munlock(p, uap) return (error); #endif - error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE); + mtx_lock(&vm_mtx); + error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, + addr + size, TRUE); + mtx_unlock(&vm_mtx); return (error == KERN_SUCCESS ? 0 : ENOMEM); } @@ -1077,7 +1109,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; + mtx_lock(&vm_mtx); (void) vm_map_remove(map, *addr, *addr + size); + mtx_unlock(&vm_mtx); } /* @@ -1099,7 +1133,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, struct vattr vat; int error; + mtx_lock(&Giant); error = VOP_GETATTR(vp, &vat, p->p_ucred, p); + mtx_unlock(&Giant); if (error) return (error); objsize = round_page(vat.va_size); @@ -1148,6 +1184,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, maxprot |= VM_PROT_EXECUTE; #endif + mtx_lock(&vm_mtx); if (fitit) { *addr = pmap_addr_hint(object, *addr, size); } @@ -1180,6 +1217,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, } } out: + mtx_unlock(&vm_mtx); switch (rv) { case KERN_SUCCESS: return (0); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 33fe834d0dfa..30ef19043d40 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -146,6 +146,24 @@ _vm_object_allocate(type, size, object) vm_object_t object; { int incr; + int hadvmlock; + + /* + * XXX: Not all callers seem to have the lock, compensate. + * I'm pretty sure we need to bump the gen count before possibly + * nuking the data contained within while under the lock.
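The vm_object hunks that follow keep running into vnode code (vget(), vrele(), vinvalbuf()) that still requires Giant. The pattern is always the same: drop vm_mtx first, take Giant, do the vnode operation, then restore the original state, which preserves a Giant-before-vm_mtx lock order. A hypothetical helper capturing the shape:

	static void
	vrele_under_giant(struct vnode *vp)
	{
		mtx_assert(&vm_mtx, MA_OWNED);
		mtx_unlock(&vm_mtx);	/* never hold vm_mtx into vnode code */
		mtx_lock(&Giant);
		vrele(vp);		/* may take a VOP lock and sleep */
		mtx_unlock(&Giant);
		mtx_lock(&vm_mtx);	/* caller's lock state restored */
	}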
+ */ + hadvmlock = mtx_owned(&vm_mtx); + if (!hadvmlock) + mtx_lock(&vm_mtx); + object->generation++; + if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) + vm_object_set_flag(object, OBJ_ONEMAPPING); + TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); + vm_object_count++; + if (!hadvmlock) + mtx_unlock(&vm_mtx); + TAILQ_INIT(&object->memq); TAILQ_INIT(&object->shadow_head); @@ -153,8 +171,6 @@ _vm_object_allocate(type, size, object) object->size = size; object->ref_count = 1; object->flags = 0; - if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) - vm_object_set_flag(object, OBJ_ONEMAPPING); object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; @@ -175,10 +191,6 @@ _vm_object_allocate(type, size, object) */ object->hash_rand = object_hash_rand - 129; - object->generation++; - - TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); - vm_object_count++; object_hash_rand = object->hash_rand; } @@ -226,7 +238,6 @@ vm_object_allocate(type, size) vm_object_t result; result = (vm_object_t) zalloc(obj_zone); - _vm_object_allocate(type, size, result); return (result); @@ -250,18 +261,29 @@ vm_object_reference(object) object->ref_count++; if (object->type == OBJT_VNODE) { + mtx_unlock(&vm_mtx); + mtx_lock(&Giant); while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) { printf("vm_object_reference: delay in getting object\n"); } + mtx_unlock(&Giant); + mtx_lock(&vm_mtx); } } +/* + * handle deallocating an object of type OBJT_VNODE + * + * requires vm_mtx + * may block + */ void vm_object_vndeallocate(object) vm_object_t object; { struct vnode *vp = (struct vnode *) object->handle; + mtx_assert(&vm_mtx, MA_OWNED); KASSERT(object->type == OBJT_VNODE, ("vm_object_vndeallocate: not a vnode object")); KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); @@ -277,7 +299,14 @@ vp->v_flag &= ~VTEXT; vm_object_clear_flag(object, OBJ_OPT); } + /* + * vrele may need a vop lock + */ + mtx_unlock(VM_OBJECT_MTX(object)); + mtx_lock(&Giant); vrele(vp); + mtx_unlock(&Giant); + mtx_lock(VM_OBJECT_MTX(object)); } /* @@ -290,6 +319,7 @@ vm_object_vndeallocate * may be relinquished. * * No object may be locked.
+ * vm_mtx must be held */ void vm_object_deallocate(object) @@ -297,6 +327,7 @@ vm_object_deallocate(object) { vm_object_t temp; + mtx_assert(&vm_mtx, MA_OWNED); while (object != NULL) { if (object->type == OBJT_VNODE) { @@ -422,7 +453,11 @@ vm_object_terminate(object) vm_object_page_clean(object, 0, 0, OBJPC_SYNC); vp = (struct vnode *) object->handle; + mtx_unlock(VM_OBJECT_MTX(object)); + mtx_lock(&Giant); vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + mtx_unlock(&Giant); + mtx_lock(VM_OBJECT_MTX(object)); } KASSERT(object->ref_count == 0, @@ -507,6 +542,7 @@ vm_object_page_clean(object, start, end, flags) vm_page_t ma[vm_pageout_page_count]; int curgeneration; + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); if (object->type != OBJT_VNODE || (object->flags & OBJ_MIGHTBEDIRTY) == 0) return; @@ -962,6 +998,7 @@ vm_object_backing_scan(vm_object_t object, int op) vm_pindex_t backing_offset_index; s = splvm(); + mtx_assert(&vm_mtx, MA_OWNED); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); @@ -1175,6 +1212,9 @@ void vm_object_collapse(object) vm_object_t object; { + + mtx_assert(&vm_mtx, MA_OWNED); + while (TRUE) { vm_object_t backing_object; @@ -1386,6 +1426,8 @@ vm_object_page_remove(object, start, end, clean_only) unsigned int size; int all; + mtx_assert(&vm_mtx, MA_OWNED); + if (object == NULL || object->resident_page_count == 0) return; @@ -1502,6 +1544,8 @@ vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) { vm_pindex_t next_pindex; + mtx_assert(&vm_mtx, MA_OWNED); + if (prev_object == NULL) { return (TRUE); } diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index ba4c026a7292..2b29baf2841c 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -169,34 +169,49 @@ extern vm_object_t kmem_object; #ifdef _KERNEL +/* + * For now a global vm lock. 
+ */ +#define VM_OBJECT_MTX(object) (&vm_mtx) + static __inline void vm_object_set_flag(vm_object_t object, u_short bits) { - atomic_set_short(&object->flags, bits); + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); + object->flags |= bits; } static __inline void vm_object_clear_flag(vm_object_t object, u_short bits) { - atomic_clear_short(&object->flags, bits); + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); + object->flags &= ~bits; } static __inline void vm_object_pip_add(vm_object_t object, short i) { - atomic_add_short(&object->paging_in_progress, i); + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); + object->paging_in_progress += i; } static __inline void vm_object_pip_subtract(vm_object_t object, short i) { - atomic_subtract_short(&object->paging_in_progress, i); + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); + object->paging_in_progress -= i; } static __inline void vm_object_pip_wakeup(vm_object_t object) { - atomic_subtract_short(&object->paging_in_progress, 1); + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); + object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); @@ -206,8 +221,10 @@ vm_object_pip_wakeup(vm_object_t object) static __inline void vm_object_pip_wakeupn(vm_object_t object, short i) { + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); if (i) - atomic_subtract_short(&object->paging_in_progress, i); + object->paging_in_progress -= i; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); @@ -217,11 +234,13 @@ vm_object_pip_wakeupn(vm_object_t object, short i) static __inline void vm_object_pip_sleep(vm_object_t object, char *waitid) { + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); if (object->paging_in_progress) { int s = splvm(); if (object->paging_in_progress) { vm_object_set_flag(object, OBJ_PIPWNT); - tsleep(object, PVM, waitid, 0); + msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0); } splx(s); } @@ -230,6 +249,8 @@ vm_object_pip_sleep(vm_object_t object, char *waitid) static __inline void vm_object_pip_wait(vm_object_t object, char *waitid) { + + mtx_assert(VM_OBJECT_MTX(object), MA_OWNED); while (object->paging_in_progress) vm_object_pip_sleep(object, waitid); } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 5865d707a74b..2ae0fe73f068 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -71,6 +71,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/malloc.h> #include <sys/proc.h> #include <sys/vmmeter.h> @@ -147,6 +148,7 @@ vm_set_page_size() * * Add a new page to the freelist for use by the system. * Must be called at splhigh(). + * Must be called with the vm_mtx held. */ vm_page_t vm_add_new_page(pa) @@ -154,6 +156,7 @@ vm_add_new_page(pa) { vm_page_t m; + mtx_assert(&vm_mtx, MA_OWNED); ++cnt.v_page_count; ++cnt.v_free_count; m = PHYS_TO_VM_PAGE(pa); @@ -360,6 +363,7 @@ vm_page_insert(m, object, pindex) { register struct vm_page **bucket; + mtx_assert(&vm_mtx, MA_OWNED); if (m->object != NULL) panic("vm_page_insert: already inserted"); @@ -419,6 +423,7 @@ vm_page_remove(m) { vm_object_t object; + mtx_assert(&vm_mtx, MA_OWNED); if (m->object == NULL) return; @@ -482,6 +487,8 @@ vm_page_remove(m) * an interrupt makes a change, but the generation algorithm will not * operate properly in an SMP environment where both cpu's are able to run * kernel code simultaneously. 
+ * NOTE: under the giant vm lock we should be ok, there should be + * no reason to check vm_page_bucket_generation * * The object must be locked. No side effects. * This routine may not block. @@ -596,6 +603,8 @@ vm_page_unqueue(m) { int queue = m->queue; struct vpgqueues *pq; + + mtx_assert(&vm_mtx, MA_OWNED); if (queue != PQ_NONE) { m->queue = PQ_NONE; pq = &vm_page_queues[queue]; @@ -636,6 +645,7 @@ _vm_page_list_find(basequeue, index) vm_page_t m = NULL; struct vpgqueues *pq; + mtx_assert(&vm_mtx, MA_OWNED); pq = &vm_page_queues[basequeue]; /* @@ -673,6 +683,7 @@ vm_page_select_cache(object, pindex) { vm_page_t m; + mtx_assert(&vm_mtx, MA_OWNED); while (TRUE) { m = vm_page_list_find( PQ_CACHE, @@ -724,7 +735,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer * VM_ALLOC_INTERRUPT interrupt time request * VM_ALLOC_ZERO zero page * - * Object must be locked. + * vm_mtx must be locked. * This routine may not block. * * Additional special handling is required when called from an @@ -741,6 +752,7 @@ vm_page_alloc(object, pindex, page_req) register vm_page_t m = NULL; int s; + mtx_assert(&vm_mtx, MA_OWNED); KASSERT(!vm_page_lookup(object, pindex), ("vm_page_alloc: page already allocated")); @@ -873,13 +885,13 @@ vm_wait() s = splvm(); if (curproc == pageproc) { vm_pageout_pages_needed = 1; - tsleep(&vm_pageout_pages_needed, PSWP, "VMWait", 0); + msleep(&vm_pageout_pages_needed, &vm_mtx, PSWP, "VMWait", 0); } else { if (!vm_pages_needed) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } - tsleep(&cnt.v_free_count, PVM, "vmwait", 0); + msleep(&cnt.v_free_count, &vm_mtx, PVM, "vmwait", 0); } splx(s); } @@ -910,61 +922,6 @@ vm_await() splx(s); } -#if 0 -/* - * vm_page_sleep: - * - * Block until page is no longer busy. - */ - -int -vm_page_sleep(vm_page_t m, char *msg, char *busy) { - int slept = 0; - if ((busy && *busy) || (m->flags & PG_BUSY)) { - int s; - s = splvm(); - if ((busy && *busy) || (m->flags & PG_BUSY)) { - vm_page_flag_set(m, PG_WANTED); - tsleep(m, PVM, msg, 0); - slept = 1; - } - splx(s); - } - return slept; -} - -#endif - -#if 0 - -/* - * vm_page_asleep: - * - * Similar to vm_page_sleep(), but does not block. Returns 0 if - * the page is not busy, or 1 if the page is busy. - * - * This routine has the side effect of calling asleep() if the page - * was busy (1 returned). - */ - -int -vm_page_asleep(vm_page_t m, char *msg, char *busy) { - int slept = 0; - if ((busy && *busy) || (m->flags & PG_BUSY)) { - int s; - s = splvm(); - if ((busy && *busy) || (m->flags & PG_BUSY)) { - vm_page_flag_set(m, PG_WANTED); - asleep(m, PVM, msg, 0); - slept = 1; - } - splx(s); - } - return slept; -} - -#endif - /* * vm_page_activate: * @@ -982,6 +939,7 @@ vm_page_activate(m) int s; s = splvm(); + mtx_assert(&vm_mtx, MA_OWNED); if (m->queue != PQ_ACTIVE) { if ((m->queue - m->pc) == PQ_CACHE) cnt.v_reactivated++; @@ -1056,6 +1014,7 @@ vm_page_free_toq(vm_page_t m) s = splvm(); + mtx_assert(&vm_mtx, MA_OWNED); cnt.v_tfree++; if (m->busy || ((m->queue - m->pc) == PQ_FREE) || @@ -1293,6 +1252,7 @@ _vm_page_deactivate(vm_page_t m, int athead) { int s; + mtx_assert(&vm_mtx, MA_OWNED); /* * Ignore if already inactive. 
@@ -1293,6 +1252,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
 {
 	int s;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	/*
 	 * Ignore if already inactive.
 	 */
@@ -1330,6 +1290,8 @@ vm_page_deactivate(vm_page_t m)
 int
 vm_page_try_to_cache(vm_page_t m)
 {
+
+	mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
 	if (m->dirty || m->hold_count || m->busy || m->wire_count ||
 	    (m->flags & (PG_BUSY|PG_UNMANAGED))) {
 		return(0);
@@ -1354,6 +1316,7 @@ vm_page_cache(m)
 {
 	int s;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
 		printf("vm_page_cache: attempting to cache busy page\n");
 		return;
@@ -1411,6 +1374,7 @@ vm_page_dontneed(m)
 	int dnw;
 	int head;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	dnw = ++dnweight;
 
 	/*
@@ -1451,6 +1415,7 @@ vm_page_dontneed(m)
 *	to be in the object.  If the page doesn't exist, allocate it.
 *
 *	This routine may block.
+ *	Requires vm_mtx.
 */
 vm_page_t
 vm_page_grab(object, pindex, allocflags)
@@ -1458,10 +1423,10 @@ vm_page_grab(object, pindex, allocflags)
 	vm_pindex_t pindex;
 	int allocflags;
 {
-
 	vm_page_t m;
 	int s, generation;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 retrylookup:
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		if (m->busy || (m->flags & PG_BUSY)) {
@@ -1471,7 +1436,7 @@ retrylookup:
 			while ((object->generation == generation) &&
 			    (m->busy || (m->flags & PG_BUSY))) {
 				vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
-				tsleep(m, PVM, "pgrbwt", 0);
+				msleep(m, &vm_mtx, PVM, "pgrbwt", 0);
 				if ((allocflags & VM_ALLOC_RETRY) == 0) {
 					splx(s);
 					return NULL;
@@ -1534,6 +1499,8 @@ vm_page_bits(int base, int size)
 *	This routine may not block.
 *
 *	(base + size) must be less then or equal to PAGE_SIZE.
+ *
+ *	vm_mtx needs to be held
 */
 void
 vm_page_set_validclean(m, base, size)
@@ -1545,6 +1512,7 @@ vm_page_set_validclean(m, base, size)
 	int frag;
 	int endoff;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if (size == 0)	/* handle degenerate case */
 		return;
 
@@ -1618,6 +1586,8 @@ vm_page_clear_dirty(m, base, size)
 	int base;
 	int size;
 {
+
+	mtx_assert(&vm_mtx, MA_OWNED);
 	m->dirty &= ~vm_page_bits(base, size);
 }
 
@@ -1637,6 +1607,7 @@ vm_page_set_invalid(m, base, size)
 {
 	int bits;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	bits = vm_page_bits(base, size);
 	m->valid &= ~bits;
 	m->dirty &= ~bits;
@@ -1923,8 +1894,19 @@ contigmalloc(size, type, flags, low, high, alignment, boundary)
 	unsigned long alignment;
 	unsigned long boundary;
 {
-	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
+	void * ret;
+	int hadvmlock;
+
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
+	ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
 	    kernel_map);
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
+
+	return (ret);
+
 }
 
 void
@@ -1933,7 +1915,14 @@ contigfree(addr, size, type)
 	unsigned long size;
 	struct malloc_type *type;
 {
+	int hadvmlock;
+
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	kmem_free(kernel_map, (vm_offset_t)addr, size);
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
 
 vm_offset_t
@@ -1943,8 +1932,18 @@ vm_page_alloc_contig(size, low, high, alignment)
 	vm_offset_t high;
 	vm_offset_t alignment;
 {
-	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
+	vm_offset_t ret;
+	int hadvmlock;
+
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
+	ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
 	    alignment, 0ul, kernel_map));
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
+	return (ret);
+
 }
 
 #include "opt_ddb.h"
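contigmalloc(), contigfree() and vm_page_alloc_contig() above all use the same recursion guard: take vm_mtx only if the caller does not already hold it, and put the lock back in exactly the state it was found. A userland sketch of that guard, assuming pthreads and a per-thread flag standing in for mtx_owned() (all helper names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
static _Thread_local bool vm_mtx_held;	/* stands in for mtx_owned(&vm_mtx) */

/* hypothetical allocator that must run under vm_mtx */
static void *
alloc_locked(size_t size)
{
    return (malloc(size));
}

static void *
contigmalloc_sketch(size_t size)
{
    bool hadvmlock;
    void *ret;

    hadvmlock = vm_mtx_held;
    if (!hadvmlock) {
        pthread_mutex_lock(&vm_mtx);
        vm_mtx_held = true;
    }
    ret = alloc_locked(size);
    if (!hadvmlock) {		/* leave the lock exactly as we found it */
        vm_mtx_held = false;
        pthread_mutex_unlock(&vm_mtx);
    }
    return (ret);
}

The guard lets the same entry point serve both locked and unlocked callers during the transition; the price is that the effective lock order becomes invisible at the call site.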
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index dc8290e34a07..e1c1cc49509e 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -305,19 +305,28 @@ extern long first_page;		/* first physical page number */
 	(&vm_page_array[atop(pa) - first_page ])
 
 /*
+ * For now, a global vm lock
+ */
+#define VM_PAGE_MTX(m)	(&vm_mtx)
+
+/*
 *	Functions implemented as macros
 */
 static __inline void
 vm_page_flag_set(vm_page_t m, unsigned short bits)
 {
-	atomic_set_short(&(m)->flags, bits);
+
+	mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+	m->flags |= bits;
 }
 
 static __inline void
 vm_page_flag_clear(vm_page_t m, unsigned short bits)
 {
-	atomic_clear_short(&(m)->flags, bits);
+
+	mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+	m->flags &= ~bits;
 }
 
 #if 0
@@ -332,7 +341,9 @@ vm_page_assert_wait(vm_page_t m, int interruptible)
 static __inline void
 vm_page_busy(vm_page_t m)
 {
-	KASSERT((m->flags & PG_BUSY) == 0, ("vm_page_busy: page already busy!!!"));
+
+	KASSERT((m->flags & PG_BUSY) == 0,
+	    ("vm_page_busy: page already busy!!!"));
 	vm_page_flag_set(m, PG_BUSY);
 }
 
@@ -375,13 +386,17 @@ vm_page_wakeup(vm_page_t m)
 static __inline void
 vm_page_io_start(vm_page_t m)
 {
-	atomic_add_char(&(m)->busy, 1);
+
+	mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+	m->busy++;
 }
 
 static __inline void
 vm_page_io_finish(vm_page_t m)
 {
-	atomic_subtract_char(&m->busy, 1);
+
+	mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+	m->busy--;
 	if (m->busy == 0)
 		vm_page_flash(m);
 }
@@ -447,12 +462,16 @@ void vm_page_free_toq(vm_page_t m);
 static __inline void
 vm_page_hold(vm_page_t mem)
 {
+
+	mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
 	mem->hold_count++;
 }
 
 static __inline void
 vm_page_unhold(vm_page_t mem)
 {
+
+	mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
 	--mem->hold_count;
 	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
 }
@@ -565,7 +584,7 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
 			 * Page is busy.  Wait and retry.
 			 */
 			vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
-			tsleep(m, PVM, msg, 0);
+			msleep(m, VM_PAGE_MTX(m), PVM, msg, 0);
 		}
 		splx(s);
 		return(TRUE);
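A note on the macro above: VM_PAGE_MTX(), like VM_OBJECT_MTX() in vm_object.h, ignores its argument and always resolves to the single global vm_mtx, yet every call site is already written as if the lock were per-page. Presumably the indirection exists so that a later move to finer-grained locking only has to touch the macro definition, not the callers; a hypothetical sketch of that future step:

/* today: every page maps to the single global lock */
#define VM_PAGE_MTX(m)	(&vm_mtx)

/*
 * hypothetical later step, with no call-site churn:
 * #define VM_PAGE_MTX(m)	(&(m)->object->mtx)
 */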
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index ce333cf4d5b5..60e3f213020f 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -447,6 +447,8 @@ vm_pageout_flush(mc, count, flags)
 *	backing_objects.
 *
 *	The object and map must be locked.
+ *
+ *	Requires the vm_mtx
 */
 static void
 vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
@@ -460,6 +462,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
 	int remove_mode;
 	int s;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
 		return;
 
@@ -1322,7 +1325,7 @@ vm_pageout()
 {
 	int pass;
 
-	mtx_lock(&Giant);
+	mtx_lock(&vm_mtx);
 
 	/*
 	 * Initialize some paging parameters.
@@ -1412,7 +1415,8 @@ vm_pageout()
 			 */
 			++pass;
 			if (pass > 1)
-				tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
+				msleep(&vm_pages_needed, &vm_mtx, PVM,
+				    "psleep", hz/2);
 		} else {
 			/*
 			 * Good enough, sleep & handle stats.  Prime the pass
@@ -1422,7 +1426,7 @@ vm_pageout()
 				pass = 1;
 			else
 				pass = 0;
-			error = tsleep(&vm_pages_needed,
+			error = msleep(&vm_pages_needed, &vm_mtx,
 			    PVM, "psleep", vm_pageout_stats_interval * hz);
 			if (error && !vm_pages_needed) {
 				splx(s);
@@ -1466,12 +1470,13 @@ vm_daemon()
 {
 	struct proc *p;
 
-	mtx_lock(&Giant);
+	mtx_lock(&vm_mtx);
 
 	while (TRUE) {
-		tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
+		msleep(&vm_daemon_needed, &vm_mtx, PPAUSE, "psleep", 0);
 		if (vm_pageout_req_swapout) {
 			swapout_procs(vm_pageout_req_swapout);
+			mtx_assert(&vm_mtx, MA_OWNED);
 			vm_pageout_req_swapout = 0;
 		}
 		/*
@@ -1479,6 +1484,7 @@ vm_daemon()
 		 * process is swapped out -- deactivate pages
 		 */
 
+		mtx_unlock(&vm_mtx);
 		sx_slock(&allproc_lock);
 		LIST_FOREACH(p, &allproc, p_list) {
 			vm_pindex_t limit, size;
@@ -1515,13 +1521,16 @@ vm_daemon()
 				limit = 0;	/* XXX */
 			mtx_unlock_spin(&sched_lock);
 
+			mtx_lock(&vm_mtx);
 			size = vmspace_resident_count(p->p_vmspace);
 			if (limit >= 0 && size >= limit) {
 				vm_pageout_map_deactivate_pages(
 				    &p->p_vmspace->vm_map, limit);
 			}
+			mtx_unlock(&vm_mtx);
 		}
 		sx_sunlock(&allproc_lock);
+		mtx_lock(&vm_mtx);
 	}
 }
 #endif
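vm_daemon() above now drops vm_mtx before walking allproc and retakes it only around the per-process VM work, so the hot lock is never held across the whole traversal and the allproc sx lock is never acquired underneath it. Roughly, as a userland sketch with hypothetical types and helpers:

#include <pthread.h>
#include <stddef.h>

struct proc { struct proc *p_next; };

static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t allproc_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct proc *allproc;

/* hypothetical per-process VM work that needs vm_mtx */
static void
deactivate_pages(struct proc *p)
{
    (void)p;
}

/* entered and exited with vm_mtx held, like the loop body in vm_daemon() */
static void
vm_daemon_pass_sketch(void)
{
    struct proc *p;

    pthread_mutex_unlock(&vm_mtx);		/* don't hold it across the walk */
    pthread_rwlock_rdlock(&allproc_lock);
    for (p = allproc; p != NULL; p = p->p_next) {
        pthread_mutex_lock(&vm_mtx);		/* narrow per-process window */
        deactivate_pages(p);
        pthread_mutex_unlock(&vm_mtx);
    }
    pthread_rwlock_unlock(&allproc_lock);
    pthread_mutex_lock(&vm_mtx);		/* restore the caller's invariant */
}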
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index b13c9c05166a..e53a14c68255 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -240,21 +240,32 @@ vm_pager_bufferinit()
 * need to perform page-level validation (e.g. the device pager).
 */
 vm_object_t
-vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot,
-    vm_ooffset_t off)
+vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size,
+    vm_prot_t prot, vm_ooffset_t off)
 {
+	vm_object_t ret;
 	struct pagerops *ops;
+	int hadvmlock;
 
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	ops = pagertab[type];
 	if (ops)
-		return ((*ops->pgo_alloc) (handle, size, prot, off));
-	return (NULL);
+		ret = (*ops->pgo_alloc) (handle, size, prot, off);
+	else
+		ret = NULL;
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
+	return (ret);
 }
 
 void
 vm_pager_deallocate(object)
 	vm_object_t object;
 {
+
+	mtx_assert(&vm_mtx, MA_OWNED);
 	(*pagertab[object->type]->pgo_dealloc) (object);
 }
 
@@ -374,6 +385,8 @@ initpbuf(struct buf *bp)
 *
 * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
 * relatively soon when the rest of the subsystems get smart about it. XXX
+ *
+ * vm_mtx can be held or unheld
 */
struct buf *
 getpbuf(pfreecnt)
@@ -381,8 +394,12 @@ getpbuf(pfreecnt)
 {
 	int s;
 	struct buf *bp;
+	int hadvmlock;
 
 	s = splvm();
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (hadvmlock)
+		mtx_unlock(&vm_mtx);
 	mtx_lock(&pbuf_mtx);
 
 	for (;;) {
@@ -407,6 +424,8 @@ getpbuf(pfreecnt)
 	splx(s);
 
 	initpbuf(bp);
+	if (hadvmlock)
+		mtx_lock(&vm_mtx);
 	return bp;
 }
 
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index f54c739d83eb..b4511cacd184 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -124,10 +124,12 @@ vm_pager_get_pages(
 ) {
 	int r;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage);
 	if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) {
 		vm_page_zero_invalid(m[reqpage], TRUE);
 	}
+	mtx_assert(&vm_mtx, MA_OWNED);
 	return(r);
 }
 
@@ -139,8 +141,11 @@ vm_pager_put_pages(
 	int flags,
 	int *rtvals
 ) {
+
+	mtx_assert(&vm_mtx, MA_OWNED);
 	(*pagertab[object->type]->pgo_putpages)
 	    (object, m, count, flags, rtvals);
+	mtx_assert(&vm_mtx, MA_OWNED);
 }
 
 /*
@@ -161,7 +166,13 @@ vm_pager_has_page(
 	int *before,
 	int *after
 ) {
-	return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
+	boolean_t ret;
+
+	mtx_assert(&vm_mtx, MA_OWNED);
+	ret = (*pagertab[object->type]->pgo_haspage)
+	    (object, offset, before, after);
+	mtx_assert(&vm_mtx, MA_OWNED);
+	return (ret);
 }
 
 /*
@@ -175,8 +186,11 @@ vm_pager_has_page(
 static __inline void
 vm_pager_page_unswapped(vm_page_t m)
 {
+
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if (pagertab[m->object->type]->pgo_pageunswapped)
 		(*pagertab[m->object->type]->pgo_pageunswapped)(m);
+	mtx_assert(&vm_mtx, MA_OWNED);
 }
 
 #endif
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index f9b24f84523d..48613062ee08 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -49,6 +49,9 @@
 #include <sys/sysproto.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -93,6 +96,7 @@ obreak(p, uap)
 		return EINVAL;
 	}
 
+	mtx_lock(&vm_mtx);
 	if (new > old) {
 		vm_size_t diff;
 
@@ -100,16 +104,19 @@ obreak(p, uap)
 		rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
 			VM_PROT_ALL, VM_PROT_ALL, 0);
 		if (rv != KERN_SUCCESS) {
+			mtx_unlock(&vm_mtx);
 			return (ENOMEM);
 		}
 		vm->vm_dsize += btoc(diff);
 	} else if (new < old) {
 		rv = vm_map_remove(&vm->vm_map, new, old);
 		if (rv != KERN_SUCCESS) {
+			mtx_unlock(&vm_mtx);
 			return (ENOMEM);
 		}
 		vm->vm_dsize -= btoc(old - new);
 	}
+	mtx_unlock(&vm_mtx);
 
 	return (0);
 }
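getpbuf() in vm_pager.c above inverts the usual guard: if the caller holds vm_mtx, the lock is released before pbuf_mtx is taken (and possibly slept on), then retaken on the way out. That keeps vm_mtx out of a potentially long sleep and keeps a single lock order. Sketched under the same pthreads assumptions as before (all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pbuf_mtx = PTHREAD_MUTEX_INITIALIZER;
static _Thread_local bool vm_mtx_held;	/* stands in for mtx_owned(&vm_mtx) */

/* hypothetical: pops a buffer, may block for a long time under pbuf_mtx */
static void *
take_or_wait_for_buf(void)
{
    return (NULL);
}

static void *
getpbuf_sketch(void)
{
    bool hadvmlock;
    void *bp;

    hadvmlock = vm_mtx_held;
    if (hadvmlock) {
        vm_mtx_held = false;
        pthread_mutex_unlock(&vm_mtx);	/* never sleep holding vm_mtx */
    }
    pthread_mutex_lock(&pbuf_mtx);
    bp = take_or_wait_for_buf();
    pthread_mutex_unlock(&pbuf_mtx);
    if (hadvmlock) {
        pthread_mutex_lock(&vm_mtx);	/* hand back the state we found */
        vm_mtx_held = true;
    }
    return (bp);
}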
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 4cddadc55123..30fadbebd7d3 100644
--- a/sys/vm/vm_zone.c
+++ b/sys/vm/vm_zone.c
@@ -137,6 +137,7 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
 	 * in pages as needed.
 	 */
 	if (z->zflags & ZONE_INTERRUPT) {
+		int hadvmlock;
 
 		totsize = round_page(z->zsize * nentries);
 		atomic_add_int(&zone_kmem_kvaspace, totsize);
@@ -145,12 +146,17 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
 			return 0;
 		z->zpagemax = totsize / PAGE_SIZE;
 
+		hadvmlock = mtx_owned(&vm_mtx);
+		if (!hadvmlock)
+			mtx_lock(&vm_mtx);
 		if (obj == NULL) {
 			z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
 		} else {
 			z->zobj = obj;
 			_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
 		}
+		if (!hadvmlock)
+			mtx_unlock(&vm_mtx);
 		z->zallocflag = VM_ALLOC_INTERRUPT;
 		z->zmax += nentries;
 	} else {
@@ -262,7 +268,6 @@ _zget(vm_zone_t z)
 	void *item;
 
 	KASSERT(z != NULL, ("invalid zone"));
-	mtx_assert(&z->zmtx, MA_OWNED);
 
 	if (z->zflags & ZONE_INTERRUPT) {
 		item = (char *) z->zkva + z->zpagecount * PAGE_SIZE;
@@ -299,16 +304,13 @@ _zget(vm_zone_t z)
 		 * We can wait, so just do normal map allocation in the appropriate
 		 * map.
 		 */
+		mtx_unlock(&z->zmtx);
 		if (lockstatus(&kernel_map->lock, NULL)) {
-			mtx_unlock(&z->zmtx);
 			item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
-			mtx_lock(&z->zmtx);
 			if (item != NULL)
 				atomic_add_int(&zone_kmem_pages, z->zalloc);
 		} else {
-			mtx_unlock(&z->zmtx);
 			item = (void *) kmem_alloc(kernel_map, nbytes);
-			mtx_lock(&z->zmtx);
 			if (item != NULL)
 				atomic_add_int(&zone_kern_pages, z->zalloc);
 		}
@@ -318,6 +320,7 @@ _zget(vm_zone_t z)
 			nbytes = 0;
 		}
 		nitems = nbytes / z->zsize;
+		mtx_lock(&z->zmtx);
 	}
 	z->ztotal += nitems;
 
@@ -361,14 +364,17 @@ void *
 zalloc(vm_zone_t z)
 {
 	void *item;
+	int hadvmlock;
 
 	KASSERT(z != NULL, ("invalid zone"));
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	mtx_lock(&z->zmtx);
 
 	if (z->zfreecnt <= z->zfreemin) {
 		item = _zget(z);
-		mtx_unlock(&z->zmtx);
-		return item;
+		goto out;
 	}
 
 	item = z->zitems;
@@ -381,8 +387,11 @@ zalloc(vm_zone_t z)
 	z->zfreecnt--;
 	z->znalloc++;
-
+
+out:
 	mtx_unlock(&z->zmtx);
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 	return item;
 }
 
@@ -392,8 +401,13 @@ zalloc(vm_zone_t z)
 void
 zfree(vm_zone_t z, void *item)
 {
+	int hadvmlock;
+
 	KASSERT(z != NULL, ("invalid zone"));
 	KASSERT(item != NULL, ("invalid item"));
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	mtx_lock(&z->zmtx);
 
 	((void **) item)[0] = z->zitems;
@@ -405,6 +419,8 @@ zfree(vm_zone_t z, void *item)
 	z->zitems = item;
 	z->zfreecnt++;
 
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 	mtx_unlock(&z->zmtx);
 }
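zalloc() and zfree() above now nest the per-zone z->zmtx inside vm_mtx in one fixed order on every path (vm_mtx outer, z->zmtx inner), and zalloc() funnels both exits through a single out label so the unlock pairing cannot be missed. The shape of the nesting, as a minimal sketch:

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;	/* outer */
static pthread_mutex_t z_zmtx = PTHREAD_MUTEX_INITIALIZER;	/* inner, per zone */

static void *
zalloc_sketch(void)
{
    void *item;

    pthread_mutex_lock(&vm_mtx);	/* outer lock first, on every path */
    pthread_mutex_lock(&z_zmtx);	/* inner lock second: no ABBA deadlock */
    item = NULL;			/* the free-list pop would go here */
    pthread_mutex_unlock(&z_zmtx);
    pthread_mutex_unlock(&vm_mtx);
    return (item);
}

Because every thread acquires the two locks in the same order, no pair of threads can each hold one lock while waiting on the other.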
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index e9400b80cc2b..12763c876c4d 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -103,6 +103,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	vm_object_t object;
 	struct vnode *vp;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	/*
 	 * Pageout to vnode, no can do yet.
 	 */
@@ -122,11 +123,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	 * Prevent race condition when allocating the object. This
 	 * can happen with NFS vnodes since the nfsnode isn't locked.
 	 */
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 	while (vp->v_flag & VOLOCK) {
 		vp->v_flag |= VOWANT;
 		tsleep(vp, PVM, "vnpobj", 0);
 	}
 	vp->v_flag |= VOLOCK;
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 
 	/*
 	 * If the object is being terminated, wait for it to
@@ -134,7 +139,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	 */
 	while (((object = vp->v_object) != NULL) &&
 	    (object->flags & OBJ_DEAD)) {
-		tsleep(object, PVM, "vadead", 0);
+		msleep(object, &vm_mtx, PVM, "vadead", 0);
 	}
 
 	if (vp->v_usecount == 0)
@@ -157,11 +162,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 		vp->v_usecount++;
 	}
 
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 	vp->v_flag &= ~VOLOCK;
 	if (vp->v_flag & VOWANT) {
 		vp->v_flag &= ~VOWANT;
 		wakeup(vp);
 	}
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 	return (object);
 }
 
@@ -221,8 +230,12 @@ vnode_pager_haspage(object, pindex, before, after)
 		blocksperpage = (PAGE_SIZE / bsize);
 		reqblock = pindex * blocksperpage;
 	}
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
 		after, before);
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 	if (err)
 		return TRUE;
 	if ( bn == -1)
@@ -285,6 +298,11 @@ vnode_pager_setsize(vp, nsize)
 	 * File has shrunk. Toss any cached pages beyond the new EOF.
 	 */
 	if (nsize < object->un_pager.vnp.vnp_size) {
+		int hadvmlock;
+
+		hadvmlock = mtx_owned(&vm_mtx);
+		if (!hadvmlock)
+			mtx_lock(&vm_mtx);
 		vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
 		if (nobjsize < object->size) {
 			vm_object_page_remove(object, nobjsize, object->size,
@@ -325,6 +343,8 @@ vnode_pager_setsize(vp, nsize)
 				m->dirty = VM_PAGE_BITS_ALL;
 			}
 		}
+		if (!hadvmlock)
+			mtx_unlock(&vm_mtx);
 	}
 	object->un_pager.vnp.vnp_size = nsize;
 	object->size = nobjsize;
@@ -542,8 +562,8 @@ vnode_pager_input_old(object, m)
 */
 
 /*
- * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
- * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to
+ * Local media VFS's that do not implement their own VOP_GETPAGES
+ * should have their VOP_GETPAGES call to
 * vnode_pager_generic_getpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
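vnode_pager_alloc() and vnode_pager_haspage() above show the cost of the split: VOP calls still need Giant, and the two locks are apparently ordered Giant before vm_mtx, so vm_mtx has to be shed before Giant is picked up and the pair unwound again afterwards. Schematically, with vop_bmap_call() as a hypothetical stand-in for the VOP:

#include <pthread.h>

static pthread_mutex_t Giant = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;

/* hypothetical stand-in for a VOP call that still needs Giant */
static void
vop_bmap_call(void)
{
}

/* called with vm_mtx held, returns with vm_mtx held */
static void
vop_under_giant_sketch(void)
{
    pthread_mutex_unlock(&vm_mtx);	/* order is Giant before vm_mtx */
    pthread_mutex_lock(&Giant);
    vop_bmap_call();
    pthread_mutex_unlock(&Giant);
    pthread_mutex_lock(&vm_mtx);	/* back into the VM world */
    /* any VM state read before the drop may be stale now */
}

The final comment is the important caveat: every drop-and-reacquire opens a window in which previously read VM state can change, which is why the asserts pepper the code on both sides of these transitions.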
@@ -560,16 +580,11 @@ vnode_pager_getpages(object, m, count, reqpage)
 	struct vnode *vp;
 	int bytes = count * PAGE_SIZE;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	vp = object->handle;
-	/*
-	 * XXX temporary diagnostic message to help track stale FS code,
-	 * Returning EOPNOTSUPP from here may make things unhappy.
-	 */
 	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
-	if (rtval == EOPNOTSUPP) {
-		printf("vnode_pager: *** WARNING *** stale FS getpages\n");
-		rtval = vnode_pager_generic_getpages( vp, m, bytes, reqpage);
-	}
+	KASSERT(rtval != EOPNOTSUPP,
+	    ("vnode_pager: FS getpages not implemented\n"));
 	return rtval;
 }
 
@@ -891,13 +906,19 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
 	vp = object->handle;
 	if (vp->v_type != VREG)
 		mp = NULL;
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 	(void)vn_start_write(vp, &mp, V_WAIT);
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
-	if (rtval == EOPNOTSUPP) {
-		printf("vnode_pager: *** WARNING *** stale FS putpages\n");
-		rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
-	}
+	KASSERT(rtval != EOPNOTSUPP,
+	    ("vnode_pager: stale FS putpages\n"));
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 	vn_finished_write(mp);
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 }
 
@@ -1000,6 +1021,8 @@ vnode_pager_lock(object)
 {
 	struct proc *p = curproc;	/* XXX */
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
+	mtx_assert(&Giant, MA_OWNED);
 	for (; object != NULL; object = object->backing_object) {
 		if (object->type != OBJT_VNODE)
 			continue;