diff options
author | John Dyson <dyson@FreeBSD.org> | 1996-05-19 07:36:50 +0000 |
---|---|---|
committer | John Dyson <dyson@FreeBSD.org> | 1996-05-19 07:36:50 +0000 |
commit | 867a482d661c25849e1fa5e3d992efbca7b4add2 (patch) | |
tree | d5aabcba7719875b5a0f874d310c6f63ca671595 /sys/vm | |
parent | 0ac7fc1db6655659df6eacbc4ff0e16d1627def2 (diff) | |
download | src-867a482d661c25849e1fa5e3d992efbca7b4add2.tar.gz src-867a482d661c25849e1fa5e3d992efbca7b4add2.zip |
Initial support for mincore and madvise. Both are almost fully
supported, except madvise does not page in with MADV_WILLNEED, and
MADV_DONTNEED doesn't force dirty pages out.
Notes:
svn path=/head/; revision=15819
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/pmap.h | 7 | ||||
-rw-r--r-- | sys/vm/vm_fault.c | 46 | ||||
-rw-r--r-- | sys/vm/vm_map.c | 93 | ||||
-rw-r--r-- | sys/vm/vm_map.h | 3 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 187 | ||||
-rw-r--r-- | sys/vm/vm_object.c | 67 | ||||
-rw-r--r-- | sys/vm/vm_object.h | 9 |
7 files changed, 387 insertions, 25 deletions
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 7e654dd4c5ce..78c59a5afc88 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: pmap.h,v 1.8 1995/12/17 07:38:58 bde Exp $ + * $Id: pmap.h,v 1.9 1996/03/28 04:54:50 dyson Exp $ */ /* @@ -105,7 +105,8 @@ void pmap_kenter __P((vm_offset_t, vm_offset_t)); void pmap_kremove __P((vm_offset_t)); vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); void pmap_object_init_pt __P((pmap_t pmap, vm_offset_t addr, - vm_object_t object, vm_pindex_t pindex, vm_offset_t size)); + vm_object_t object, vm_pindex_t pindex, vm_offset_t size, + int pagelimit)); boolean_t pmap_page_exists __P((pmap_t, vm_offset_t)); void pmap_page_protect __P((vm_offset_t, vm_prot_t)); void pmap_pageable __P((pmap_t, vm_offset_t, vm_offset_t, @@ -122,6 +123,8 @@ void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t)); void pmap_zero_page __P((vm_offset_t)); void pmap_prefault __P((pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, vm_object_t object)); +int pmap_mincore __P((pmap_t pmap, vm_offset_t addr)); + #endif /* KERNEL */ #endif /* _PMAP_VM_ */ diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 904270b63a35..05b9bf90ed66 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_fault.c,v 1.43 1996/03/28 04:53:23 dyson Exp $ + * $Id: vm_fault.c,v 1.44 1996/05/18 03:37:35 dyson Exp $ */ /* @@ -324,6 +324,47 @@ readrest: int rv; int faultcount; int reqpage; + int ahead, behind; + + ahead = VM_FAULT_READ_AHEAD; + behind = VM_FAULT_READ_BEHIND; + if (first_object->behavior == OBJ_RANDOM) { + ahead = 0; + behind = 0; + } + + if (first_object->behavior == OBJ_SEQUENTIAL) { + vm_pindex_t firstpindex, tmppindex; + if (first_pindex < + 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) + firstpindex = 0; + else + firstpindex = first_pindex - + 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); + + for(tmppindex = first_pindex - 1; + tmppindex >= first_pindex; + --tmppindex) { + vm_page_t mt; + mt = vm_page_lookup( first_object, tmppindex); + if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) + break; + if (mt->busy || (mt->flags & PG_BUSY) || mt->hold_count || + mt->wire_count) + continue; + if (mt->dirty == 0) + vm_page_test_dirty(mt); + if (mt->dirty) { + vm_page_protect(mt, VM_PROT_NONE); + vm_page_deactivate(mt); + } else { + vm_page_cache(mt); + } + } + + ahead += behind; + behind = 0; + } /* * now we find out if any other pages should be paged @@ -338,8 +379,7 @@ readrest: * vm_page_t passed to the routine. */ faultcount = vm_fault_additional_pages( - m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, - marray, &reqpage); + m, behind, ahead, marray, &reqpage); /* * Call the pager to retrieve the data, if any, after diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index e0948e49f4d5..89f4e87d30d6 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_map.c,v 1.44 1996/05/03 21:01:49 phk Exp $ + * $Id: vm_map.c,v 1.45 1996/05/18 03:37:43 dyson Exp $ */ /* @@ -75,6 +75,7 @@ #include <sys/proc.h> #include <sys/queue.h> #include <sys/vmmeter.h> +#include <sys/mman.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -845,6 +846,7 @@ vm_map_simplify_entry(map, entry) prevsize = prev->end - prev->start; if ( (prev->end == entry->start) && (prev->object.vm_object == entry->object.vm_object) && + (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) && (prev->offset + prevsize == entry->offset) && (prev->needs_copy == entry->needs_copy) && (prev->copy_on_write == entry->copy_on_write) && @@ -873,6 +875,7 @@ vm_map_simplify_entry(map, entry) esize = entry->end - entry->start; if ((entry->end == next->start) && (next->object.vm_object == entry->object.vm_object) && + (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) && (entry->offset + esize == next->offset) && (next->needs_copy == entry->needs_copy) && (next->copy_on_write == entry->copy_on_write) && @@ -1177,6 +1180,94 @@ vm_map_protect(map, start, end, new_prot, set_max) } /* + * vm_map_madvise: + * + * This routine traverses a processes map handling the madvise + * system call. 
+ */ +void +vm_map_madvise(map, pmap, start, end, advise) + vm_map_t map; + pmap_t pmap; + vm_offset_t start, end; + int advise; +{ + register vm_map_entry_t current; + vm_map_entry_t entry; + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (vm_map_lookup_entry(map, start, &entry)) { + vm_map_clip_start(map, entry, start); + } else + entry = entry->next; + + for(current = entry; + (current != &map->header) && (current->start < end); + current = current->next) { + if (current->is_a_map || current->is_sub_map) { + continue; + } + vm_map_clip_end(map, current, end); + switch (advise) { + case MADV_NORMAL: + current->object.vm_object->behavior = OBJ_NORMAL; + break; + case MADV_SEQUENTIAL: + current->object.vm_object->behavior = OBJ_SEQUENTIAL; + break; + case MADV_RANDOM: + current->object.vm_object->behavior = OBJ_RANDOM; + break; + /* + * Right now, we could handle DONTNEED and WILLNEED with common code. + * They are mostly the same, except for the potential async reads (NYI). + */ + case MADV_DONTNEED: + { + vm_pindex_t pindex; + int count; + vm_size_t size = entry->end - entry->start; + pindex = OFF_TO_IDX(entry->offset); + count = OFF_TO_IDX(size); + /* + * MADV_DONTNEED removes the page from all + * pmaps, so pmap_remove is not necessary. 
+ */ + vm_object_madvise(current->object.vm_object, + pindex, count, advise); + } + break; + + case MADV_WILLNEED: + { + vm_pindex_t pindex; + int count; + vm_size_t size = entry->end - entry->start; + pindex = OFF_TO_IDX(entry->offset); + count = OFF_TO_IDX(size); + vm_object_madvise(current->object.vm_object, + pindex, count, advise); + pmap_object_init_pt(pmap, current->start, + current->object.vm_object, pindex, + (count << PAGE_SHIFT), 0); + } + break; + + default: + break; + } + } + + vm_map_simplify_entry(map, entry); + vm_map_unlock(map); + return; +} + + +/* * vm_map_inherit: * * Sets the inheritance of the specified address diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 0f800bfa2cdb..3ba837519912 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.h,v 1.11 1996/01/19 03:59:55 dyson Exp $ + * $Id: vm_map.h,v 1.12 1996/01/30 23:02:35 mpp Exp $ */ /* @@ -236,6 +236,7 @@ int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t)); void vm_map_simplify __P((vm_map_t, vm_offset_t)); void vm_map_startup __P((void)); int vm_map_submap __P((vm_map_t, vm_offset_t, vm_offset_t, vm_map_t)); +void vm_map_madvise __P((vm_map_t, pmap_t, vm_offset_t, vm_offset_t, int)); #endif #endif /* _VM_MAP_ */ diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 16f8ebec2b74..4a4ee15819d9 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.41 1996/05/03 21:01:51 phk Exp $ + * $Id: vm_mmap.c,v 1.42 1996/05/18 03:37:51 dyson Exp $ */ /* @@ -71,6 +71,7 @@ #include <vm/vm_pageout.h> #include <vm/vm_extern.h> #include <vm/vm_kern.h> +#include <vm/vm_page.h> #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { @@ -543,9 +544,37 @@ madvise(p, uap, retval) struct madvise_args *uap; int *retval; { + vm_map_t map; + pmap_t 
pmap; + vm_offset_t start, end, addr, nextaddr; + /* + * Check for illegal addresses. Watch out for address wrap... Note + * that VM_*_ADDRESS are not constants due to casts (argh). + */ + if (VM_MAXUSER_ADDRESS > 0 && + ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS) + return (EINVAL); +#ifndef i386 + if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) + return (EINVAL); +#endif + if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) + return (EINVAL); + + /* + * Since this routine is only advisory, we default to conservative + * behavior. + */ + start = round_page((vm_offset_t) uap->addr); + end = trunc_page((vm_offset_t) uap->addr + uap->len); + + map = &p->p_vmspace->vm_map; + pmap = &p->p_vmspace->vm_pmap; + + vm_map_madvise(map, pmap, start, end, uap->behav); /* Not yet implemented */ - return (EOPNOTSUPP); + return (0); } #ifndef _SYS_SYSPROTO_H_ @@ -563,30 +592,156 @@ mincore(p, uap, retval) struct mincore_args *uap; int *retval; { - vm_offset_t addr; - vm_offset_t end; + vm_offset_t addr, first_addr; + vm_offset_t end, cend; + pmap_t pmap; + vm_map_t map; char *vec; + int error; + int vecindex, lastvecindex; + register vm_map_entry_t current; + vm_map_entry_t entry; + int mincoreinfo; - addr = trunc_page((vm_offset_t) uap->addr); + /* + * Make sure that the addresses presented are valid for user + * mode. 
+ */ + first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS) return (EINVAL); if (end < addr) return (EINVAL); + /* + * Address of byte vector + */ vec = uap->vec; - while(addr < end) { - int error; - if (pmap_extract(&p->p_vmspace->vm_pmap, addr)) { - error = subyte( vec, 1); - } else { - error = subyte( vec, 0); + + map = &p->p_vmspace->vm_map; + pmap = &p->p_vmspace->vm_pmap; + + vm_map_lock(map); + + /* + * Not needed here + */ +#if 0 + VM_MAP_RANGE_CHECK(map, addr, end); +#endif + + if (!vm_map_lookup_entry(map, addr, &entry)) + entry = entry->next; + + /* + * Do this on a map entry basis so that if the pages are not + * in the current processes address space, we can easily look + * up the pages elsewhere. + */ + lastvecindex = -1; + for(current = entry; + (current != &map->header) && (current->start < end); + current = current->next) { + + /* + * ignore submaps (for now) or null objects + */ + if (current->is_a_map || current->is_sub_map || + current->object.vm_object == NULL) + continue; + + /* + * limit this scan to the current map entry and the + * limits for the mincore call + */ + if (addr < current->start) + addr = current->start; + cend = current->end; + if (cend > end) + cend = end; + + /* + * scan this entry one page at a time + */ + while(addr < cend) { + /* + * Check pmap first, it is likely faster, also + * it can provide info as to whether we are the + * one referencing or modifying the page. + */ + mincoreinfo = pmap_mincore(pmap, addr); + if (!mincoreinfo) { + vm_pindex_t pindex; + vm_ooffset_t offset; + vm_page_t m; + /* + * calculate the page index into the object + */ + offset = current->offset + (addr - current->start); + pindex = OFF_TO_IDX(offset); + m = vm_page_lookup(current->object.vm_object, + pindex); + /* + * if the page is resident, then gather information about + * it. 
+ */ + if (m) { + mincoreinfo = MINCORE_INCORE; + if (m->dirty || + pmap_is_modified(VM_PAGE_TO_PHYS(m))) + mincoreinfo |= MINCORE_MODIFIED_OTHER; + if ((m->flags & PG_REFERENCED) || + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) + mincoreinfo |= MINCORE_REFERENCED_OTHER; + } + } + + /* + * calculate index into user supplied byte vector + */ + vecindex = OFF_TO_IDX(addr - first_addr); + + /* + * If we have skipped map entries, we need to make sure that + * the byte vector is zeroed for those skipped entries. + */ + while((lastvecindex + 1) < vecindex) { + error = subyte( vec + lastvecindex, 0); + if (error) { + vm_map_unlock(map); + return (EFAULT); + } + ++lastvecindex; + } + + /* + * Pass the page information to the user + */ + error = subyte( vec + vecindex, mincoreinfo); + if (error) { + vm_map_unlock(map); + return (EFAULT); + } + lastvecindex = vecindex; + addr += PAGE_SIZE; + } + } + + /* + * Zero the last entries in the byte vector. + */ + vecindex = OFF_TO_IDX(end - first_addr); + while((lastvecindex + 1) < vecindex) { + error = subyte( vec + lastvecindex, 0); + if (error) { + vm_map_unlock(map); + return (EFAULT); } - if (error) - return EFAULT; - vec++; - addr += PAGE_SIZE; + ++lastvecindex; } + + vm_map_unlock(map); return (0); } @@ -804,7 +959,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) */ if ((type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, - object, (vm_pindex_t) OFF_TO_IDX(foff), size); + object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1); } /* diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 187e7773b2db..d3720d9c6f03 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_object.c,v 1.68 1996/04/24 04:16:45 dyson Exp $ + * $Id: vm_object.c,v 1.69 1996/05/18 03:37:55 dyson Exp $ */ /* @@ -77,6 +77,7 @@ #include <sys/vnode.h> #include <sys/mount.h> #include <sys/vmmeter.h> +#include <sys/mman.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -162,6 +163,7 @@ _vm_object_allocate(type, size, object) object->size = size; object->ref_count = 1; object->flags = 0; + object->behavior = OBJ_NORMAL; object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; @@ -673,6 +675,69 @@ vm_object_pmap_remove(object, start, end) } /* + * vm_object_madvise: + * + * Implements the madvise function at the object/page level. + */ +void +vm_object_madvise(object, pindex, count, advise) + vm_object_t object; + vm_pindex_t pindex; + int count; + int advise; +{ + vm_pindex_t end; + vm_page_t m; + + if (object == NULL) + return; + + end = pindex + count; + + for (; pindex < end; pindex += 1) { + m = vm_page_lookup(object, pindex); + + /* + * If the page is busy or not in a normal active state, + * we skip it. Things can break if we mess with pages + * in any of the below states. + */ + if (m == NULL || m->busy || (m->flags & PG_BUSY) || + m->hold_count || m->wire_count || + m->valid != VM_PAGE_BITS_ALL) + continue; + + if (advise == MADV_WILLNEED) { + if (m->queue != PQ_ACTIVE) + vm_page_activate(m); + } else if (advise == MADV_DONTNEED) { + /* + * If the upper level VM system doesn't think that + * the page is dirty, check the pmap layer. + */ + if (m->dirty == 0) { + vm_page_test_dirty(m); + } + /* + * If the page is not dirty, then we place it onto + * the cache queue. When on the cache queue, it is + * available for immediate reuse. + */ + if (m->dirty == 0) { + vm_page_cache(m); + } else { + /* + * If the page IS dirty, then we remove it from all + * pmaps and deactivate it. 
+ */ + vm_page_protect(m, VM_PROT_NONE); + vm_page_deactivate(m); + } + } + } +} + +/* * vm_object_copy: * * Create a new object which is a copy of an existing diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 74a6b6bcc5cb..4cf72cbfde05 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.h,v 1.26 1995/12/14 09:55:05 phk Exp $ + * $Id: vm_object.h,v 1.27 1996/03/02 02:54:23 dyson Exp $ */ /* @@ -96,6 +96,7 @@ struct vm_object { int shadow_count; /* how many objects that this is a shadow for */ u_short flags; /* see below */ u_short paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ + u_short behavior; /* see below */ int resident_page_count; /* number of resident pages */ vm_ooffset_t paging_offset; /* Offset into paging space */ struct vm_object *backing_object; /* object that I'm a shadow of */ @@ -130,6 +131,11 @@ struct vm_object { #define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */ #define OBJ_CLEANING 0x0200 + +#define OBJ_NORMAL 0x0 /* default behavior */ +#define OBJ_SEQUENTIAL 0x1 /* expect sequential accesses */ +#define OBJ_RANDOM 0x2 /* expect random accesses */ + #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT)) @@ -175,6 +181,7 @@ void vm_object_pmap_copy __P((vm_object_t, vm_pindex_t, vm_pindex_t)); void vm_object_pmap_remove __P((vm_object_t, vm_pindex_t, vm_pindex_t)); void vm_object_reference __P((vm_object_t)); void vm_object_shadow __P((vm_object_t *, vm_ooffset_t *, vm_size_t)); +void vm_object_madvise __P((vm_object_t, vm_pindex_t, int, int)); #endif /* KERNEL */ #endif /* _VM_OBJECT_ */ |