diff options
author | Alan Cox <alc@FreeBSD.org> | 2015-12-19 18:42:50 +0000 |
---|---|---|
committer | Alan Cox <alc@FreeBSD.org> | 2015-12-19 18:42:50 +0000 |
commit | c869e672086f986a9f3aa1fe93fd2f0eb5c81156 (patch) | |
tree | 34a147bea7788aca2b0a5376368e87fce8afbfa1 /sys/vm/vm_phys.c | |
parent | 2906f6cbae2e407ba2d1b5a81310c3f8ce5de32c (diff) | |
download | src-c869e672086f986a9f3aa1fe93fd2f0eb5c81156.tar.gz src-c869e672086f986a9f3aa1fe93fd2f0eb5c81156.zip |
Introduce a new mechanism for relocating virtual pages to a new physical
address and use this mechanism when:
1. kmem_alloc_{attr,contig}() can't find suitable free pages in the physical
memory allocator's free page lists. This replaces the long-standing
approach of scanning the active and inactive queues, converting clean
pages into PG_CACHED pages and laundering dirty pages. In contrast, the
new mechanism does not use PG_CACHED pages nor does it trigger a large
number of I/O operations.
2. on 32-bit MIPS processors, uma_small_alloc() and the pmap can't find
free pages in the physical memory allocator's free page lists that are
covered by the direct map. Tested by: adrian
3. ttm_bo_global_init() and ttm_vm_page_alloc_dma32() can't find suitable
free pages in the physical memory allocator's free page lists.
In the coming months, I expect that this new mechanism will be applied in
other places. For example, balloon drivers should use relocation to
minimize fragmentation of the guest physical address space.
Make vm_phys_alloc_contig() a little smarter (and more efficient in some
cases). Specifically, use vm_phys_segs[] earlier to avoid scanning free
page lists that can't possibly contain suitable pages.
Reviewed by: kib, markj
Glanced at: jhb
Discussed with: jeff
Sponsored by: EMC / Isilon Storage Division
Differential Revision: https://reviews.freebsd.org/D4444
Notes
Notes:
svn path=/head/; revision=292469
Diffstat (limited to 'sys/vm/vm_phys.c')
-rw-r--r-- | sys/vm/vm_phys.c | 221 |
1 files changed, 152 insertions, 69 deletions
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index d26b8b580952..38799f2dac03 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -170,6 +170,9 @@ static struct vm_domain_policy vm_default_policy = static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order); +static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, + u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); static int vm_phys_paddr_to_segind(vm_paddr_t pa); @@ -1163,6 +1166,56 @@ vm_phys_free_contig(vm_page_t m, u_long npages) } /* + * Scan physical memory between the specified addresses "low" and "high" for a + * run of contiguous physical pages that satisfy the specified conditions, and + * return the lowest page in the run. The specified "alignment" determines + * the alignment of the lowest physical page in the run. If the specified + * "boundary" is non-zero, then the run of physical pages cannot span a + * physical address that is a multiple of "boundary". + * + * "npages" must be greater than zero. Both "alignment" and "boundary" must + * be a power of two. 
+ */ +vm_page_t +vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, int options) +{ + vm_paddr_t pa_end; + vm_page_t m_end, m_run, m_start; + struct vm_phys_seg *seg; + int segind; + + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + if (low >= high) + return (NULL); + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + if (seg->start >= high) + break; + if (low >= seg->end) + continue; + if (low <= seg->start) + m_start = seg->first_page; + else + m_start = &seg->first_page[atop(low - seg->start)]; + if (high < seg->end) + pa_end = high; + else + pa_end = seg->end; + if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages)) + continue; + m_end = &seg->first_page[atop(pa_end - seg->start)]; + m_run = vm_page_scan_contig(npages, m_start, m_end, + alignment, boundary, options); + if (m_run != NULL) + return (m_run); + } + return (NULL); +} + +/* * Set the pool for a contiguous, power of two-sized set of physical pages. 
*/ void @@ -1300,93 +1353,123 @@ vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { - struct vm_freelist *fl; - struct vm_phys_seg *seg; - vm_paddr_t pa, pa_last, size; - vm_page_t m, m_ret; - u_long npages_end; - int domain, flind, oind, order, pind; + vm_paddr_t pa_end, pa_start; + vm_page_t m_run; struct vm_domain_iterator vi; + struct vm_phys_seg *seg; + int domain, segind; + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - size = npages << PAGE_SHIFT; - KASSERT(size != 0, - ("vm_phys_alloc_contig: size must not be 0")); - KASSERT((alignment & (alignment - 1)) == 0, - ("vm_phys_alloc_contig: alignment must be a power of 2")); - KASSERT((boundary & (boundary - 1)) == 0, - ("vm_phys_alloc_contig: boundary must be a power of 2")); - /* Compute the queue that is the best fit for npages. 
*/ - for (order = 0; (1 << order) < npages; order++); - + if (low >= high) + return (NULL); vm_policy_iterator_init(&vi); - restartdom: if (vm_domain_iterator_run(&vi, &domain) != 0) { vm_policy_iterator_finish(&vi); return (NULL); } + m_run = NULL; + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + if (seg->start >= high) + break; + if (low >= seg->end || seg->domain != domain) + continue; + if (low <= seg->start) + pa_start = seg->start; + else + pa_start = low; + if (high < seg->end) + pa_end = high; + else + pa_end = seg->end; + if (pa_end - pa_start < ptoa(npages)) + continue; + m_run = vm_phys_alloc_seg_contig(seg, npages, low, high, + alignment, boundary); + if (m_run != NULL) + break; + } + if (m_run == NULL && !vm_domain_iterator_isdone(&vi)) + goto restartdom; + vm_policy_iterator_finish(&vi); + return (m_run); +} - for (flind = 0; flind < vm_nfreelists; flind++) { - for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { - for (pind = 0; pind < VM_NFREEPOOL; pind++) { - fl = &vm_phys_free_queues[domain][flind][pind][0]; - TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) { - /* - * A free list may contain physical pages - * from one or more segments. - */ - seg = &vm_phys_segs[m_ret->segind]; - if (seg->start > high || - low >= seg->end) - continue; - - /* - * Is the size of this allocation request - * larger than the largest block size? - */ - if (order >= VM_NFREEORDER) { - /* - * Determine if a sufficient number - * of subsequent blocks to satisfy - * the allocation request are free. - */ - pa = VM_PAGE_TO_PHYS(m_ret); - pa_last = pa + size; - for (;;) { - pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); - if (pa >= pa_last) - break; - if (pa < seg->start || - pa >= seg->end) - break; - m = &seg->first_page[atop(pa - seg->start)]; - if (m->order != VM_NFREEORDER - 1) - break; - } - /* If not, continue to the next block. 
*/ - if (pa < pa_last) - continue; - } +/* + * Allocate a run of contiguous physical pages from the free list for the + * specified segment. + */ +static vm_page_t +vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages, + vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) +{ + struct vm_freelist *fl; + vm_paddr_t pa, pa_end, size; + vm_page_t m, m_ret; + u_long npages_end; + int oind, order, pind; + KASSERT(npages > 0, ("npages is 0")); + KASSERT(powerof2(alignment), ("alignment is not a power of 2")); + KASSERT(powerof2(boundary), ("boundary is not a power of 2")); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + /* Compute the queue that is the best fit for npages. */ + for (order = 0; (1 << order) < npages; order++); + /* Search for a run satisfying the specified conditions. */ + size = npages << PAGE_SHIFT; + for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; + oind++) { + for (pind = 0; pind < VM_NFREEPOOL; pind++) { + fl = (*seg->free_queues)[pind]; + TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) { + /* + * Is the size of this allocation request + * larger than the largest block size? + */ + if (order >= VM_NFREEORDER) { /* - * Determine if the blocks are within the given range, - * satisfy the given alignment, and do not cross the - * given boundary. + * Determine if a sufficient number of + * subsequent blocks to satisfy the + * allocation request are free. */ pa = VM_PAGE_TO_PHYS(m_ret); - if (pa >= low && - pa + size <= high && - (pa & (alignment - 1)) == 0 && - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) - goto done; + pa_end = pa + size; + for (;;) { + pa += 1 << (PAGE_SHIFT + + VM_NFREEORDER - 1); + if (pa >= pa_end || + pa < seg->start || + pa >= seg->end) + break; + m = &seg->first_page[atop(pa - + seg->start)]; + if (m->order != VM_NFREEORDER - + 1) + break; + } + /* If not, go to the next block. 
*/ + if (pa < pa_end) + continue; } + + /* + * Determine if the blocks are within the + * given range, satisfy the given alignment, + * and do not cross the given boundary. + */ + pa = VM_PAGE_TO_PHYS(m_ret); + pa_end = pa + size; + if (pa >= low && pa_end <= high && (pa & + (alignment - 1)) == 0 && ((pa ^ (pa_end - + 1)) & ~(boundary - 1)) == 0) + goto done; } } } - if (!vm_domain_iterator_isdone(&vi)) - goto restartdom; - vm_policy_iterator_finish(&vi); return (NULL); done: for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { |