author	Konstantin Belousov <kib@FreeBSD.org>	2010-06-08 17:54:28 +0000
committer	Konstantin Belousov <kib@FreeBSD.org>	2010-06-08 17:54:28 +0000
commit	8d4a7be84d51767f19a1e59a31d68d19a8d7a019 (patch)
tree	ce721d22f60a1202a7cf7daeb70ebb7c20101794
parent	53a908cb07b62603e7463b92a7082cc341a46f0a (diff)
Reorganize the code in bdwrite() which handles the move of dirtiness
from the buffer pages to the buffer.

Combine the code that sets the buffer dirty range (previously in
vfs_setdirty()) and the code that cleans the pages (vfs_clean_pages())
into the new function vfs_clean_pages_dirty_buf(). Now the vm object
lock is acquired only once.

Drain the VPO_BUSY bit of the buffer pages before setting the valid
and clean bits in vfs_clean_pages_dirty_buf(), with the new helper
vfs_drain_busy_pages(); pmap_clear_modify() asserts that the page is
not busy.

In vfs_busy_pages(), move the wait for the draining of VPO_BUSY before
the dirtiness handling, to follow the structure of
vfs_clean_pages_dirty_buf().

Reported and tested by:	pho
Suggested and reviewed by:	alc
MFC after:	2 weeks
Notes:
    svn path=/head/; revision=208920
-rw-r--r--	sys/kern/vfs_bio.c	135
1 file changed, 65 insertions, 70 deletions
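Before the diff itself, a note on the central change: vfs_setdirty()
and vfs_clean_pages() each acquired and released the same VM object
lock back to back in bdwrite(); the merged vfs_clean_pages_dirty_buf()
takes it once. The userspace C sketch below models only that locking
pattern; the struct, mutex, and function names are illustrative
stand-ins, not the kernel's API.

	#include <pthread.h>

	struct obj {
		pthread_mutex_t lock;	/* stands in for the VM object lock */
		int dirty;		/* stands in for the buffer dirty range */
		int clean;		/* stands in for the page clean/valid bits */
	};

	/* Old pattern: each helper did its own lock/unlock round trip. */
	static void setdirty_old(struct obj *o)
	{
		pthread_mutex_lock(&o->lock);
		o->dirty = 1;
		pthread_mutex_unlock(&o->lock);
	}

	static void clean_pages_old(struct obj *o)
	{
		pthread_mutex_lock(&o->lock);
		o->clean = 1;
		pthread_mutex_unlock(&o->lock);
	}

	/* New pattern: both steps under a single lock acquisition. */
	static void clean_pages_dirty_buf(struct obj *o)
	{
		pthread_mutex_lock(&o->lock);
		o->dirty = 1;
		o->clean = 1;
		pthread_mutex_unlock(&o->lock);
	}

	int main(void)
	{
		struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

		setdirty_old(&o);		/* two lock round trips... */
		clean_pages_old(&o);
		clean_pages_dirty_buf(&o);	/* ...versus one */
		return 0;
	}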
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index bbced87d425d..5429b14a1d3b 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -102,8 +102,8 @@ static void vm_hold_load_pages(struct buf *bp, vm_offset_t from,
static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m);
static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off,
vm_page_t m);
-static void vfs_clean_pages(struct buf *bp);
-static void vfs_setdirty(struct buf *bp);
+static void vfs_drain_busy_pages(struct buf *bp);
+static void vfs_clean_pages_dirty_buf(struct buf *bp);
static void vfs_setdirty_locked_object(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
@@ -1025,18 +1025,17 @@ bdwrite(struct buf *bp)
}
/*
- * Set the *dirty* buffer range based upon the VM system dirty pages.
- */
- vfs_setdirty(bp);
-
- /*
- * We need to do this here to satisfy the vnode_pager and the
- * pageout daemon, so that it thinks that the pages have been
- * "cleaned". Note that since the pages are in a delayed write
- * buffer -- the VFS layer "will" see that the pages get written
- * out on the next sync, or perhaps the cluster will be completed.
+ * Set the *dirty* buffer range based upon the VM system dirty
+ * pages.
+ *
+ * Mark the buffer pages as clean. We need to do this here to
+ * satisfy the vnode_pager and the pageout daemon, so that it
+ * thinks that the pages have been "cleaned". Note that since
+ * the pages are in a delayed write buffer -- the VFS layer
+ * "will" see that the pages get written out on the next sync,
+ * or perhaps the cluster will be completed.
*/
- vfs_clean_pages(bp);
+ vfs_clean_pages_dirty_buf(bp);
bqrelse(bp);
/*
@@ -2398,31 +2397,44 @@ notinmem:
}
/*
- * vfs_setdirty:
- *
- * Sets the dirty range for a buffer based on the status of the dirty
- * bits in the pages comprising the buffer.
+ * Set the dirty range for a buffer based on the status of the dirty
+ * bits in the pages comprising the buffer. The range is limited
+ * to the size of the buffer.
*
- * The range is limited to the size of the buffer.
+ * Tell the VM system that the pages associated with this buffer
+ * are clean. This is used for delayed writes where the data is
+ * going to go to disk eventually without additional VM intervention.
*
- * This routine is primarily used by NFS, but is generalized for the
- * B_VMIO case.
+ * Note that while we only really need to clean through to b_bcount, we
+ * just go ahead and clean through to b_bufsize.
*/
static void
-vfs_setdirty(struct buf *bp)
+vfs_clean_pages_dirty_buf(struct buf *bp)
{
+ vm_ooffset_t foff, noff, eoff;
+ vm_page_t m;
+ int i;
- /*
- * Degenerate case - empty buffer
- */
- if (bp->b_bufsize == 0)
+ if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0)
return;
- if ((bp->b_flags & B_VMIO) == 0)
- return;
+ foff = bp->b_offset;
+ KASSERT(bp->b_offset != NOOFFSET,
+ ("vfs_clean_pages_dirty_buf: no buffer offset"));
VM_OBJECT_LOCK(bp->b_bufobj->bo_object);
+ vfs_drain_busy_pages(bp);
vfs_setdirty_locked_object(bp);
+ for (i = 0; i < bp->b_npages; i++) {
+ noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
+ eoff = noff;
+ if (eoff > bp->b_offset + bp->b_bufsize)
+ eoff = bp->b_offset + bp->b_bufsize;
+ m = bp->b_pages[i];
+ vfs_page_set_validclean(bp, foff, m);
+ /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
+ foff = noff;
+ }
VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object);
}
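In the loop above, foff walks the buffer page by page: noff rounds
foff up to the next page boundary, and eoff clamps noff to the end of
the buffer (eoff feeds only the commented-out vm_page_clear_dirty()
call). Below is a standalone sketch of that offset arithmetic, using a
hypothetical buffer that starts halfway into a page; the values are
invented for illustration.

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096
	#define PAGE_MASK (PAGE_SIZE - 1)

	int main(void)
	{
		int64_t b_offset = 6144;	/* buffer starts 1.5 pages in */
		int64_t b_bufsize = 10240;	/* and is 2.5 pages long */
		int64_t foff, noff, eoff;

		for (foff = b_offset; foff < b_offset + b_bufsize; foff = noff) {
			/* Round up to the next page boundary... */
			noff = (foff + PAGE_SIZE) & ~(int64_t)PAGE_MASK;
			/* ...then clamp to the end of the buffer. */
			eoff = noff;
			if (eoff > b_offset + b_bufsize)
				eoff = b_offset + b_bufsize;
			printf("page span [%jd, %jd)\n", (intmax_t)foff,
			    (intmax_t)eoff);
		}
		return 0;
	}

This prints the three spans [6144, 8192), [8192, 12288), and
[12288, 16384): a partial first page followed by two full pages,
covering exactly b_offset through b_offset + b_bufsize.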
@@ -3533,6 +3545,31 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m)
}
/*
+ * Ensure that all buffer pages are not busied by the VPO_BUSY flag.
+ * If any page is busy, drain the flag.
+ */
+static void
+vfs_drain_busy_pages(struct buf *bp)
+{
+ vm_page_t m;
+ int i, last_busied;
+
+ VM_OBJECT_LOCK_ASSERT(bp->b_bufobj->bo_object, MA_OWNED);
+ last_busied = 0;
+ for (i = 0; i < bp->b_npages; i++) {
+ m = bp->b_pages[i];
+ if ((m->oflags & VPO_BUSY) != 0) {
+ for (; last_busied < i; last_busied++)
+ vm_page_busy(bp->b_pages[last_busied]);
+ while ((m->oflags & VPO_BUSY) != 0)
+ vm_page_sleep(m, "vbpage");
+ }
+ }
+ for (i = 0; i < last_busied; i++)
+ vm_page_wakeup(bp->b_pages[i]);
+}
+
+/*
* This routine is called before a device strategy routine.
* It is used to tell the VM system that paging I/O is in
* progress, and treat the pages associated with the buffer
@@ -3560,15 +3597,9 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
VM_OBJECT_LOCK(obj);
+ vfs_drain_busy_pages(bp);
if (bp->b_bufsize != 0)
vfs_setdirty_locked_object(bp);
-retry:
- for (i = 0; i < bp->b_npages; i++) {
- m = bp->b_pages[i];
-
- if (vm_page_sleep_if_busy(m, FALSE, "vbpage"))
- goto retry;
- }
bogus = 0;
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
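The deleted retry loop slept on the first busy page it met and then
rescanned from page zero. vfs_drain_busy_pages() avoids the rescan:
before sleeping, it busies every page it has already verified idle, so
those pages cannot be busied behind its back while it waits, and it
wakes them once the scan completes. A single-threaded sketch of that
bookkeeping follows; the arrays and page count are invented for
illustration, and the sleep is modeled by simply clearing the flag.

	#include <stdbool.h>
	#include <stdio.h>

	#define NPAGES 4

	int main(void)
	{
		bool busy[NPAGES] = { false, false, true, false }; /* page 2 busy */
		bool ours[NPAGES] = { false };
		int i, last_busied = 0;

		for (i = 0; i < NPAGES; i++) {
			if (busy[i]) {
				/* Busy the pages already verified idle. */
				for (; last_busied < i; last_busied++)
					ours[last_busied] = true;
				/*
				 * The kernel would sleep here until the
				 * flag drains; model that by clearing it.
				 */
				busy[i] = false;
			}
		}
		/* Wake (unbusy) only the pages we busied ourselves. */
		for (i = 0; i < last_busied; i++)
			ours[i] = false;
		printf("drained; no page left busy\n");
		return 0;
	}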
@@ -3609,42 +3640,6 @@ retry:
}
/*
- * Tell the VM system that the pages associated with this buffer
- * are clean. This is used for delayed writes where the data is
- * going to go to disk eventually without additional VM intervention.
- *
- * Note that while we only really need to clean through to b_bcount, we
- * just go ahead and clean through to b_bufsize.
- */
-static void
-vfs_clean_pages(struct buf *bp)
-{
- int i;
- vm_ooffset_t foff, noff, eoff;
- vm_page_t m;
-
- if (!(bp->b_flags & B_VMIO))
- return;
-
- foff = bp->b_offset;
- KASSERT(bp->b_offset != NOOFFSET,
- ("vfs_clean_pages: no buffer offset"));
- VM_OBJECT_LOCK(bp->b_bufobj->bo_object);
- for (i = 0; i < bp->b_npages; i++) {
- m = bp->b_pages[i];
- noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
- eoff = noff;
-
- if (eoff > bp->b_offset + bp->b_bufsize)
- eoff = bp->b_offset + bp->b_bufsize;
- vfs_page_set_validclean(bp, foff, m);
- /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
- foff = noff;
- }
- VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object);
-}
-
-/*
* vfs_bio_set_valid:
*
* Set the range within the buffer to valid. The range is