aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris/uts/common
diff options
context:
space:
mode:
authorMatt Macy <mmacy@FreeBSD.org>2018-08-12 02:24:18 +0000
committerMatt Macy <mmacy@FreeBSD.org>2018-08-12 02:24:18 +0000
commit13ae5c6ba81cbcec352f4d6ff7dc61423873d9e2 (patch)
tree4833e01e4318324bf5e24398cf188588e9b4f578 /sys/cddl/contrib/opensolaris/uts/common
parent6e3d1345d9e119705981347bea160c2f5ad6258a (diff)
downloadsrc-13ae5c6ba81cbcec352f4d6ff7dc61423873d9e2.tar.gz
src-13ae5c6ba81cbcec352f4d6ff7dc61423873d9e2.zip
MFV/ZoL: Fix stack dbuf_hold_impl()
commit fc5bb51f08a6c91ff9ad3559d0266eeeab0b1f61 Author: Brian Behlendorf <behlendorf1@llnl.gov> Date: Thu Aug 26 10:52:00 2010 -0700 Fix stack dbuf_hold_impl() This commit preserves the recursive function dbuf_hold_impl() but moves the local variables and function arguments to the heap to minimize the stack frame size. Enough space is initially allocated on the stack for 20 levels of recursion. This technique was based on commit 34229a2f2ac07363f64ddd63e014964fff2f0671 which reduced stack usage of traverse_visitbp(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Notes
Notes: svn path=/head/; revision=337676
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c256
1 files changed, 185 insertions, 71 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 963c44c7c37a..bf25a458bbb8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -51,6 +51,30 @@
#include <sys/cityhash.h>
#include <sys/spa_impl.h>
+struct dbuf_hold_impl_data {
+ /* Function arguments */
+ dnode_t *dh_dn;
+ uint8_t dh_level;
+ uint64_t dh_blkid;
+ boolean_t dh_fail_sparse;
+ boolean_t dh_fail_uncached;
+ void *dh_tag;
+ dmu_buf_impl_t **dh_dbp;
+ /* Local variables */
+ dmu_buf_impl_t *dh_db;
+ dmu_buf_impl_t *dh_parent;
+ blkptr_t *dh_bp;
+ int dh_err;
+ dbuf_dirty_record_t *dh_dr;
+ int dh_depth;
+};
+
+static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
+ dnode_t *dn, uint8_t level, uint64_t blkid, boolean_t fail_sparse,
+ boolean_t fail_uncached,
+ void *tag, dmu_buf_impl_t **dbp, int depth);
+static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
+
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
@@ -2202,9 +2226,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
* this happens when the dnode is the meta-dnode, or a userused or groupused
* object.
*/
-static int
+__attribute__((always_inline))
+static inline int
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
- dmu_buf_impl_t **parentp, blkptr_t **bpp)
+ dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh)
{
*parentp = NULL;
*bpp = NULL;
@@ -2257,8 +2282,16 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
return (SET_ERROR(ENOENT));
} else if (level < nlevels-1) {
/* this block is referenced from an indirect block */
- int err = dbuf_hold_impl(dn, level+1,
- blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
+ int err;
+ if (dh == NULL) {
+ err = dbuf_hold_impl(dn, level+1,
+ blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
+ } else {
+ __dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1,
+ blkid >> epbs, fail_sparse, FALSE, NULL,
+ parentp, dh->dh_depth + 1);
+ err = __dbuf_hold_impl(dh + 1);
+ }
if (err)
return (err);
err = dbuf_read(*parentp, NULL,
@@ -2626,110 +2659,191 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
zio_nowait(pio);
}
+#define DBUF_HOLD_IMPL_MAX_DEPTH 20
+
+/*
+ * Helper function for __dbuf_hold_impl() to copy a buffer. Handles
+ * the case of encrypted, compressed and uncompressed buffers by
+ * allocating the new buffer, respectively, with arc_alloc_raw_buf(),
+ * arc_alloc_compressed_buf() or arc_alloc_buf().
+ *
+ * NOTE: Declared noinline to avoid stack bloat in __dbuf_hold_impl().
+ */
+noinline static void
+dbuf_hold_copy(struct dbuf_hold_impl_data *dh)
+{
+ dnode_t *dn = dh->dh_dn;
+ dmu_buf_impl_t *db = dh->dh_db;
+ dbuf_dirty_record_t *dr = dh->dh_dr;
+ arc_buf_t *data = dr->dt.dl.dr_data;
+
+ enum zio_compress compress_type = arc_get_compression(data);
+
+ if (compress_type != ZIO_COMPRESS_OFF) {
+ dbuf_set_data(db, arc_alloc_compressed_buf(
+ dn->dn_objset->os_spa, db, arc_buf_size(data),
+ arc_buf_lsize(data), compress_type));
+ } else {
+ dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
+ DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+ }
+
+ bcopy(data->b_data, db->db.db_data, arc_buf_size(data));
+}
+
/*
* Returns with db_holds incremented, and db_mtx not held.
* Note: dn_struct_rwlock must be held.
*/
-int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
- boolean_t fail_sparse, boolean_t fail_uncached,
- void *tag, dmu_buf_impl_t **dbp)
+static int
+__dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
{
- dmu_buf_impl_t *db, *parent = NULL;
+ ASSERT3S(dh->dh_depth, <, DBUF_HOLD_IMPL_MAX_DEPTH);
+ dh->dh_parent = NULL;
- ASSERT(blkid != DMU_BONUS_BLKID);
- ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
- ASSERT3U(dn->dn_nlevels, >, level);
+ ASSERT(dh->dh_blkid != DMU_BONUS_BLKID);
+ ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock));
+ ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level);
+
+ *(dh->dh_dbp) = NULL;
- *dbp = NULL;
-top:
/* dbuf_find() returns with db_mtx held */
- db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid);
+ dh->dh_db = dbuf_find(dh->dh_dn->dn_objset, dh->dh_dn->dn_object,
+ dh->dh_level, dh->dh_blkid);
- if (db == NULL) {
- blkptr_t *bp = NULL;
- int err;
+ if (dh->dh_db == NULL) {
+ dh->dh_bp = NULL;
- if (fail_uncached)
+ if (dh->dh_fail_uncached)
return (SET_ERROR(ENOENT));
- ASSERT3P(parent, ==, NULL);
- err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
- if (fail_sparse) {
- if (err == 0 && bp && BP_IS_HOLE(bp))
- err = SET_ERROR(ENOENT);
- if (err) {
- if (parent)
- dbuf_rele(parent, NULL);
- return (err);
+ ASSERT3P(dh->dh_parent, ==, NULL);
+ dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid,
+ dh->dh_fail_sparse, &dh->dh_parent, &dh->dh_bp, dh);
+ if (dh->dh_fail_sparse) {
+ if (dh->dh_err == 0 &&
+ dh->dh_bp && BP_IS_HOLE(dh->dh_bp))
+ dh->dh_err = SET_ERROR(ENOENT);
+ if (dh->dh_err) {
+ if (dh->dh_parent)
+ dbuf_rele(dh->dh_parent, NULL);
+ return (dh->dh_err);
}
}
- if (err && err != ENOENT)
- return (err);
- db = dbuf_create(dn, level, blkid, parent, bp);
+ if (dh->dh_err && dh->dh_err != ENOENT)
+ return (dh->dh_err);
+ dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid,
+ dh->dh_parent, dh->dh_bp);
}
- if (fail_uncached && db->db_state != DB_CACHED) {
- mutex_exit(&db->db_mtx);
+ if (dh->dh_fail_uncached && dh->dh_db->db_state != DB_CACHED) {
+ mutex_exit(&dh->dh_db->db_mtx);
return (SET_ERROR(ENOENT));
}
- if (db->db_buf != NULL) {
- arc_buf_access(db->db_buf);
- ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
+ if (dh->dh_db->db_buf != NULL) {
+ arc_buf_access(dh->dh_db->db_buf);
+ ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
}
- ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
+ ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
/*
* If this buffer is currently syncing out, and we are
* still referencing it from db_data, we need to make a copy
* of it in case we decide we want to dirty it again in this txg.
*/
- if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
- dn->dn_object != DMU_META_DNODE_OBJECT &&
- db->db_state == DB_CACHED && db->db_data_pending) {
- dbuf_dirty_record_t *dr = db->db_data_pending;
-
- if (dr->dt.dl.dr_data == db->db_buf) {
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
- dbuf_set_data(db,
- arc_alloc_buf(dn->dn_objset->os_spa, db, type,
- db->db.db_size));
- bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
- db->db.db_size);
- }
- }
-
- if (multilist_link_active(&db->db_cache_link)) {
- ASSERT(refcount_is_zero(&db->db_holds));
- ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
- db->db_caching_status == DB_DBUF_METADATA_CACHE);
-
- multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+ if (dh->dh_db->db_level == 0 &&
+ dh->dh_db->db_blkid != DMU_BONUS_BLKID &&
+ dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT &&
+ dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) {
+ dh->dh_dr = dh->dh_db->db_data_pending;
+ if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf)
+ dbuf_hold_copy(dh);
+ }
+
+ if (multilist_link_active(&dh->dh_db->db_cache_link)) {
+ ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
+ ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+ dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+ multilist_remove(
+ dbuf_caches[dh->dh_db->db_caching_status].cache,
+ dh->dh_db);
(void) refcount_remove_many(
- &dbuf_caches[db->db_caching_status].size,
- db->db.db_size, db);
+ &dbuf_caches[dh->dh_db->db_caching_status].size,
+ dh->dh_db->db.db_size, dh->dh_db);
- db->db_caching_status = DB_NO_CACHE;
+ dh->dh_db->db_caching_status = DB_NO_CACHE;
}
- (void) refcount_add(&db->db_holds, tag);
- DBUF_VERIFY(db);
- mutex_exit(&db->db_mtx);
+ (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
+ DBUF_VERIFY(dh->dh_db);
+ mutex_exit(&dh->dh_db->db_mtx);
/* NOTE: we can't rele the parent until after we drop the db_mtx */
- if (parent)
- dbuf_rele(parent, NULL);
+ if (dh->dh_parent)
+ dbuf_rele(dh->dh_parent, NULL);
- ASSERT3P(DB_DNODE(db), ==, dn);
- ASSERT3U(db->db_blkid, ==, blkid);
- ASSERT3U(db->db_level, ==, level);
- *dbp = db;
+ ASSERT3P(DB_DNODE(dh->dh_db), ==, dh->dh_dn);
+ ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid);
+ ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level);
+ *(dh->dh_dbp) = dh->dh_db;
return (0);
}
+/*
+ * The following code preserves the recursive function dbuf_hold_impl()
+ * but moves the local variables AND function arguments to the heap to
+ * minimize the stack frame size. Enough space is initially allocated
+ * on the stack for 20 levels of recursion.
+ */
+int
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
+ void *tag, dmu_buf_impl_t **dbp)
+{
+ struct dbuf_hold_impl_data *dh;
+ int error;
+
+ dh = kmem_alloc(sizeof (struct dbuf_hold_impl_data) *
+ DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
+ __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse,
+ fail_uncached, tag, dbp, 0);
+
+ error = __dbuf_hold_impl(dh);
+
+ kmem_free(dh, sizeof (struct dbuf_hold_impl_data) *
+ DBUF_HOLD_IMPL_MAX_DEPTH);
+
+ return (error);
+}
+
+static void
+__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
+ dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
+ void *tag, dmu_buf_impl_t **dbp, int depth)
+{
+ dh->dh_dn = dn;
+ dh->dh_level = level;
+ dh->dh_blkid = blkid;
+
+ dh->dh_fail_sparse = fail_sparse;
+ dh->dh_fail_uncached = fail_uncached;
+
+ dh->dh_tag = tag;
+ dh->dh_dbp = dbp;
+
+ dh->dh_db = NULL;
+ dh->dh_parent = NULL;
+ dh->dh_bp = NULL;
+ dh->dh_err = 0;
+ dh->dh_dr = NULL;
+
+ dh->dh_depth = depth;
+}
+
dmu_buf_impl_t *
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
{