diff options
author | Poul-Henning Kamp <phk@FreeBSD.org> | 2003-02-07 23:08:24 +0000 |
---|---|---|
committer | Poul-Henning Kamp <phk@FreeBSD.org> | 2003-02-07 23:08:24 +0000 |
commit | 801bb689ca8be76700b0c16c159683b5fa89472d (patch) | |
tree | 926f1fc791d8d4e6893f5573cb0b3fed3a603f7c /sys | |
parent | 96fda1ea0570af073eb2e9ef0770fe54766ff42b (diff) | |
download | src-801bb689ca8be76700b0c16c159683b5fa89472d.tar.gz src-801bb689ca8be76700b0c16c159683b5fa89472d.zip |
Commit the correct copy of the g_stat structure.
Add debug.sizeof.g_stat sysctl.
Set the id field of the g_stat when we create consumers and providers.
Remove biocount from consumer, we will use the counters in the g_stat
structure instead. Replace one field which will need to be atomically
manipulated with two fields which will not (stat.nop and stat.nend).
Add a companion field to bio_children: bio_inbed, for the exact
same reason.
Don't output the biocount in the confdot output.
Fix KASSERT in g_io_request().
Add sysctl kern.geom.collectstats defaulting to off.
Collect the following raw statistics conditioned on this sysctl:
for each consumer and provider {
total number of operations started.
total number of operations completed.
time last operation completed.
sum of idle-time.
for each of BIO_READ, BIO_WRITE and BIO_DELETE {
number of operations completed.
number of bytes completed.
number of ENOMEM errors.
number of other errors.
sum of transaction time.
}
}
API for getting hold of these statistics data not included yet.
Notes:
svn path=/head/; revision=110523
Diffstat (limited to 'sys')
-rw-r--r-- | sys/geom/geom.h | 10 | ||||
-rw-r--r-- | sys/geom/geom_dev.c | 2 | ||||
-rw-r--r-- | sys/geom/geom_dump.c | 4 | ||||
-rw-r--r-- | sys/geom/geom_int.h | 1 | ||||
-rw-r--r-- | sys/geom/geom_io.c | 103 | ||||
-rw-r--r-- | sys/geom/geom_kern.c | 6 | ||||
-rw-r--r-- | sys/geom/geom_subr.c | 8 | ||||
-rw-r--r-- | sys/geom/notes | 40 | ||||
-rw-r--r-- | sys/sys/bio.h | 1 |
9 files changed, 139 insertions, 36 deletions
diff --git a/sys/geom/geom.h b/sys/geom/geom.h index dfb582b159de..f406fb9997be 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -151,12 +151,10 @@ struct g_stat { uint64_t nmem; uint64_t nerr; struct bintime dt; - } ops[5]; + } ops[3]; #define G_STAT_IDX_READ 0 -#define G_STAT_IDX_WRITE 2 -#define G_STAT_IDX_DELETE 3 -#define G_STAT_IDX_GETATTR 4 -#define G_STAT_IDX_SETATTR 5 +#define G_STAT_IDX_WRITE 1 +#define G_STAT_IDX_DELETE 2 }; /* @@ -173,8 +171,6 @@ struct g_consumer { LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; struct g_event *event; - - int biocount; int spoiled; struct g_stat stat; }; diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 99f4cc7e0c12..24234b953337 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -430,7 +430,7 @@ g_dev_orphan(struct g_consumer *cp) gp = cp->geom; g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name); g_topology_assert(); - if (cp->biocount > 0) + if (cp->stat.nop != cp->stat.nend) /* XXX ? 
*/ return; dev = gp->softc; if (dev->si_flags & SI_DUMPDEV) diff --git a/sys/geom/geom_dump.c b/sys/geom/geom_dump.c index f65803cc9b39..561ae514271b 100644 --- a/sys/geom/geom_dump.c +++ b/sys/geom/geom_dump.c @@ -59,8 +59,8 @@ static void g_confdot_consumer(struct sbuf *sb, struct g_consumer *cp) { - sbuf_printf(sb, "z%p [label=\"r%dw%de%d\\nbio #%d\"];\n", - cp, cp->acr, cp->acw, cp->ace, cp->biocount); + sbuf_printf(sb, "z%p [label=\"r%dw%de%d\"];\n", + cp, cp->acr, cp->acw, cp->ace); if (cp->provider) sbuf_printf(sb, "z%p -> z%p;\n", cp, cp->provider); } diff --git a/sys/geom/geom_int.h b/sys/geom/geom_int.h index 3224a2db2353..5962c86adcb0 100644 --- a/sys/geom/geom_int.h +++ b/sys/geom/geom_int.h @@ -40,6 +40,7 @@ TAILQ_HEAD(g_tailq_head, g_geom); TAILQ_HEAD(event_tailq_head, g_event); extern struct event_tailq_head events; +extern int g_collectstats; extern int g_debugflags; /* 1 G_T_TOPOLOGY */ /* 2 G_T_BIO */ diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 9a6d215d290c..842595afbf9e 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -154,7 +154,7 @@ g_clone_bio(struct bio *bp) bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_attribute = bp->bio_attribute; - bp->bio_children++; /* XXX: atomic ? 
*/ + bp->bio_children++; } /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ return(bp2); @@ -261,24 +261,46 @@ g_io_check(struct bio *bp) void g_io_request(struct bio *bp, struct g_consumer *cp) { + struct g_provider *pp; + struct bintime bt; + pp = cp->provider; KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); - KASSERT(bp->bio_to == NULL, ("consumer not attached in g_io_request")); + KASSERT(pp != NULL, ("consumer not attached in g_io_request")); + bp->bio_from = cp; - bp->bio_to = cp->provider; + bp->bio_to = pp; bp->bio_error = 0; bp->bio_completed = 0; - /* begin_stats(&bp->stats); */ + if (g_collectstats) { + /* Collect statistics */ + binuptime(&bp->bio_t0); + if (cp->stat.nop == cp->stat.nend) { + /* Consumer is idle */ + bt = bp->bio_t0; + bintime_sub(&bt, &cp->stat.wentidle); + bintime_add(&cp->stat.it, &bt); + if (pp->stat.nop == pp->stat.nend) { + /* + * NB: Provider can only be idle if the + * consumer is but we cannot trust them + * to have gone idle at the same time. + */ + bt = bp->bio_t0; + bintime_sub(&bt, &pp->stat.wentidle); + bintime_add(&pp->stat.it, &bt); + } + } + } + cp->stat.nop++; + pp->stat.nop++; - atomic_add_int(&cp->biocount, 1); - /* Pass it on down. 
*/ g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", - bp, bp->bio_from, bp->bio_from->geom->name, - bp->bio_to, bp->bio_to->name, bp->bio_cmd); + bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); g_bioq_enqueue_tail(bp, &g_bio_run_down); wakeup(&g_wait_down); } @@ -286,32 +308,69 @@ g_io_request(struct bio *bp, struct g_consumer *cp) void g_io_deliver(struct bio *bp, int error) { + struct g_consumer *cp; + struct g_provider *pp; + struct bintime t1; + int idx; + cp = bp->bio_from; + pp = bp->bio_to; KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); - KASSERT(bp->bio_from != NULL, ("NULL bio_from in g_io_deliver")); - KASSERT(bp->bio_from->geom != NULL, - ("NULL bio_from->geom in g_io_deliver")); - KASSERT(bp->bio_to != NULL, ("NULL bio_to in g_io_deliver")); + KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); + KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); + KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); g_trace(G_T_BIO, "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", - bp, bp->bio_from, bp->bio_from->geom->name, - bp->bio_to, bp->bio_to->name, bp->bio_cmd, error, + bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); - /* finish_stats(&bp->stats); */ + + switch (bp->bio_cmd) { + case BIO_READ: idx = G_STAT_IDX_READ; break; + case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; + case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; + case BIO_GETATTR: idx = -1; break; + case BIO_SETATTR: idx = -1; break; + default: + panic("unknown bio_cmd in g_io_deliver"); + break; + } + + /* Collect statistics */ + if (g_collectstats) { + binuptime(&t1); + pp->stat.wentidle = t1; + cp->stat.wentidle = t1; + + if (idx >= 0) { + bintime_sub(&t1, &bp->bio_t0); + bintime_add(&cp->stat.ops[idx].dt, &t1); + bintime_add(&pp->stat.ops[idx].dt, &t1); + pp->stat.ops[idx].nbyte += bp->bio_completed; + cp->stat.ops[idx].nbyte += bp->bio_completed; + 
pp->stat.ops[idx].nop++; + cp->stat.ops[idx].nop++; + if (error == ENOMEM) { + cp->stat.ops[idx].nmem++; + pp->stat.ops[idx].nmem++; + } else if (error != 0) { + cp->stat.ops[idx].nerr++; + pp->stat.ops[idx].nerr++; + } + } + } + + pp->stat.nend++; /* In reverse order of g_io_request() */ + cp->stat.nend++; if (error == ENOMEM) { - printf("ENOMEM %p on %p(%s)\n", - bp, bp->bio_to, bp->bio_to->name); - g_io_request(bp, bp->bio_from); + printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); + g_io_request(bp, cp); pace++; return; } - bp->bio_error = error; - g_bioq_enqueue_tail(bp, &g_bio_run_up); - wakeup(&g_wait_up); } @@ -362,8 +421,6 @@ g_io_schedule_up(struct thread *tp __unused) break; cp = bp->bio_from; - - atomic_add_int(&cp->biocount, -1); biodone(bp); } } diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index fb9a249df82b..a885c28c6e33 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -57,6 +57,7 @@ struct sx topology_lock; static struct proc *g_up_proc; int g_debugflags; +int g_collectstats; /* * G_UP and G_DOWN are the two threads which push I/O through the @@ -230,6 +231,9 @@ SYSCTL_PROC(_kern_geom, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RD, SYSCTL_INT(_kern_geom, OID_AUTO, debugflags, CTLFLAG_RW, &g_debugflags, 0, ""); +SYSCTL_INT(_kern_geom, OID_AUTO, collectstats, CTLFLAG_RW, + &g_collectstats, 0, ""); + SYSCTL_INT(_debug_sizeof, OID_AUTO, g_class, CTLFLAG_RD, 0, sizeof(struct g_class), ""); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_geom, CTLFLAG_RD, @@ -242,3 +246,5 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, g_bioq, CTLFLAG_RD, 0, sizeof(struct g_bioq), ""); SYSCTL_INT(_debug_sizeof, OID_AUTO, g_event, CTLFLAG_RD, 0, sizeof(struct g_event), ""); +SYSCTL_INT(_debug_sizeof, OID_AUTO, g_stat, CTLFLAG_RD, + 0, sizeof(struct g_stat), ""); diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index 0d289789589f..d8b30546f18c 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -145,6 +145,7 @@ g_new_consumer(struct g_geom *gp) cp 
= g_malloc(sizeof *cp, M_ZERO); cp->protect = 0x020016602; cp->geom = gp; + cp->stat.id = cp; LIST_INSERT_HEAD(&gp->consumer, cp, consumer); return(cp); } @@ -184,6 +185,7 @@ g_new_providerf(struct g_geom *gp, const char *fmt, ...) LIST_INIT(&pp->consumers); pp->error = ENXIO; pp->geom = gp; + pp->stat.id = pp; LIST_INSERT_HEAD(&gp->provider, pp, provider); g_nproviders++; g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); @@ -327,7 +329,7 @@ g_detach(struct g_consumer *cp) KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); - KASSERT(cp->biocount == 0, ("detach but nonzero biocount")); + KASSERT(cp->stat.nop == cp->stat.nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; @@ -499,8 +501,8 @@ g_std_done(struct bio *bp) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); - bp2->bio_children--; /* XXX: atomic ? */ - if (bp2->bio_children == 0) + bp2->bio_inbed++; + if (bp2->bio_children == bp2->bio_inbed) g_io_deliver(bp2, bp2->bio_error); } diff --git a/sys/geom/notes b/sys/geom/notes new file mode 100644 index 000000000000..88e0f528d786 --- /dev/null +++ b/sys/geom/notes @@ -0,0 +1,40 @@ +$FreeBSD$ + +For the lack of a better place to put them, this file will contain +notes on some of the more intricate details of geom. + +----------------------------------------------------------------------- +Locking of bio_children and bio_inbed + +bio_children is used by g_std_done() and g_clone_bio() to keep track +of children cloned off a request. g_clone_bio will increment the +bio_children counter for each time it is called and g_std_done will +increment bio_inbed for every call, and if the two counters are +equal, call g_io_deliver() on the parent bio. 
+ +The general assumption is that g_clone_bio() is called only in +the g_down thread, and g_std_done() only in the g_up thread and +therefore the two fields do not generally need locking. These +restrictions are not enforced by the code, but only with great +care should they be violated. + +It is the responsibility of the class implementation to avoid the +following race condition: A class intends to split a bio in two +children. It clones the bio, and requests I/O on the child. +This I/O operation completes before the second child is cloned +and g_std_done() sees the counters both equal 1 and finishes off +the bio. + +There is no race present in the common case where the bio is split +in multiple parts in the class start method and the I/O is requested +on another GEOM class below: There is only one g_down thread and +the class below will not get its start method run until we return +from our start method, and consequently the I/O cannot complete +prematurely. + +In all other cases, this race needs to be mitigated, for instance +by cloning all children before I/O is requested on any of them. + +Notice that cloning an "extra" child and calling g_std_done() on +it directly opens another race since the assumption is that +g_std_done() only is called in the g_up thread. diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 2ef28bc77b68..31695935e8be 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -71,6 +71,7 @@ struct bio { off_t bio_length; /* Like bio_bcount */ off_t bio_completed; /* Inverse of bio_resid */ u_int bio_children; /* Number of spawned bios */ + u_int bio_inbed; /* Children safely home by now */ struct bio *bio_parent; /* Pointer to parent */ struct bintime bio_t0; /* Time request started */ |