aboutsummaryrefslogtreecommitdiff
path: root/sys/geom/vinum/geom_vinum_subr.c
diff options
context:
space:
mode:
authorUlf Lilleengen <lulf@FreeBSD.org>2009-03-28 17:20:08 +0000
committerUlf Lilleengen <lulf@FreeBSD.org>2009-03-28 17:20:08 +0000
commitc0b9797aa87bd4e81152f0d3d34007d74a1cee88 (patch)
tree5e7f9ce749d55fb9cc8ca02210ab0d369c6900cd /sys/geom/vinum/geom_vinum_subr.c
parent4d59cc85e80d021c95e000553fcfaf02bdfc1b9e (diff)
Import the gvinum work that have been done during and after Summer of Code 2007.
The work have been under testing and fixing since then, and it is mature enough to be put into HEAD for further testing. A lot have changed in this time, and here are the most important: - Gvinum now uses one single workerthread instead of one thread for each volume and each plex. The reason for this is that the previous scheme was very complex, and was the cause of many of the bugs discovered in gvinum. Instead, gvinum now uses one worker thread with an event queue, quite similar to what used in gmirror. - The rebuild/grow/initialize/parity check routines no longer runs in separate threads, but are run as regular I/O requests with special flags. This made it easier to support mounted growing and parity rebuild. - Support for growing striped and raid5-plexes, meaning that one can extend the volumes for these plex types in addition to the concat type. Also works while the volume is mounted. - Implementation of many of the missing commands from the old vinum: attach/detach, start (was partially implemented), stop (was partially implemented), concat, mirror, stripe, raid5 (shortcuts for creating volumes with one plex of these organizations). - The parity check and rebuild no longer goes between userland/kernel, meaning that the gvinum command will not stay and wait forever for the rebuild to finish. You can instead watch the status with the list command. - Many problems with gvinum have been reported since 5.x, and some has been hard to fix due to the complicated architecture. Hopefully, it should be more stable and better handle edge cases that previously made gvinum crash. - Failed drives no longer disappears entirely, but now leave behind a dummy drive that makes sure the original state is not forgotten in case the system is rebooted between drive failures/swaps. - Update manpage to reflect new commands and extend it with some examples. Sponsored by: Google Summer of Code 2007 Mentored by: le Tested by: Rick C. Petty <rick-freebsd2008 -at- kiwi-computer.com>
Notes
Notes: svn path=/head/; revision=190507
Diffstat (limited to 'sys/geom/vinum/geom_vinum_subr.c')
-rw-r--r--sys/geom/vinum/geom_vinum_subr.c889
1 files changed, 599 insertions, 290 deletions
diff --git a/sys/geom/vinum/geom_vinum_subr.c b/sys/geom/vinum/geom_vinum_subr.c
index 848fbc7c1e66..70c7f3f6b307 100644
--- a/sys/geom/vinum/geom_vinum_subr.c
+++ b/sys/geom/vinum/geom_vinum_subr.c
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
+ * Copyright (c) 2007, 2009 Ulf Lilleengen
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
@@ -42,59 +43,28 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <geom/geom.h>
-#include <geom/geom_int.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>
-static off_t gv_plex_smallest_sd(struct gv_plex *, off_t);
+int gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
+static off_t gv_plex_smallest_sd(struct gv_plex *);
-/* Find the VINUM class and it's associated geom. */
-struct g_geom *
-find_vinum_geom(void)
-{
- struct g_class *mp;
- struct g_geom *gp;
-
- g_topology_assert();
-
- gp = NULL;
-
- LIST_FOREACH(mp, &g_classes, class) {
- if (!strcmp(mp->name, "VINUM")) {
- gp = LIST_FIRST(&mp->geom);
- break;
- }
- }
-
- return (gp);
-}
-
-/*
- * Parse the vinum config provided in *buf and store it in *gp's softc.
- * If parameter 'merge' is non-zero, then the given config is merged into
- * *gp.
- */
void
-gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
+gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
{
char *aptr, *bptr, *cptr;
struct gv_volume *v, *v2;
struct gv_plex *p, *p2;
struct gv_sd *s, *s2;
- int tokens;
+ int error, is_newer, tokens;
char *token[GV_MAXARGS];
- g_topology_assert();
-
- KASSERT(sc != NULL, ("gv_parse_config: NULL softc"));
+ is_newer = gv_drive_is_newer(sc, d);
/* Until the end of the string *buf. */
for (aptr = buf; *aptr != '\0'; aptr = bptr) {
@@ -109,64 +79,95 @@ gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
tokens = gv_tokenize(cptr, token, GV_MAXARGS);
- if (tokens > 0) {
- if (!strcmp(token[0], "volume")) {
- v = gv_new_volume(tokens, token);
- if (v == NULL) {
- G_VINUM_DEBUG(0, "failed volume");
- break;
- }
+ if (tokens <= 0)
+ continue;
- if (merge) {
- v2 = gv_find_vol(sc, v->name);
- if (v2 != NULL) {
- g_free(v);
- continue;
- }
+ if (!strcmp(token[0], "volume")) {
+ v = gv_new_volume(tokens, token);
+ if (v == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed volume");
+ break;
+ }
+
+ v2 = gv_find_vol(sc, v->name);
+ if (v2 != NULL) {
+ if (is_newer) {
+ v2->state = v->state;
+ G_VINUM_DEBUG(2, "newer volume found!");
}
+ g_free(v);
+ continue;
+ }
- v->vinumconf = sc;
- LIST_INIT(&v->plexes);
- LIST_INSERT_HEAD(&sc->volumes, v, volume);
+ gv_create_volume(sc, v);
- } else if (!strcmp(token[0], "plex")) {
- p = gv_new_plex(tokens, token);
- if (p == NULL) {
- G_VINUM_DEBUG(0, "failed plex");
- break;
- }
+ } else if (!strcmp(token[0], "plex")) {
+ p = gv_new_plex(tokens, token);
+ if (p == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed plex");
+ break;
+ }
- if (merge) {
- p2 = gv_find_plex(sc, p->name);
- if (p2 != NULL) {
- g_free(p);
- continue;
- }
+ p2 = gv_find_plex(sc, p->name);
+ if (p2 != NULL) {
+ /* XXX */
+ if (is_newer) {
+ p2->state = p->state;
+ G_VINUM_DEBUG(2, "newer plex found!");
}
+ g_free(p);
+ continue;
+ }
- p->vinumconf = sc;
- LIST_INIT(&p->subdisks);
- LIST_INSERT_HEAD(&sc->plexes, p, plex);
+ error = gv_create_plex(sc, p);
+ if (error)
+ continue;
+ /*
+ * These flags were set in gv_create_plex() and are not
+ * needed here (on-disk config parsing).
+ */
+ p->flags &= ~GV_PLEX_ADDED;
+ p->flags &= ~GV_PLEX_NEWBORN;
- } else if (!strcmp(token[0], "sd")) {
- s = gv_new_sd(tokens, token);
+ } else if (!strcmp(token[0], "sd")) {
+ s = gv_new_sd(tokens, token);
- if (s == NULL) {
- G_VINUM_DEBUG(0, "failed subdisk");
- break;
- }
+ if (s == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed subdisk");
+ break;
+ }
- if (merge) {
- s2 = gv_find_sd(sc, s->name);
- if (s2 != NULL) {
- g_free(s);
- continue;
- }
+ s2 = gv_find_sd(sc, s->name);
+ if (s2 != NULL) {
+ /* XXX */
+ if (is_newer) {
+ s2->state = s->state;
+ G_VINUM_DEBUG(2, "newer subdisk found!");
}
-
- s->vinumconf = sc;
- LIST_INSERT_HEAD(&sc->subdisks, s, sd);
+ g_free(s);
+ continue;
}
+
+ /*
+ * Signal that this subdisk was tasted, and could
+ * possibly reference a drive that isn't in our config
+ * yet.
+ */
+ s->flags |= GV_SD_TASTED;
+
+ if (s->state == GV_SD_UP)
+ s->flags |= GV_SD_CANGOUP;
+
+ error = gv_create_sd(sc, s);
+ if (error)
+ continue;
+
+ /*
+ * This flag was set in gv_create_sd() and is not
+ * needed here (on-disk config parsing).
+ */
+ s->flags &= ~GV_SD_NEWBORN;
+ s->flags &= ~GV_SD_GROW;
}
}
}
@@ -183,8 +184,6 @@ gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
struct gv_plex *p;
struct gv_volume *v;
- g_topology_assert();
-
/*
* We don't need the drive configuration if we're not writing the
* config to disk.
@@ -233,17 +232,20 @@ gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
sbuf_printf(sb, " state %s", gv_sdstate(s->state));
sbuf_printf(sb, "\n");
}
-
- return;
}
static off_t
-gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
+gv_plex_smallest_sd(struct gv_plex *p)
{
struct gv_sd *s;
+ off_t smallest;
KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
+ s = LIST_FIRST(&p->subdisks);
+ if (s == NULL)
+ return (-1);
+ smallest = s->size;
LIST_FOREACH(s, &p->subdisks, in_plex) {
if (s->size < smallest)
smallest = s->size;
@@ -251,12 +253,29 @@ gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
return (smallest);
}
+/* Walk over plexes in a volume and count how many are down. */
int
-gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
+gv_plexdown(struct gv_volume *v)
{
- struct gv_sd *s2;
+ int plexdown;
+ struct gv_plex *p;
- g_topology_assert();
+ KASSERT(v != NULL, ("gv_plexdown: NULL v"));
+
+ plexdown = 0;
+
+ LIST_FOREACH(p, &v->plexes, plex) {
+ if (p->state == GV_PLEX_DOWN)
+ plexdown++;
+ }
+ return (plexdown);
+}
+
+int
+gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
+{
+ struct gv_sd *s2;
+ off_t psizeorig, remainder, smallest;
/* If this subdisk was already given to this plex, do nothing. */
if (s->plex_sc == p)
@@ -264,15 +283,56 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
/* Check correct size of this subdisk. */
s2 = LIST_FIRST(&p->subdisks);
- if (s2 != NULL && gv_is_striped(p) && (s2->size != s->size)) {
- G_VINUM_DEBUG(0, "need equal sized subdisks for "
- "this plex organisation - %s (%jd) <-> %s (%jd)",
- s2->name, s2->size, s->name, s->size);
- return (-1);
+ /* Adjust the subdisk-size if necessary. */
+ if (s2 != NULL && gv_is_striped(p)) {
+ /* First adjust to the stripesize. */
+ remainder = s->size % p->stripesize;
+
+ if (remainder) {
+ G_VINUM_DEBUG(1, "size of sd %s is not a "
+ "multiple of plex stripesize, taking off "
+ "%jd bytes", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s, remainder);
+ }
+
+ smallest = gv_plex_smallest_sd(p);
+ /* Then take off extra if other subdisks are smaller. */
+ remainder = s->size - smallest;
+
+ /*
+ * Don't allow a remainder below zero for running plexes, it's too
+ * painful, and if someone were to accidentally do this, the
+ * resulting array might be smaller than the original... not god
+ */
+ if (remainder < 0) {
+ if (!(p->flags & GV_PLEX_NEWBORN)) {
+ G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
+ s->name, p->name);
+ return (GV_ERR_BADSIZE);
+ }
+ /* Adjust other subdisks. */
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ G_VINUM_DEBUG(1, "size of sd %s is to big, "
+ "taking off %jd bytes", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s2, (remainder * -1));
+ }
+ } else if (remainder > 0) {
+ G_VINUM_DEBUG(1, "size of sd %s is to big, "
+ "taking off %jd bytes", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s, remainder);
+ }
}
/* Find the correct plex offset for this subdisk, if needed. */
if (s->plex_offset == -1) {
+ /*
+ * First set it to 0 to catch the case where we had a detached
+ * subdisk that didn't get any good offset.
+ */
+ s->plex_offset = 0;
if (p->sdcount) {
LIST_FOREACH(s2, &p->subdisks, in_plex) {
if (gv_is_striped(p))
@@ -282,25 +342,7 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
s->plex_offset = s2->plex_offset +
s2->size;
}
- } else
- s->plex_offset = 0;
- }
-
- p->sdcount++;
-
- /* Adjust the size of our plex. */
- switch (p->org) {
- case GV_PLEX_CONCAT:
- case GV_PLEX_STRIPED:
- p->size += s->size;
- break;
-
- case GV_PLEX_RAID5:
- p->size = (p->sdcount - 1) * gv_plex_smallest_sd(p, s->size);
- break;
-
- default:
- break;
+ }
}
/* There are no subdisks for this plex yet, just insert it. */
@@ -321,6 +363,29 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
}
s->plex_sc = p;
+ /* Adjust the size of our plex. We check if the plex misses a subdisk,
+ * so we don't make the plex smaller than it actually should be.
+ */
+ psizeorig = p->size;
+ p->size = gv_plex_size(p);
+ /* Make sure the size is not changed. */
+ if (p->sddetached > 0) {
+ if (p->size < psizeorig) {
+ p->size = psizeorig;
+ /* We make sure wee need another subdisk. */
+ if (p->sddetached == 1)
+ p->sddetached++;
+ }
+ p->sddetached--;
+ } else {
+ if ((p->org == GV_PLEX_RAID5 ||
+ p->org == GV_PLEX_STRIPED) &&
+ !(p->flags & GV_PLEX_NEWBORN) &&
+ p->state >= GV_PLEX_DEGRADED) {
+ s->flags |= GV_SD_GROW;
+ }
+ p->sdcount++;
+ }
return (0);
}
@@ -328,21 +393,32 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
void
gv_update_vol_size(struct gv_volume *v, off_t size)
{
- struct g_geom *gp;
- struct g_provider *pp;
-
if (v == NULL)
return;
+ if (v->provider != NULL) {
+ g_topology_lock();
+ v->provider->mediasize = size;
+ g_topology_unlock();
+ }
+ v->size = size;
+}
- gp = v->geom;
- if (gp == NULL)
- return;
+/* Return how many subdisks that constitute the original plex. */
+int
+gv_sdcount(struct gv_plex *p, int growing)
+{
+ struct gv_sd *s;
+ int sdcount;
- LIST_FOREACH(pp, &gp->provider, provider) {
- pp->mediasize = size;
+ sdcount = p->sdcount;
+ if (growing) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW)
+ sdcount--;
+ }
}
- v->size = size;
+ return (sdcount);
}
/* Calculates the plex size. */
@@ -351,14 +427,13 @@ gv_plex_size(struct gv_plex *p)
{
struct gv_sd *s;
off_t size;
+ int sdcount;
KASSERT(p != NULL, ("gv_plex_size: NULL p"));
- if (p->sdcount == 0)
- return (0);
-
/* Adjust the size of our plex. */
size = 0;
+ sdcount = gv_sdcount(p, 1);
switch (p->org) {
case GV_PLEX_CONCAT:
LIST_FOREACH(s, &p->subdisks, in_plex)
@@ -366,11 +441,11 @@ gv_plex_size(struct gv_plex *p)
break;
case GV_PLEX_STRIPED:
s = LIST_FIRST(&p->subdisks);
- size = p->sdcount * s->size;
+ size = ((s != NULL) ? (sdcount * s->size) : 0);
break;
case GV_PLEX_RAID5:
s = LIST_FIRST(&p->subdisks);
- size = (p->sdcount - 1) * s->size;
+ size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
break;
}
@@ -391,7 +466,7 @@ gv_vol_size(struct gv_volume *v)
return (0);
minplexsize = p->size;
- LIST_FOREACH(p, &v->plexes, plex) {
+ LIST_FOREACH(p, &v->plexes, in_volume) {
if (p->size < minplexsize) {
minplexsize = p->size;
}
@@ -408,12 +483,9 @@ gv_update_plex_config(struct gv_plex *p)
KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
- /* This is what we want the plex to be. */
- state = GV_PLEX_UP;
-
/* The plex was added to an already running volume. */
if (p->flags & GV_PLEX_ADDED)
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
switch (p->org) {
case GV_PLEX_STRIPED:
@@ -430,7 +502,7 @@ gv_update_plex_config(struct gv_plex *p)
if (required_sds) {
if (p->sdcount < required_sds) {
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
}
/*
@@ -442,12 +514,13 @@ gv_update_plex_config(struct gv_plex *p)
G_VINUM_DEBUG(0, "subdisk size mismatch %s"
"(%jd) <> %s (%jd)", s->name, s->size,
s2->name, s2->size);
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN,
+ GV_SETSTATE_FORCE);
}
}
- /* Trim subdisk sizes so that they match the stripe size. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Trim subdisk sizes to match the stripe size. */
remainder = s->size % p->stripesize;
if (remainder) {
G_VINUM_DEBUG(1, "size of sd %s is not a "
@@ -458,41 +531,32 @@ gv_update_plex_config(struct gv_plex *p)
}
}
- /* Adjust the size of our plex. */
- if (p->sdcount > 0) {
- p->size = 0;
- switch (p->org) {
- case GV_PLEX_CONCAT:
- LIST_FOREACH(s, &p->subdisks, in_plex)
- p->size += s->size;
- break;
-
- case GV_PLEX_STRIPED:
- s = LIST_FIRST(&p->subdisks);
- p->size = p->sdcount * s->size;
- break;
-
- case GV_PLEX_RAID5:
- s = LIST_FIRST(&p->subdisks);
- p->size = (p->sdcount - 1) * s->size;
- break;
-
- default:
- break;
- }
- }
-
+ p->size = gv_plex_size(p);
if (p->sdcount == 0)
- state = GV_PLEX_DOWN;
- else if ((p->flags & GV_PLEX_ADDED) ||
- ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_NEWBORN))) {
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
+ else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
+ /* If added to a volume, we want the plex to be down. */
+ state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
+ gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
+ p->flags &= ~GV_PLEX_ADDED;
+ } else if (p->flags & GV_PLEX_ADDED) {
LIST_FOREACH(s, &p->subdisks, in_plex)
- s->state = GV_SD_STALE;
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
p->flags &= ~GV_PLEX_ADDED;
- p->flags &= ~GV_PLEX_NEWBORN;
- state = GV_PLEX_DOWN;
+ } else if (p->state == GV_PLEX_UP) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW) {
+ gv_set_plex_state(p, GV_PLEX_GROWABLE,
+ GV_SETSTATE_FORCE);
+ break;
+ }
+ }
}
- p->state = state;
+ /* Our plex is grown up now. */
+ p->flags &= ~GV_PLEX_NEWBORN;
}
/*
@@ -500,76 +564,82 @@ gv_update_plex_config(struct gv_plex *p)
* freelist.
*/
int
-gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
- char *errstr, int errlen)
+gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
{
struct gv_sd *s2;
struct gv_freelist *fl, *fl2;
off_t tmp;
int i;
- g_topology_assert();
-
fl2 = NULL;
- KASSERT(sc != NULL, ("gv_sd_to_drive: NULL softc"));
- KASSERT(d != NULL, ("gv_sd_to_drive: NULL drive"));
- KASSERT(s != NULL, ("gv_sd_to_drive: NULL subdisk"));
- KASSERT(errstr != NULL, ("gv_sd_to_drive: NULL errstr"));
- KASSERT(errlen >= ERRBUFSIZ, ("gv_sd_to_drive: short errlen (%d)",
- errlen));
+ /* Shortcut for "referenced" drives. */
+ if (d->flags & GV_DRIVE_REFERENCED) {
+ s->drive_sc = d;
+ return (0);
+ }
/* Check if this subdisk was already given to this drive. */
- if (s->drive_sc == d)
- return (0);
+ if (s->drive_sc != NULL) {
+ if (s->drive_sc == d) {
+ if (!(s->flags & GV_SD_TASTED)) {
+ return (0);
+ }
+ } else {
+ G_VINUM_DEBUG(0, "can't give sd '%s' to '%s' "
+ "(already on '%s')", s->name, d->name,
+ s->drive_sc->name);
+ return (GV_ERR_ISATTACHED);
+ }
+ }
/* Preliminary checks. */
- if (s->size > d->avail || d->freelist_entries == 0) {
- snprintf(errstr, errlen, "not enough space on '%s' for '%s'",
- d->name, s->name);
- return (-1);
+ if ((s->size > d->avail) || (d->freelist_entries == 0)) {
+ G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
+ s->name);
+ return (GV_ERR_NOSPACE);
}
- /* No size given, autosize it. */
+ /* If no size was given for this subdisk, try to auto-size it... */
if (s->size == -1) {
/* Find the largest available slot. */
LIST_FOREACH(fl, &d->freelist, freelist) {
- if (fl->size >= s->size) {
- s->size = fl->size;
- s->drive_offset = fl->offset;
- fl2 = fl;
- }
+ if (fl->size < s->size)
+ continue;
+ s->size = fl->size;
+ s->drive_offset = fl->offset;
+ fl2 = fl;
}
/* No good slot found? */
if (s->size == -1) {
- snprintf(errstr, errlen, "could not autosize '%s' on "
- "'%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "couldn't autosize '%s' on '%s'",
+ s->name, d->name);
+ return (GV_ERR_BADSIZE);
}
/*
- * Check if we have a free slot that's large enough for the given size.
+ * ... or check if we have a free slot that's large enough for the
+ * given size.
*/
} else {
i = 0;
LIST_FOREACH(fl, &d->freelist, freelist) {
- /* Yes, this subdisk fits. */
- if (fl->size >= s->size) {
- i++;
- /* Assign drive offset, if not given. */
- if (s->drive_offset == -1)
- s->drive_offset = fl->offset;
- fl2 = fl;
- break;
- }
+ if (fl->size < s->size)
+ continue;
+ /* Assign drive offset, if not given. */
+ if (s->drive_offset == -1)
+ s->drive_offset = fl->offset;
+ fl2 = fl;
+ i++;
+ break;
}
/* Couldn't find a good free slot. */
if (i == 0) {
- snprintf(errstr, errlen, "free slots to small for '%s' "
- "on '%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
+ s->name, d->name);
+ return (GV_ERR_NOSPACE);
}
}
@@ -604,9 +674,9 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
/* Couldn't find a good free slot. */
if (i == 0) {
- snprintf(errstr, errlen, "given drive_offset for '%s' "
- "won't fit on '%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
+ "on '%s'", s->name, d->name);
+ return (GV_ERR_NOSPACE);
}
}
@@ -617,49 +687,41 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
/* First, adjust the freelist. */
LIST_FOREACH(fl, &d->freelist, freelist) {
+ /* Look for the free slot that we have found before. */
+ if (fl != fl2)
+ continue;
+
+ /* The subdisk starts at the beginning of the free slot. */
+ if (fl->offset == s->drive_offset) {
+ fl->offset += s->size;
+ fl->size -= s->size;
+
+ /* The subdisk uses the whole slot, so remove it. */
+ if (fl->size == 0) {
+ d->freelist_entries--;
+ LIST_REMOVE(fl, freelist);
+ }
+ /*
+ * The subdisk does not start at the beginning of the free
+ * slot.
+ */
+ } else {
+ tmp = fl->offset + fl->size;
+ fl->size = s->drive_offset - fl->offset;
- /* This is the free slot that we have found before. */
- if (fl == fl2) {
-
- /*
- * The subdisk starts at the beginning of the free
- * slot.
- */
- if (fl->offset == s->drive_offset) {
- fl->offset += s->size;
- fl->size -= s->size;
-
- /*
- * The subdisk uses the whole slot, so remove
- * it.
- */
- if (fl->size == 0) {
- d->freelist_entries--;
- LIST_REMOVE(fl, freelist);
- }
/*
- * The subdisk does not start at the beginning of the
- * free slot.
+ * The subdisk didn't use the complete rest of the free
+ * slot, so we need to split it.
*/
- } else {
- tmp = fl->offset + fl->size;
- fl->size = s->drive_offset - fl->offset;
-
- /*
- * The subdisk didn't use the complete rest of
- * the free slot, so we need to split it.
- */
- if (s->drive_offset + s->size != tmp) {
- fl2 = g_malloc(sizeof(*fl2),
- M_WAITOK | M_ZERO);
- fl2->offset = s->drive_offset + s->size;
- fl2->size = tmp - fl2->offset;
- LIST_INSERT_AFTER(fl, fl2, freelist);
- d->freelist_entries++;
- }
+ if (s->drive_offset + s->size != tmp) {
+ fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
+ fl2->offset = s->drive_offset + s->size;
+ fl2->size = tmp - fl2->offset;
+ LIST_INSERT_AFTER(fl, fl2, freelist);
+ d->freelist_entries++;
}
- break;
}
+ break;
}
/*
@@ -685,6 +747,8 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
d->sdcount++;
d->avail -= s->size;
+ s->flags &= ~GV_SD_TASTED;
+
/* Link back from the subdisk to this drive. */
s->drive_sc = d;
@@ -869,17 +933,65 @@ gv_find_drive(struct gv_softc *sc, char *name)
return (NULL);
}
+/* Find a drive given a device. */
+struct gv_drive *
+gv_find_drive_device(struct gv_softc *sc, char *device)
+{
+ struct gv_drive *d;
+
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if(!strcmp(d->device, device))
+ return (d);
+ }
+
+ return (NULL);
+}
+
/* Check if any consumer of the given geom is open. */
int
-gv_is_open(struct g_geom *gp)
+gv_consumer_is_open(struct g_consumer *cp)
{
- struct g_consumer *cp;
+ if (cp == NULL)
+ return (0);
- if (gp == NULL)
+ if (cp->acr || cp->acw || cp->ace)
+ return (1);
+
+ return (0);
+}
+
+int
+gv_provider_is_open(struct g_provider *pp)
+{
+ if (pp == NULL)
return (0);
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- if (cp->acr || cp->acw || cp->ace)
+ if (pp->acr || pp->acw || pp->ace)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Compare the modification dates of the drives.
+ * Return 1 if a > b, 0 otherwise.
+ */
+int
+gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
+{
+ struct gv_drive *d2;
+ struct timeval *a, *b;
+
+ KASSERT(!LIST_EMPTY(&sc->drives),
+ ("gv_is_drive_newer: empty drive list"));
+
+ a = &d->hdr->label.last_update;
+ LIST_FOREACH(d2, &sc->drives, drive) {
+ if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
+ (d2->hdr == NULL))
+ continue;
+ b = &d2->hdr->label.last_update;
+ if (timevalcmp(a, b, >))
return (1);
}
@@ -915,58 +1027,255 @@ gv_object_type(struct gv_softc *sc, char *name)
return (GV_TYPE_DRIVE);
}
- return (-1);
+ return (GV_ERR_NOTFOUND);
}
void
-gv_kill_drive_thread(struct gv_drive *d)
+gv_setup_objects(struct gv_softc *sc)
{
- if (d->flags & GV_DRIVE_THREAD_ACTIVE) {
- d->flags |= GV_DRIVE_THREAD_DIE;
- wakeup(d);
- while (!(d->flags & GV_DRIVE_THREAD_DEAD))
- tsleep(d, PRIBIO, "gv_die", hz);
- d->flags &= ~GV_DRIVE_THREAD_ACTIVE;
- d->flags &= ~GV_DRIVE_THREAD_DIE;
- d->flags &= ~GV_DRIVE_THREAD_DEAD;
- g_free(d->bqueue);
- d->bqueue = NULL;
- mtx_destroy(&d->bqueue_mtx);
+ struct g_provider *pp;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_drive *d;
+
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ d = gv_find_drive(sc, s->drive);
+ if (d != NULL)
+ gv_sd_to_drive(s, d);
+ p = gv_find_plex(sc, s->plex);
+ if (p != NULL)
+ gv_sd_to_plex(s, p);
+ gv_update_sd_state(s);
+ }
+
+ LIST_FOREACH(p, &sc->plexes, plex) {
+ gv_update_plex_config(p);
+ v = gv_find_vol(sc, p->volume);
+ if (v != NULL && p->vol_sc != v) {
+ p->vol_sc = v;
+ v->plexcount++;
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+ }
+ gv_update_plex_config(p);
+ }
+
+ LIST_FOREACH(v, &sc->volumes, volume) {
+ v->size = gv_vol_size(v);
+ if (v->provider == NULL) {
+ g_topology_lock();
+ pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
+ pp->mediasize = v->size;
+ pp->sectorsize = 512; /* XXX */
+ g_error_provider(pp, 0);
+ v->provider = pp;
+ pp->private = v;
+ g_topology_unlock();
+ } else if (v->provider->mediasize != v->size) {
+ g_topology_lock();
+ v->provider->mediasize = v->size;
+ g_topology_unlock();
+ }
+ v->flags &= ~GV_VOL_NEWBORN;
+ gv_update_vol_state(v);
}
}
void
-gv_kill_plex_thread(struct gv_plex *p)
+gv_cleanup(struct gv_softc *sc)
{
- if (p->flags & GV_PLEX_THREAD_ACTIVE) {
- p->flags |= GV_PLEX_THREAD_DIE;
- wakeup(p);
- while (!(p->flags & GV_PLEX_THREAD_DEAD))
- tsleep(p, PRIBIO, "gv_die", hz);
- p->flags &= ~GV_PLEX_THREAD_ACTIVE;
- p->flags &= ~GV_PLEX_THREAD_DIE;
- p->flags &= ~GV_PLEX_THREAD_DEAD;
+ struct gv_volume *v, *v2;
+ struct gv_plex *p, *p2;
+ struct gv_sd *s, *s2;
+ struct gv_drive *d, *d2;
+ struct gv_freelist *fl, *fl2;
+
+ mtx_lock(&sc->config_mtx);
+ LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
+ LIST_REMOVE(v, volume);
+ g_free(v->wqueue);
+ g_free(v);
+ }
+ LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
+ LIST_REMOVE(p, plex);
g_free(p->bqueue);
+ g_free(p->rqueue);
g_free(p->wqueue);
- p->bqueue = NULL;
- p->wqueue = NULL;
- mtx_destroy(&p->bqueue_mtx);
+ g_free(p);
+ }
+ LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
+ LIST_REMOVE(s, sd);
+ g_free(s);
}
+ LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
+ LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
+ LIST_REMOVE(fl, freelist);
+ g_free(fl);
+ }
+ LIST_REMOVE(d, drive);
+ g_free(d->hdr);
+ g_free(d);
+ }
+ mtx_destroy(&sc->config_mtx);
}
-void
-gv_kill_vol_thread(struct gv_volume *v)
+/* General 'attach' routine. */
+int
+gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
+{
+ struct gv_sd *s;
+ struct gv_softc *sc;
+
+ g_topology_assert();
+
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("NULL sc"));
+
+ if (p->vol_sc != NULL) {
+ G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
+ p->name, p->volume);
+ return (GV_ERR_ISATTACHED);
+ }
+
+ /* Stale all subdisks of this plex. */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->state != GV_SD_STALE)
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+ }
+ /* Attach to volume. Make sure volume is not up and running. */
+ if (gv_provider_is_open(v->provider)) {
+ G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
+ p->name, v->name);
+ return (GV_ERR_ISBUSY);
+ }
+ p->vol_sc = v;
+ strlcpy(p->volume, v->name, sizeof(p->volume));
+ v->plexcount++;
+ if (rename) {
+ snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
+ v->plexcount);
+ }
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+
+ /* Get plex up again. */
+ gv_update_vol_size(v, gv_vol_size(v));
+ gv_set_plex_state(p, GV_PLEX_UP, 0);
+ gv_save_config(p->vinumconf);
+ return (0);
+}
+
+int
+gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
+{
+ struct gv_sd *s2;
+ int error, sdcount;
+
+ g_topology_assert();
+
+ /* If subdisk is attached, don't do it. */
+ if (s->plex_sc != NULL) {
+ G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
+ s->name, s->plex);
+ return (GV_ERR_ISATTACHED);
+ }
+
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+ /* First check that this subdisk has a correct offset. If none other
+ * starts at the same, and it's correct module stripesize, it is */
+ if (offset != -1 && offset % p->stripesize != 0)
+ return (GV_ERR_BADOFFSET);
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ if (s2->plex_offset == offset)
+ return (GV_ERR_BADOFFSET);
+ }
+
+ /* Attach the subdisk to the plex at given offset. */
+ s->plex_offset = offset;
+ strlcpy(s->plex, p->name, sizeof(s->plex));
+
+ sdcount = p->sdcount;
+ error = gv_sd_to_plex(s, p);
+ if (error)
+ return (error);
+ gv_update_plex_config(p);
+
+ if (rename) {
+ snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
+ p->sdcount);
+ }
+ if (p->vol_sc != NULL)
+ gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
+ gv_save_config(p->vinumconf);
+ /* We don't update the subdisk state since the user might have to
+ * initiate a rebuild/sync first. */
+ return (0);
+}
+
+/* Detach a plex from a volume. */
+int
+gv_detach_plex(struct gv_plex *p, int flags)
{
- if (v->flags & GV_VOL_THREAD_ACTIVE) {
- v->flags |= GV_VOL_THREAD_DIE;
- wakeup(v);
- while (!(v->flags & GV_VOL_THREAD_DEAD))
- tsleep(v, PRIBIO, "gv_die", hz);
- v->flags &= ~GV_VOL_THREAD_ACTIVE;
- v->flags &= ~GV_VOL_THREAD_DIE;
- v->flags &= ~GV_VOL_THREAD_DEAD;
- g_free(v->bqueue);
- v->bqueue = NULL;
- mtx_destroy(&v->bqueue_mtx);
+ struct gv_volume *v;
+
+ g_topology_assert();
+ v = p->vol_sc;
+
+ if (v == NULL) {
+ G_VINUM_DEBUG(1, "unable to detach %s: already detached",
+ p->name);
+ return (0); /* Not an error. */
+ }
+
+ /*
+ * Only proceed if forced or volume inactive.
+ */
+ if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
+ p->state == GV_PLEX_UP)) {
+ G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
+ p->name, p->volume);
+ return (GV_ERR_ISBUSY);
}
+ v->plexcount--;
+ /* Make sure someone don't read us when gone. */
+ v->last_read_plex = NULL;
+ LIST_REMOVE(p, in_volume);
+ p->vol_sc = NULL;
+ memset(p->volume, 0, GV_MAXVOLNAME);
+ gv_update_vol_size(v, gv_vol_size(v));
+ gv_save_config(p->vinumconf);
+ return (0);
+}
+
+/* Detach a subdisk from a plex. */
+int
+gv_detach_sd(struct gv_sd *s, int flags)
+{
+ struct gv_plex *p;
+
+ g_topology_assert();
+ p = s->plex_sc;
+
+ if (p == NULL) {
+ G_VINUM_DEBUG(1, "unable to detach %s: already detached",
+ s->name);
+ return (0); /* Not an error. */
+ }
+
+ /*
+ * Don't proceed if we're not forcing, and the plex is up, or degraded
+ * with this subdisk up.
+ */
+ if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
+ ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
+ G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
+ s->name, s->plex);
+ return (GV_ERR_ISBUSY);
+ }
+
+ LIST_REMOVE(s, in_plex);
+ s->plex_sc = NULL;
+ memset(s->plex, 0, GV_MAXPLEXNAME);
+ p->sddetached++;
+ gv_save_config(s->vinumconf);
+ return (0);
}