diff options
Diffstat (limited to 'sbin/hastd/activemap.c')
-rw-r--r-- | sbin/hastd/activemap.c | 691 |
1 files changed, 691 insertions, 0 deletions
diff --git a/sbin/hastd/activemap.c b/sbin/hastd/activemap.c new file mode 100644 index 000000000000..10eb64103057 --- /dev/null +++ b/sbin/hastd/activemap.c @@ -0,0 +1,691 @@ +/*- + * Copyright (c) 2009-2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> /* powerof2() */ +#include <sys/queue.h> + +#include <assert.h> +#include <bitstring.h> +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <activemap.h> + +#define ACTIVEMAP_MAGIC 0xac71e4 +struct activemap { + int am_magic; /* Magic value. */ + off_t am_mediasize; /* Media size in bytes. */ + uint32_t am_extentsize; /* Extent size in bytes, + must be power of 2. */ + uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */ + int am_nextents; /* Number of extents. */ + size_t am_mapsize; /* Bitmap size in bytes. */ + uint16_t *am_memtab; /* An array that holds number of pending + writes per extent. */ + bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */ + bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */ + size_t am_diskmapsize; /* Map size rounded up to sector size. */ + uint64_t am_ndirty; /* Number of dirty regions. */ + bitstr_t *am_syncmap; /* Bitmap of extents to sync. */ + off_t am_syncoff; /* Next synchronization offset. */ + TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that + we keep dirty to reduce bitmap + updates. */ + int am_nkeepdirty; /* Number of am_keepdirty elements. */ + int am_nkeepdirty_limit; /* Maximum number of am_keepdirty + elements. */ +}; + +struct keepdirty { + int kd_extent; + TAILQ_ENTRY(keepdirty) kd_next; +}; + +/* + * Helper function taken from sys/systm.h to calculate extentshift. + */ +static uint32_t +bitcount32(uint32_t x) +{ + + x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); + x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); + x = (x + (x >> 4)) & 0x0f0f0f0f; + x = (x + (x >> 8)); + x = (x + (x >> 16)) & 0x000000ff; + return (x); +} + +static __inline int +off2ext(const struct activemap *amp, off_t offset) +{ + int extent; + + assert(offset >= 0 && offset < amp->am_mediasize); + extent = (offset >> amp->am_extentshift); + assert(extent >= 0 && extent < amp->am_nextents); + return (extent); +} + +static __inline off_t +ext2off(const struct activemap *amp, int extent) +{ + off_t offset; + + assert(extent >= 0 && extent < amp->am_nextents); + offset = ((off_t)extent << amp->am_extentshift); + assert(offset >= 0 && offset < amp->am_mediasize); + return (offset); +} + +/* + * Function calculates number of requests needed to synchronize the given + * extent. + */ +static __inline int +ext2reqs(const struct activemap *amp, int ext) +{ + off_t left; + + if (ext < amp->am_nextents - 1) + return (((amp->am_extentsize - 1) / MAXPHYS) + 1); + + assert(ext == amp->am_nextents - 1); + left = amp->am_mediasize % amp->am_extentsize; + if (left == 0) + left = amp->am_extentsize; + return (((left - 1) / MAXPHYS) + 1); +} + +/* + * Initialize activemap structure and allocate memory for internal needs. + * Function returns 0 on success and -1 if any of the allocations failed. + */ +int +activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize, + uint32_t sectorsize, uint32_t keepdirty) +{ + struct activemap *amp; + + assert(ampp != NULL); + assert(mediasize > 0); + assert(extentsize > 0); + assert(powerof2(extentsize)); + assert(sectorsize > 0); + assert(powerof2(sectorsize)); + assert(keepdirty > 0); + + amp = malloc(sizeof(*amp)); + if (amp == NULL) + return (-1); + + amp->am_mediasize = mediasize; + amp->am_nkeepdirty_limit = keepdirty; + amp->am_extentsize = extentsize; + amp->am_extentshift = bitcount32(extentsize - 1); + amp->am_nextents = ((mediasize - 1) / extentsize) + 1; + amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents); + amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize); + amp->am_ndirty = 0; + amp->am_syncoff = -2; + TAILQ_INIT(&->am_keepdirty); + amp->am_nkeepdirty = 0; + + amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0])); + amp->am_diskmap = calloc(1, amp->am_diskmapsize); + amp->am_memmap = bit_alloc(amp->am_nextents); + amp->am_syncmap = bit_alloc(amp->am_nextents); + + /* + * Check to see if any of the allocations above failed. + */ + if (amp->am_memtab == NULL || amp->am_diskmap == NULL || + amp->am_memmap == NULL || amp->am_syncmap == NULL) { + if (amp->am_memtab != NULL) + free(amp->am_memtab); + if (amp->am_diskmap != NULL) + free(amp->am_diskmap); + if (amp->am_memmap != NULL) + free(amp->am_memmap); + if (amp->am_syncmap != NULL) + free(amp->am_syncmap); + amp->am_magic = 0; + free(amp); + errno = ENOMEM; + return (-1); + } + + amp->am_magic = ACTIVEMAP_MAGIC; + *ampp = amp; + + return (0); +} + +static struct keepdirty * +keepdirty_find(struct activemap *amp, int extent) +{ + struct keepdirty *kd; + + TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) { + if (kd->kd_extent == extent) + break; + } + return (kd); +} + +static void +keepdirty_add(struct activemap *amp, int extent) +{ + struct keepdirty *kd; + + kd = keepdirty_find(amp, extent); + if (kd != NULL) { + /* + * Only move element at the begining. + */ + TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); + TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); + return; + } + /* + * Add new element, but first remove the most unused one if + * we have too many. + */ + if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) { + kd = TAILQ_LAST(&->am_keepdirty, skeepdirty); + assert(kd != NULL); + TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); + amp->am_nkeepdirty--; + assert(amp->am_nkeepdirty > 0); + } + if (kd == NULL) + kd = malloc(sizeof(*kd)); + /* We can ignore allocation failure. */ + if (kd != NULL) { + kd->kd_extent = extent; + amp->am_nkeepdirty++; + TAILQ_INSERT_HEAD(&->am_keepdirty, kd, kd_next); + } +} + +static void +keepdirty_fill(struct activemap *amp) +{ + struct keepdirty *kd; + + TAILQ_FOREACH(kd, &->am_keepdirty, kd_next) + bit_set(amp->am_diskmap, kd->kd_extent); +} + +static void +keepdirty_free(struct activemap *amp) +{ + struct keepdirty *kd; + + while ((kd = TAILQ_FIRST(&->am_keepdirty)) != NULL) { + TAILQ_REMOVE(&->am_keepdirty, kd, kd_next); + amp->am_nkeepdirty--; + free(kd); + } + assert(amp->am_nkeepdirty == 0); +} + +/* + * Function frees resources allocated by activemap_init() function. + */ +void +activemap_free(struct activemap *amp) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + amp->am_magic = 0; + + keepdirty_free(amp); + free(amp->am_memtab); + free(amp->am_diskmap); + free(amp->am_memmap); + free(amp->am_syncmap); +} + +/* + * Function should be called before we handle write requests. It updates + * internal structures and returns true if on-disk metadata should be updated. + */ +bool +activemap_write_start(struct activemap *amp, off_t offset, off_t length) +{ + bool modified; + off_t end; + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(length > 0); + + modified = false; + end = offset + length - 1; + + for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { + /* + * If the number of pending writes is increased from 0, + * we have to mark the extent as dirty also in on-disk bitmap. + * By returning true we inform the caller that on-disk bitmap + * was modified and has to be flushed to disk. + */ + if (amp->am_memtab[ext]++ == 0) { + assert(!bit_test(amp->am_memmap, ext)); + bit_set(amp->am_memmap, ext); + amp->am_ndirty++; + modified = true; + } + keepdirty_add(amp, ext); + } + + return (modified); +} + +/* + * Function should be called after receiving write confirmation. It updates + * internal structures and returns true if on-disk metadata should be updated. + */ +bool +activemap_write_complete(struct activemap *amp, off_t offset, off_t length) +{ + bool modified; + off_t end; + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(length > 0); + + modified = false; + end = offset + length - 1; + + for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { + /* + * If the number of pending writes goes down to 0, we have to + * mark the extent as clean also in on-disk bitmap. + * By returning true we inform the caller that on-disk bitmap + * was modified and has to be flushed to disk. + */ + assert(amp->am_memtab[ext] > 0); + assert(bit_test(amp->am_memmap, ext)); + if (--amp->am_memtab[ext] == 0) { + bit_clear(amp->am_memmap, ext); + amp->am_ndirty--; + modified = true; + } + } + + return (modified); +} + +/* + * Function should be called after finishing synchronization of one extent. + * It returns true if on-disk metadata should be updated. + */ +bool +activemap_extent_complete(struct activemap *amp, int extent) +{ + bool modified; + int reqs; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(extent >= 0 && extent < amp->am_nextents); + + modified = false; + + reqs = ext2reqs(amp, extent); + assert(amp->am_memtab[extent] >= reqs); + amp->am_memtab[extent] -= reqs; + assert(bit_test(amp->am_memmap, extent)); + if (amp->am_memtab[extent] == 0) { + bit_clear(amp->am_memmap, extent); + amp->am_ndirty--; + modified = true; + } + + return (modified); +} + +/* + * Function returns number of dirty regions. + */ +uint64_t +activemap_ndirty(const struct activemap *amp) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + return (amp->am_ndirty); +} + +/* + * Function compare on-disk bitmap and in-memory bitmap and returns true if + * they differ and should be flushed to the disk. + */ +bool +activemap_differ(const struct activemap *amp) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + return (memcmp(amp->am_diskmap, amp->am_memmap, + amp->am_mapsize) != 0); +} + +/* + * Function returns number of bytes used by bitmap. + */ +size_t +activemap_size(const struct activemap *amp) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + return (amp->am_mapsize); +} + +/* + * Function returns number of bytes needed for storing on-disk bitmap. + * This is the same as activemap_size(), but rounded up to sector size. + */ +size_t +activemap_ondisk_size(const struct activemap *amp) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + return (amp->am_diskmapsize); +} + +/* + * Function copies the given buffer read from disk to the internal bitmap. + */ +void +activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size) +{ + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(size >= amp->am_mapsize); + + memcpy(amp->am_diskmap, buf, amp->am_mapsize); + memcpy(amp->am_memmap, buf, amp->am_mapsize); + memcpy(amp->am_syncmap, buf, amp->am_mapsize); + + bit_ffs(amp->am_memmap, amp->am_nextents, &ext); + if (ext == -1) { + /* There are no dirty extents, so we can leave now. */ + return; + } + /* + * Set synchronization offset to the first dirty extent. + */ + activemap_sync_rewind(amp); + /* + * We have dirty extents and we want them to stay that way until + * we synchronize, so we set number of pending writes to number + * of requests needed to synchronize one extent. + */ + amp->am_ndirty = 0; + for (; ext < amp->am_nextents; ext++) { + if (bit_test(amp->am_memmap, ext)) { + amp->am_memtab[ext] = ext2reqs(amp, ext); + amp->am_ndirty++; + } + } +} + +/* + * Function merges the given bitmap with existng one. + */ +void +activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size) +{ + bitstr_t *remmap = __DECONST(bitstr_t *, buf); + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(size >= amp->am_mapsize); + + bit_ffs(remmap, amp->am_nextents, &ext); + if (ext == -1) { + /* There are no dirty extents, so we can leave now. */ + return; + } + /* + * We have dirty extents and we want them to stay that way until + * we synchronize, so we set number of pending writes to number + * of requests needed to synchronize one extent. + */ + for (; ext < amp->am_nextents; ext++) { + /* Local extent already dirty. */ + if (bit_test(amp->am_syncmap, ext)) + continue; + /* Remote extent isn't dirty. */ + if (!bit_test(remmap, ext)) + continue; + bit_set(amp->am_syncmap, ext); + bit_set(amp->am_memmap, ext); + bit_set(amp->am_diskmap, ext); + if (amp->am_memtab[ext] == 0) + amp->am_ndirty++; + amp->am_memtab[ext] = ext2reqs(amp, ext); + } + /* + * Set synchronization offset to the first dirty extent. + */ + activemap_sync_rewind(amp); +} + +/* + * Function returns pointer to internal bitmap that should be written to disk. + */ +const unsigned char * +activemap_bitmap(struct activemap *amp, size_t *sizep) +{ + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + if (sizep != NULL) + *sizep = amp->am_diskmapsize; + memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize); + keepdirty_fill(amp); + return ((const unsigned char *)amp->am_diskmap); +} + +/* + * Function calculates size needed to store bitmap on disk. + */ +size_t +activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize, + uint32_t sectorsize) +{ + uint64_t nextents, mapsize; + + assert(mediasize > 0); + assert(extentsize > 0); + assert(powerof2(extentsize)); + assert(sectorsize > 0); + assert(powerof2(sectorsize)); + + nextents = ((mediasize - 1) / extentsize) + 1; + mapsize = sizeof(bitstr_t) * bitstr_size(nextents); + return (roundup2(mapsize, sectorsize)); +} + +/* + * Set synchronization offset to the first dirty extent. + */ +void +activemap_sync_rewind(struct activemap *amp) +{ + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); + if (ext == -1) { + /* There are no extents to synchronize. */ + amp->am_syncoff = -2; + return; + } + /* + * Mark that we want to start synchronization from the begining. + */ + amp->am_syncoff = -1; +} + +/* + * Return next offset of where we should synchronize. + */ +off_t +activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp) +{ + off_t syncoff, left; + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + assert(lengthp != NULL); + assert(syncextp != NULL); + + *syncextp = -1; + + if (amp->am_syncoff == -2) + return (-1); + + if (amp->am_syncoff >= 0 && + (amp->am_syncoff + MAXPHYS >= amp->am_mediasize || + off2ext(amp, amp->am_syncoff) != + off2ext(amp, amp->am_syncoff + MAXPHYS))) { + /* + * We are about to change extent, so mark previous one as clean. + */ + ext = off2ext(amp, amp->am_syncoff); + bit_clear(amp->am_syncmap, ext); + *syncextp = ext; + amp->am_syncoff = -1; + } + + if (amp->am_syncoff == -1) { + /* + * Let's find first extent to synchronize. + */ + bit_ffs(amp->am_syncmap, amp->am_nextents, &ext); + if (ext == -1) { + amp->am_syncoff = -2; + return (-1); + } + amp->am_syncoff = ext2off(amp, ext); + } else { + /* + * We don't change extent, so just increase offset. + */ + amp->am_syncoff += MAXPHYS; + if (amp->am_syncoff >= amp->am_mediasize) { + amp->am_syncoff = -2; + return (-1); + } + } + + syncoff = amp->am_syncoff; + left = ext2off(amp, off2ext(amp, syncoff)) + + amp->am_extentsize - syncoff; + if (syncoff + left > amp->am_mediasize) + left = amp->am_mediasize - syncoff; + if (left > MAXPHYS) + left = MAXPHYS; + + assert(left >= 0 && left <= MAXPHYS); + assert(syncoff >= 0 && syncoff < amp->am_mediasize); + assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize); + + *lengthp = left; + return (syncoff); +} + +/* + * Mark extent(s) containing the given region for synchronization. + * Most likely one of the components is unavailable. + */ +bool +activemap_need_sync(struct activemap *amp, off_t offset, off_t length) +{ + bool modified; + off_t end; + int ext; + + assert(amp->am_magic == ACTIVEMAP_MAGIC); + + modified = false; + end = offset + length - 1; + + for (ext = off2ext(amp, offset); ext <= off2ext(amp, end); ext++) { + if (bit_test(amp->am_syncmap, ext)) { + /* Already marked for synchronization. */ + assert(bit_test(amp->am_memmap, ext)); + continue; + } + bit_set(amp->am_syncmap, ext); + if (!bit_test(amp->am_memmap, ext)) { + bit_set(amp->am_memmap, ext); + amp->am_ndirty++; + } + amp->am_memtab[ext] += ext2reqs(amp, ext); + modified = true; + } + + return (modified); +} + +void +activemap_dump(const struct activemap *amp) +{ + int bit; + + printf("M: "); + for (bit = 0; bit < amp->am_nextents; bit++) + printf("%d", bit_test(amp->am_memmap, bit) ? 1 : 0); + printf("\n"); + printf("D: "); + for (bit = 0; bit < amp->am_nextents; bit++) + printf("%d", bit_test(amp->am_diskmap, bit) ? 1 : 0); + printf("\n"); + printf("S: "); + for (bit = 0; bit < amp->am_nextents; bit++) + printf("%d", bit_test(amp->am_syncmap, bit) ? 1 : 0); + printf("\n"); +} |