diff options
author | Scott Long <scottl@FreeBSD.org> | 2002-10-20 08:17:39 +0000 |
---|---|---|
committer | Scott Long <scottl@FreeBSD.org> | 2002-10-20 08:17:39 +0000 |
commit | f9d186edc88163ed0b54b8f5afca9e8a2b531f12 (patch) | |
tree | 71c65823ba2e8591de708d5cb2e990a75135ee11 /sys/dev/raidframe/rf_paritylogDiskMgr.c | |
parent | 597e16e0129ec8b3f43db4523fa8247aa7286c6f (diff) | |
download | src-f9d186edc88163ed0b54b8f5afca9e8a2b531f12.tar.gz src-f9d186edc88163ed0b54b8f5afca9e8a2b531f12.zip |
After much delay and anticipation, welcome RAIDFrame into the FreeBSD
world. This should be considered highly experimental.
Approved-by: re
Notes
Notes:
svn path=/head/; revision=105503
Diffstat (limited to 'sys/dev/raidframe/rf_paritylogDiskMgr.c')
-rw-r--r-- | sys/dev/raidframe/rf_paritylogDiskMgr.c | 701 |
1 files changed, 701 insertions, 0 deletions
diff --git a/sys/dev/raidframe/rf_paritylogDiskMgr.c b/sys/dev/raidframe/rf_paritylogDiskMgr.c new file mode 100644 index 000000000000..5eadad84b7a4 --- /dev/null +++ b/sys/dev/raidframe/rf_paritylogDiskMgr.c @@ -0,0 +1,701 @@ +/* $FreeBSD$ */ +/* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */ +/* + * Copyright (c) 1995 Carnegie-Mellon University. + * All rights reserved. + * + * Author: William V. Courtright II + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ +/* Code for flushing and reintegration operations related to parity logging. + * + */ + +#include <dev/raidframe/rf_archs.h> + +#if RF_INCLUDE_PARITYLOGGING > 0 + +#include <dev/raidframe/rf_types.h> +#include <dev/raidframe/rf_threadstuff.h> +#include <dev/raidframe/rf_mcpair.h> +#include <dev/raidframe/rf_raid.h> +#include <dev/raidframe/rf_dag.h> +#include <dev/raidframe/rf_dagfuncs.h> +#include <dev/raidframe/rf_desc.h> +#include <dev/raidframe/rf_layout.h> +#include <dev/raidframe/rf_diskqueue.h> +#include <dev/raidframe/rf_paritylog.h> +#include <dev/raidframe/rf_general.h> +#include <dev/raidframe/rf_etimer.h> +#include <dev/raidframe/rf_paritylogging.h> +#include <dev/raidframe/rf_engine.h> +#include <dev/raidframe/rf_dagutils.h> +#include <dev/raidframe/rf_map.h> +#include <dev/raidframe/rf_parityscan.h> +#include <dev/raidframe/rf_kintf.h> + +#include <dev/raidframe/rf_paritylogDiskMgr.h> + +static caddr_t AcquireReintBuffer(RF_RegionBufferQueue_t *); + +static caddr_t +AcquireReintBuffer(pool) + RF_RegionBufferQueue_t *pool; +{ + caddr_t bufPtr = NULL; + + /* Return a region buffer from the free list (pool). If the free list + * is empty, WAIT. BLOCKING */ + + RF_LOCK_MUTEX(pool->mutex); + if (pool->availableBuffers > 0) { + bufPtr = pool->buffers[pool->availBuffersIndex]; + pool->availableBuffers--; + pool->availBuffersIndex++; + if (pool->availBuffersIndex == pool->totalBuffers) + pool->availBuffersIndex = 0; + RF_UNLOCK_MUTEX(pool->mutex); + } else { + RF_PANIC(); /* should never happen in correct config, + * single reint */ + RF_WAIT_COND(pool->cond, pool->mutex); + } + return (bufPtr); +} + +static void +ReleaseReintBuffer( + RF_RegionBufferQueue_t * pool, + caddr_t bufPtr) +{ + /* Insert a region buffer (bufPtr) into the free list (pool). + * NON-BLOCKING */ + + RF_LOCK_MUTEX(pool->mutex); + pool->availableBuffers++; + pool->buffers[pool->emptyBuffersIndex] = bufPtr; + pool->emptyBuffersIndex++; + if (pool->emptyBuffersIndex == pool->totalBuffers) + pool->emptyBuffersIndex = 0; + RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); + RF_UNLOCK_MUTEX(pool->mutex); + RF_SIGNAL_COND(pool->cond); +} + + + +static void +ReadRegionLog( + RF_RegionId_t regionID, + RF_MCPair_t * rrd_mcpair, + caddr_t regionBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** rrd_dag_h, + RF_AllocListElem_t ** rrd_alloclist, + RF_PhysDiskAddr_t ** rrd_pda) +{ + /* Initiate the read a region log from disk. Once initiated, return + * to the calling routine. + * + * NON-BLOCKING */ + + RF_AccTraceEntry_t *tracerec; + RF_DagNode_t *rrd_rdNode; + + /* create DAG to read region log from disk */ + rf_MakeAllocList(*rrd_alloclist); + *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, + rf_DiskReadFunc, rf_DiskReadUndoFunc, + "Rrl", *rrd_alloclist, + RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for the core log */ + /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *rrd_pda = rf_AllocPDAList(1); + rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row), + &((*rrd_pda)->col), &((*rrd_pda)->startSector)); + (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; + + if ((*rrd_pda)->next) { + (*rrd_pda)->next = NULL; + printf("set rrd_pda->next to NULL\n"); + } + /* initialize DAG parameters */ + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*rrd_dag_h)->tracerec = tracerec; + rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; + rrd_rdNode->params[0].p = *rrd_pda; +/* rrd_rdNode->params[1] = regionBuffer; */ + rrd_rdNode->params[2].v = 0; + rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); + + /* launch region log read dag */ + rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) rrd_mcpair); +} + + + +static void +WriteCoreLog( + RF_ParityLog_t * log, + RF_MCPair_t * fwr_mcpair, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** fwr_dag_h, + RF_AllocListElem_t ** fwr_alloclist, + RF_PhysDiskAddr_t ** fwr_pda) +{ + RF_RegionId_t regionID = log->regionID; + RF_AccTraceEntry_t *tracerec; + RF_SectorNum_t regionOffset; + RF_DagNode_t *fwr_wrNode; + + /* Initiate the write of a core log to a region log disk. Once + * initiated, return to the calling routine. + * + * NON-BLOCKING */ + + /* create DAG to write a core log to a region log disk */ + rf_MakeAllocList(*fwr_alloclist); + *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wcl", *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for the region log */ + /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *fwr_pda = rf_AllocPDAList(1); + regionOffset = log->diskOffset; + rf_MapLogParityLogging(raidPtr, regionID, regionOffset, + &((*fwr_pda)->row), &((*fwr_pda)->col), + &((*fwr_pda)->startSector)); + (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; + + /* initialize DAG parameters */ + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*fwr_dag_h)->tracerec = tracerec; + fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; + fwr_wrNode->params[0].p = *fwr_pda; +/* fwr_wrNode->params[1] = log->bufPtr; */ + fwr_wrNode->params[2].v = 0; + fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); + + /* launch the dag to write the core log to disk */ + rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) fwr_mcpair); +} + + +static void +ReadRegionParity( + RF_RegionId_t regionID, + RF_MCPair_t * prd_mcpair, + caddr_t parityBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** prd_dag_h, + RF_AllocListElem_t ** prd_alloclist, + RF_PhysDiskAddr_t ** prd_pda) +{ + /* Initiate the read region parity from disk. Once initiated, return + * to the calling routine. + * + * NON-BLOCKING */ + + RF_AccTraceEntry_t *tracerec; + RF_DagNode_t *prd_rdNode; + + /* create DAG to read region parity from disk */ + rf_MakeAllocList(*prd_alloclist); + *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, + rf_DiskReadUndoFunc, "Rrp", + *prd_alloclist, RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for region parity */ + /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *prd_pda = rf_AllocPDAList(1); + rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row), + &((*prd_pda)->col), &((*prd_pda)->startSector), + &((*prd_pda)->numSector)); + if (rf_parityLogDebug) + printf("[reading %d sectors of parity from region %d]\n", + (int) (*prd_pda)->numSector, regionID); + if ((*prd_pda)->next) { + (*prd_pda)->next = NULL; + printf("set prd_pda->next to NULL\n"); + } + /* initialize DAG parameters */ + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*prd_dag_h)->tracerec = tracerec; + prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; + prd_rdNode->params[0].p = *prd_pda; + prd_rdNode->params[1].p = parityBuffer; + prd_rdNode->params[2].v = 0; + prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); + if (rf_validateDAGDebug) + rf_ValidateDAG(*prd_dag_h); + /* launch region parity read dag */ + rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) prd_mcpair); +} + +static void +WriteRegionParity( + RF_RegionId_t regionID, + RF_MCPair_t * pwr_mcpair, + caddr_t parityBuffer, + RF_Raid_t * raidPtr, + RF_DagHeader_t ** pwr_dag_h, + RF_AllocListElem_t ** pwr_alloclist, + RF_PhysDiskAddr_t ** pwr_pda) +{ + /* Initiate the write of region parity to disk. Once initiated, return + * to the calling routine. + * + * NON-BLOCKING */ + + RF_AccTraceEntry_t *tracerec; + RF_DagNode_t *pwr_wrNode; + + /* create DAG to write region log from disk */ + rf_MakeAllocList(*pwr_alloclist); + *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, + rf_DiskWriteFunc, rf_DiskWriteUndoFunc, + "Wrp", *pwr_alloclist, + RF_DAG_FLAGS_NONE, + RF_IO_NORMAL_PRIORITY); + + /* create and initialize PDA for region parity */ + /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t + * *)); */ + *pwr_pda = rf_AllocPDAList(1); + rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row), + &((*pwr_pda)->col), &((*pwr_pda)->startSector), + &((*pwr_pda)->numSector)); + + /* initialize DAG parameters */ + RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); + bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t)); + (*pwr_dag_h)->tracerec = tracerec; + pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; + pwr_wrNode->params[0].p = *pwr_pda; +/* pwr_wrNode->params[1] = parityBuffer; */ + pwr_wrNode->params[2].v = 0; + pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, + 0, 0, 0); + + /* launch the dag to write region parity to disk */ + rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, + (void *) pwr_mcpair); +} + +static void +FlushLogsToDisk( + RF_Raid_t * raidPtr, + RF_ParityLog_t * logList) +{ + /* Flush a linked list of core logs to the log disk. Logs contain the + * disk location where they should be written. Logs were written in + * FIFO order and that order must be preserved. + * + * Recommended optimizations: 1) allow multiple flushes to occur + * simultaneously 2) coalesce contiguous flush operations + * + * BLOCKING */ + + RF_ParityLog_t *log; + RF_RegionId_t regionID; + RF_MCPair_t *fwr_mcpair; + RF_DagHeader_t *fwr_dag_h; + RF_AllocListElem_t *fwr_alloclist; + RF_PhysDiskAddr_t *fwr_pda; + + fwr_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(fwr_mcpair->mutex); + + RF_ASSERT(logList); + log = logList; + while (log) { + regionID = log->regionID; + + /* create and launch a DAG to write the core log */ + if (rf_parityLogDebug) + printf("[initiating write of core log for region %d]\n", regionID); + fwr_mcpair->flag = RF_FALSE; + WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, + &fwr_alloclist, &fwr_pda); + + /* wait for the DAG to complete */ + while (!fwr_mcpair->flag) + RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex); + if (fwr_dag_h->status != rf_enable) { + RF_ERRORMSG1("Unable to write core log to disk (region %d)\n", regionID); + RF_ASSERT(0); + } + /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(fwr_pda); + rf_FreeDAG(fwr_dag_h); + rf_FreeAllocList(fwr_alloclist); + + log = log->next; + } + RF_UNLOCK_MUTEX(fwr_mcpair->mutex); + rf_FreeMCPair(fwr_mcpair); + rf_ReleaseParityLogs(raidPtr, logList); +} + +static void +ReintegrateRegion( + RF_Raid_t * raidPtr, + RF_RegionId_t regionID, + RF_ParityLog_t * coreLog) +{ + RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; + RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h; + RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist; + RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda; + caddr_t parityBuffer, regionBuffer = NULL; + + /* Reintegrate a region (regionID). + * + * 1. acquire region and parity buffers + * 2. read log from disk + * 3. read parity from disk + * 4. apply log to parity + * 5. apply core log to parity + * 6. write new parity to disk + * + * BLOCKING */ + + if (rf_parityLogDebug) + printf("[reintegrating region %d]\n", regionID); + + /* initiate read of region parity */ + if (rf_parityLogDebug) + printf("[initiating read of parity for region %d]\n",regionID); + parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); + prd_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(prd_mcpair->mutex); + prd_mcpair->flag = RF_FALSE; + ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, + &prd_dag_h, &prd_alloclist, &prd_pda); + + /* if region log nonempty, initiate read */ + if (raidPtr->regionInfo[regionID].diskCount > 0) { + if (rf_parityLogDebug) + printf("[initiating read of disk log for region %d]\n", + regionID); + regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); + rrd_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(rrd_mcpair->mutex); + rrd_mcpair->flag = RF_FALSE; + ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, + &rrd_dag_h, &rrd_alloclist, &rrd_pda); + } + /* wait on read of region parity to complete */ + while (!prd_mcpair->flag) { + RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex); + } + RF_UNLOCK_MUTEX(prd_mcpair->mutex); + if (prd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to read parity from disk\n"); + /* add code to fail the parity disk */ + RF_ASSERT(0); + } + /* apply core log to parity */ + /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ + + if (raidPtr->regionInfo[regionID].diskCount > 0) { + /* wait on read of region log to complete */ + while (!rrd_mcpair->flag) + RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex); + RF_UNLOCK_MUTEX(rrd_mcpair->mutex); + if (rrd_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to read region log from disk\n"); + /* add code to fail the log disk */ + RF_ASSERT(0); + } + /* apply region log to parity */ + /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ + /* release resources associated with region log */ + /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(rrd_pda); + rf_FreeDAG(rrd_dag_h); + rf_FreeAllocList(rrd_alloclist); + rf_FreeMCPair(rrd_mcpair); + ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); + } + /* write reintegrated parity to disk */ + if (rf_parityLogDebug) + printf("[initiating write of parity for region %d]\n", + regionID); + pwr_mcpair = rf_AllocMCPair(); + RF_LOCK_MUTEX(pwr_mcpair->mutex); + pwr_mcpair->flag = RF_FALSE; + WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, + &pwr_dag_h, &pwr_alloclist, &pwr_pda); + while (!pwr_mcpair->flag) + RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex); + RF_UNLOCK_MUTEX(pwr_mcpair->mutex); + if (pwr_dag_h->status != rf_enable) { + RF_ERRORMSG("Unable to write parity to disk\n"); + /* add code to fail the parity disk */ + RF_ASSERT(0); + } + /* release resources associated with read of old parity */ + /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(prd_pda); + rf_FreeDAG(prd_dag_h); + rf_FreeAllocList(prd_alloclist); + rf_FreeMCPair(prd_mcpair); + + /* release resources associated with write of new parity */ + ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); + /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ + rf_FreePhysDiskAddr(pwr_pda); + rf_FreeDAG(pwr_dag_h); + rf_FreeAllocList(pwr_alloclist); + rf_FreeMCPair(pwr_mcpair); + + if (rf_parityLogDebug) + printf("[finished reintegrating region %d]\n", regionID); +} + + + +static void +ReintegrateLogs( + RF_Raid_t * raidPtr, + RF_ParityLog_t * logList) +{ + RF_ParityLog_t *log, *freeLogList = NULL; + RF_ParityLogData_t *logData, *logDataList; + RF_RegionId_t regionID; + + RF_ASSERT(logList); + while (logList) { + log = logList; + logList = logList->next; + log->next = NULL; + regionID = log->regionID; + ReintegrateRegion(raidPtr, regionID, log); + log->numRecords = 0; + + /* remove all items which are blocked on reintegration of this + * region */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, + RF_TRUE); + logDataList = logData; + while (logData) { + logData->next = rf_SearchAndDequeueParityLogData( + raidPtr, regionID, + &raidPtr->parityLogDiskQueue.reintBlockHead, + &raidPtr->parityLogDiskQueue.reintBlockTail, + RF_TRUE); + logData = logData->next; + } + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* process blocked log data and clear reintInProgress flag for + * this region */ + if (logDataList) + rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); + else { + /* Enable flushing for this region. Holding both + * locks provides a synchronization barrier with + * DumpParityLogToDisk */ + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr->regionInfo[regionID].diskCount = 0; + raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now + * enabled */ + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + } + /* if log wasn't used, attach it to the list of logs to be + * returned */ + if (log) { + log->next = freeLogList; + freeLogList = log; + } + } + if (freeLogList) + rf_ReleaseParityLogs(raidPtr, freeLogList); +} + +int +rf_ShutdownLogging(RF_Raid_t * raidPtr) +{ + /* shutdown parity logging 1) disable parity logging in all regions 2) + * reintegrate all regions */ + + RF_SectorCount_t diskCount; + RF_RegionId_t regionID; + RF_ParityLog_t *log; + + if (rf_parityLogDebug) + printf("[shutting down parity logging]\n"); + /* Since parity log maps are volatile, we must reintegrate all + * regions. */ + if (rf_forceParityLogReint) { + for (regionID = 0; regionID < rf_numParityRegions; regionID++) { + RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + raidPtr->regionInfo[regionID].loggingEnabled = + RF_FALSE; + log = raidPtr->regionInfo[regionID].coreLog; + raidPtr->regionInfo[regionID].coreLog = NULL; + diskCount = raidPtr->regionInfo[regionID].diskCount; + RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex); + if (diskCount > 0 || log != NULL) + ReintegrateRegion(raidPtr, regionID, log); + if (log != NULL) + rf_ReleaseParityLogs(raidPtr, log); + } + } + if (rf_parityLogDebug) { + printf("[parity logging disabled]\n"); + printf("[should be done!]\n"); + } + return (0); +} + +int +rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) +{ + RF_ParityLog_t *reintQueue, *flushQueue; + int workNeeded, done = RF_FALSE; + int s; + + /* Main program for parity logging disk thread. This routine waits + * for work to appear in either the flush or reintegration queues and + * is responsible for flushing core logs to the log disk as well as + * reintegrating parity regions. + * + * BLOCKING */ + + s = splbio(); + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* + * Inform our creator that we're running. Don't bother doing the + * mutex lock/unlock dance- we locked above, and we'll unlock + * below with nothing to do, yet. + */ + raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + + /* empty the work queues */ + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + + while (!done) { + while (workNeeded) { + /* First, flush all logs in the flush queue, freeing + * buffers Second, reintegrate all regions which are + * reported as full. Third, append queued log data + * until blocked. + * + * Note: Incoming appends (ParityLogAppend) can block on + * either 1. empty buffer pool 2. region under + * reintegration To preserve a global FIFO ordering of + * appends, buffers are not released to the world + * until those appends blocked on buffers are removed + * from the append queue. Similarly, regions which + * are reintegrated are not opened for general use + * until the append queue has been emptied. */ + + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + + /* empty flushQueue, using free'd log buffers to + * process bufTail */ + if (flushQueue) + FlushLogsToDisk(raidPtr, flushQueue); + + /* empty reintQueue, flushing from reintTail as we go */ + if (reintQueue) + ReintegrateLogs(raidPtr, reintQueue); + + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + } + /* no work is needed at this point */ + if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { + /* shutdown parity logging 1. disable parity logging + * in all regions 2. reintegrate all regions */ + done = RF_TRUE; /* thread disabled, no work needed */ + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + rf_ShutdownLogging(raidPtr); + } + if (!done) { + /* thread enabled, no work needed, so sleep */ + if (rf_parityLogDebug) + printf("[parity logging disk manager sleeping]\n"); + RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond, + raidPtr->parityLogDiskQueue.mutex); + if (rf_parityLogDebug) + printf("[parity logging disk manager just woke up]\n"); + flushQueue = raidPtr->parityLogDiskQueue.flushQueue; + raidPtr->parityLogDiskQueue.flushQueue = NULL; + reintQueue = raidPtr->parityLogDiskQueue.reintQueue; + raidPtr->parityLogDiskQueue.reintQueue = NULL; + workNeeded = (flushQueue || reintQueue); + } + } + /* + * Announce that we're done. + */ + RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; + RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex); + RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond); + + splx(s); + + /* + * In the NetBSD kernel, the thread must exit; returning would + * cause the proc trampoline to attempt to return to userspace. + */ + kthread_exit(0); /* does not return */ +} +#endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |