diff options
author | John Dyson <dyson@FreeBSD.org> | 1995-11-05 23:25:12 +0000 |
---|---|---|
committer | John Dyson <dyson@FreeBSD.org> | 1995-11-05 23:25:12 +0000 |
commit | c33a4405f7457413ce5aba276d3b3326140dd251 (patch) | |
tree | 3c31656291a1b60c5b4c604354e037da124df62e /sys/gnu | |
parent | c15c761192ed4b46a0e9f0ef8c73e29e923846b1 (diff) | |
download | src-c33a4405f7457413ce5aba276d3b3326140dd251.tar.gz src-c33a4405f7457413ce5aba276d3b3326140dd251.zip |
Main code for the ext2fs filesystem. Please refer to the COPYRIGHT.INFO
file for GPL restrictions. This code was ported to the BSD platform
by Godmar Back <gback@facility.cs.utah.edu> and specifically to FreeBSD
by John Dyson. This code is still green and should be used with caution.
Additional changes to UFS necessary to make this code work will be committed
separately.
Submitted by: Godmar Back <gback@facility.cs.utah.edu>
Obtained from: Lites/Mach4
Notes
Notes:
svn path=/head/; revision=12115
Diffstat (limited to 'sys/gnu')
35 files changed, 13232 insertions, 0 deletions
diff --git a/sys/gnu/ext2fs/COPYRIGHT.INFO b/sys/gnu/ext2fs/COPYRIGHT.INFO new file mode 100644 index 000000000000..bbb021451bee --- /dev/null +++ b/sys/gnu/ext2fs/COPYRIGHT.INFO @@ -0,0 +1,30 @@ +Most of the files in this directory are written by Godmar Back or modified +by him using the CSRG sources. Those files are covered by the Berkeley-style +copyright. However the following files are covered by GPL. Since the policy +of the FreeBSD project is to keep the files with the more restrictive +copyright in the gnu tree and it is a good idea to keep the filesystem code +all together, the EXT2FS in it's entirety resides under the gnu tree. Note +that only the files below are under the GPL. In the eventuality that these +files are redesigned or rewritten, this tree can be moved back into the less +restrictive FreeBSD tree. + + ext2_fs.h + ext2_fs_i.h + ext2_fs_sb.h + ext2_linux_balloc.c + ext2_linux_ialloc.c + i386-bitops.h + +PS. + THANKS GODMAR!!! + +Note that this port has been modified by John Dyson and others on +the FreeBSD team, and it is best to send the bug reports to the FreeBSD +team. If there are any non-FreeBSD specific bugs, fixes will be sent to +Godmar to help him fix the original code base. It is also our intention +to send Godmar any FreeBSD specific porting changes so that he can keep +control of his code.... + +John +dyson@freebsd.org + diff --git a/sys/gnu/ext2fs/ext2_alloc.c b/sys/gnu/ext2fs/ext2_alloc.c new file mode 100644 index 000000000000..6a0f5d3c6360 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_alloc.c @@ -0,0 +1,572 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/syslog.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +extern u_long nextgennumber; + +static void ext2_fserr __P((struct ext2_sb_info *, u_int, char *)); + +/* + * Linux calls this functions at the following locations: + * (1) the inode is freed + * (2) a preallocation miss occurs + * (3) truncate is called + * (4) release_file is called and f_mode & 2 + * + * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2) + * the call in vfree might be redundant + */ +void ext2_discard_prealloc (struct inode * ip) +{ +#ifdef EXT2_PREALLOCATE + if (ip->i_prealloc_count) { + int i = ip->i_prealloc_count; + ip->i_prealloc_count = 0; + ext2_free_blocks (ITOV(ip)->v_mount, + ip->i_prealloc_block, + i); + } +#endif +} + +/* + * Allocate a block in the file system. + * + * this takes the framework from ffs_alloc. To implement the + * actual allocation, it calls ext2_new_block, the ported version + * of the same Linux routine. 
+ * + * we note that this is always called in connection with ext2_blkpref + * + * preallocation is done as Linux does it + */ +int +ext2_alloc(ip, lbn, bpref, size, cred, bnp) + register struct inode *ip; + daddr_t lbn, bpref; + int size; + struct ucred *cred; + daddr_t *bnp; +{ + register struct ext2_sb_info *fs; + daddr_t bno; + int cg, error; + + *bnp = 0; + fs = ip->i_e2fs; +#if DIAGNOSTIC + if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->s_blocksize, size, fs->fs_fsmnt); + panic("ext2_alloc: bad size"); + } + if (cred == NOCRED) + panic("ext2_alloc: missing credential\n"); +#endif /* DIAGNOSTIC */ + if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) + goto nospace; + if (cred->cr_uid != 0 && + fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) + goto nospace; +#if QUOTA + if (error = chkdq(ip, (long)btodb(size), cred, 0)) + return (error); +#endif + if (bpref >= fs->s_es->s_blocks_count) + bpref = 0; + /* call the Linux code */ +#ifdef EXT2_PREALLOCATE + /* To have a preallocation hit, we must + * - have at least one block preallocated + * - and our preferred block must have that block number or one below + */ + if (ip->i_prealloc_count && + (bpref == ip->i_prealloc_block || + bpref + 1 == ip->i_prealloc_block)) + { + bno = ip->i_prealloc_block++; + ip->i_prealloc_count--; + /* ext2_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); */ + + /* Linux gets, clears, and releases the buffer at this + point - we don't have to that; we leave it to the caller + */ + } else { + ext2_discard_prealloc (ip); + /* ext2_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); */ + if (S_ISREG(ip->i_mode)) + bno = ext2_new_block + (ITOV(ip)->v_mount, bpref, + &ip->i_prealloc_count, + &ip->i_prealloc_block); + else + bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, + bpref, 0, 0); + } +#else + bno = 
(daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); +#endif + + if (bno > 0) { + /* set next_alloc fields as done in block_getblk */ + ip->i_next_alloc_block = lbn; + ip->i_next_alloc_goal = bno; + + ip->i_blocks += btodb(size); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); + } +#if QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, (long)-btodb(size), cred, FORCE); +#endif +nospace: + ext2_fserr(fs, cred->cr_uid, "file system full"); + uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. The allocator attempts + * to find a range of sequential blocks starting as close as possible to + * an fs_rotdelay offset from the end of the allocation for the logical + * block immediately preceeding the current range. If successful, the + * physical block numbers in the buffer pointers and in the inode are + * changed to reflect the new allocation. If unsuccessful, the allocation + * is left unchanged. The success in doing the reallocation is returned. + * Note that the error return is not reflected back to the user. Rather + * the previous block allocation will be used. 
+ */ +#include <sys/sysctl.h> +static int doasyncfree = 1; +#ifdef OPT_DEBUG +struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +#endif /* OPT_DEBUG */ +int +ext2_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ +#ifndef FANCY_REALLOC +/* printf("ext2_reallocblks not implemented\n"); */ +return ENOSPC; +#else + + struct ext2_sb_info *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, len, start_lvl, end_lvl, pref, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_e2fs; +#ifdef UNKLAR + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); +#endif + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#if DIAGNOSTIC + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ext2_reallocblks: non-cluster"); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. 
+ */ + pref = ext2_blkpref(ip, start_lbn, soff, sbap); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#if DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ext2_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp)) + goto fail; + ebap = (daddr_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref, + len, (u_long (*)())ext2_clusteralloc)) == 0) + goto fail; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) { + if (i == ssize) + bap = ebap; +#if DIAGNOSTIC + if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) + panic("ext2_reallocblks: alloc mismatch"); +#endif + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. 
+ */ + if (sbap != &ip->i_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { +#if !defined(__FreeBSD__) + struct timeval time; + get_time(&time); +#endif + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + VOP_UPDATE(vp, &time, &time, MNT_WAIT); + } + if (ssize < len) + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) { + ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->s_blocksize); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + } + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_db[0]) + brelse(sbp); + return (ENOSPC); + +#endif /* FANCY_REALLOC */ +} + +/* + * Allocate an inode in the file system. + * + * we leave the actual allocation strategy to the (modified) + * ext2_new_inode(), to make sure we get the policies right + */ +int +ext2_valloc(ap) + struct vop_valloc_args /* { + struct vnode *a_pvp; + int a_mode; + struct ucred *a_cred; + struct vnode **a_vpp; + } */ *ap; +{ + register struct vnode *pvp = ap->a_pvp; + register struct inode *pip; + register struct ext2_sb_info *fs; + register struct inode *ip; + mode_t mode = ap->a_mode; + ino_t ino, ipref; + int i, error; +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + *ap->a_vpp = NULL; + pip = VTOI(pvp); + fs = pip->i_e2fs; + if (fs->s_es->s_free_inodes_count == 0) + goto noinodes; + + /* call the Linux routine - it returns the inode number only */ + ino = ext2_new_inode(pip, mode); + + if (ino == 0) + goto noinodes; + error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp); + if (error) { + VOP_VFREE(pvp, ino, mode); + return (error); + } + ip = VTOI(*ap->a_vpp); + + /* + the question is whether using VGET was such good idea at all - + Linux doesn't read the old inode in when it's allocating a + new one. 
I will set at least i_size & i_blocks the zero. + */ + ip->i_mode = 0; + ip->i_size = 0; + ip->i_blocks = 0; + ip->i_flags = 0; + /* now we want to make sure that the block pointers are zeroed out */ + for(i = 0; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + + /* + * Set up a new generation number for this inode. + * XXX check if this makes sense in ext2 + */ +#if !defined(__FreeBSD__) + get_time(&time); +#endif + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; +/* +printf("ext2_valloc: allocated inode %d\n", ino); +*/ + return (0); +noinodes: + ext2_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Select the desired position for the next block in a file. + * + * we try to mimic what Remy does in inode_getblk/block_getblk + * + * we note: blocknr == 0 means that we're about to allocate either + * a direct block or a pointer block at the first level of indirection + * (In other words, stuff that will go in i_db[] or i_ib[]) + * + * blocknr != 0 means that we're allocating a block that is none + * of the above. 
Then, blocknr tells us the number of the block + * that will hold the pointer + */ +daddr_t +ext2_blkpref(ip, lbn, indx, bap, blocknr) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; + daddr_t blocknr; +{ + register struct ext2_sb_info *fs; + int tmp; + + /* if the next block is actually what we thought it is, + then set the goal to what we thought it should be + */ + if(ip->i_next_alloc_block == lbn) + return ip->i_next_alloc_goal; + + /* now check whether we were provided with an array that basically + tells us previous blocks to which we want to stay closeby + */ + if(bap) + for (tmp = indx - 1; tmp >= 0; tmp--) + if (bap[tmp]) + return bap[tmp]; + + /* else let's fall back to the blocknr, or, if there is none, + follow the rule that a block should be allocated near it's inode + */ + return blocknr ? blocknr : + (daddr_t)(ip->i_block_group * + EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + + ip->i_e2fs->s_es->s_first_data_block; +} + +/* + * Free a block or fragment. + * + * pass on to the Linux code + */ +void +ext2_blkfree(ip, bno, size) + register struct inode *ip; + daddr_t bno; + long size; +{ + register struct ext2_sb_info *fs; + + fs = ip->i_e2fs; + /* + * call Linux code with mount *, block number, count + */ + ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size); +} + +/* + * Free an inode. 
+ * + * the maintenance of the actual bitmaps is again up to the linux code + */ +int +ext2_vfree(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ + register struct ext2_sb_info *fs; + register struct inode *pip; + ino_t ino = ap->a_ino; + int mode; + + pip = VTOI(ap->a_pvp); + fs = pip->i_e2fs; + if ((u_int)ino >= fs->s_inodes_per_group * fs->s_groups_count) + panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", + pip->i_dev, ino, fs->fs_fsmnt); + +/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, ap->a_mode); + */ + ext2_discard_prealloc(pip); + + /* we need to make sure that ext2_free_inode can adjust the + used_dir_counts in the group summary information - I'd + really like to know what the rationale behind this + 'set i_mode to zero to denote an unused inode' is + */ + mode = pip->i_mode; + pip->i_mode = ap->a_mode; + ext2_free_inode(pip); + pip->i_mode = mode; + return (0); +} + +/* + * Fserr prints the name of a file system with an error diagnostic. + * + * The form of the error message is: + * fs: error message + */ +static void +ext2_fserr(fs, uid, cp) + struct ext2_sb_info *fs; + u_int uid; + char *cp; +{ + + log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); +} diff --git a/sys/gnu/ext2fs/ext2_balloc.c b/sys/gnu/ext2fs/ext2_balloc.c new file mode 100644 index 000000000000..44d75ae34a2d --- /dev/null +++ b/sys/gnu/ext2fs/ext2_balloc.c @@ -0,0 +1,335 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/vnode.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +/* + * Balloc defines the structure of file system storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + */ +int +ext2_balloc(ip, bn, size, cred, bpp, flags) + register struct inode *ip; + register daddr_t bn; + int size; + struct ucred *cred; + struct buf **bpp; + int flags; +{ + register struct ext2_sb_info *fs; + register daddr_t nb; + struct buf *bp, *nbp; + struct vnode *vp = ITOV(ip); + struct indir indirs[NIADDR + 2]; + daddr_t newb, lbn, *bap, pref; + int osize, nsize, num, i, error; +/* +ext2_debug("ext2_balloc called (%d, %d, %d)\n", + ip->i_number, (int)bn, (int)size); +*/ + *bpp = NULL; + if (bn < 0) + return (EFBIG); + fs = ip->i_e2fs; + lbn = bn; + + /* + * check if this is a sequential block allocation. + * If so, increment next_alloc fields to allow ext2_blkpref + * to make a good guess + */ + if (lbn == ip->i_next_alloc_block + 1) { + ip->i_next_alloc_block++; + ip->i_next_alloc_goal++; + } + + /* + * The first NDADDR blocks are direct blocks + */ + if (bn < NDADDR) { + nb = ip->i_db[bn]; + /* no new block is to be allocated, and no need to expand + the file */ + if (nb != 0 && ip->i_size >= (bn + 1) * fs->s_blocksize) { + error = bread(vp, bn, fs->s_blocksize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. 
+ */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, bn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + } else { + /* Godmar thinks: this shouldn't happen w/o fragments */ + printf("nsize %d(%d) > osize %d(%d) nb %d\n", + (int)nsize, (int)size, (int)osize, + (int)ip->i_size, (int)nb); + panic("ext2_balloc: " + "Something is terribly wrong\n"); +/* + * please note there haven't been any changes from here on - + * FFS seems to work. + */ + } + } else { + if (ip->i_size < (bn + 1) * fs->s_blocksize) + nsize = fragroundup(fs, size); + else + nsize = fs->s_blocksize; + error = ext2_alloc(ip, bn, + ext2_blkpref(ip, bn, (int)bn, &ip->i_db[0], 0), + nsize, cred, &newb); + if (error) + return (error); + bp = getblk(vp, bn, nsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) +#if defined(__FreeBSD__) + vfs_bio_clrbuf(bp); +#else + clrbuf(bp); +#endif + } + ip->i_db[bn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. + */ + pref = 0; + if (error = ufs_getlbns(vp, bn, indirs, &num)) + return(error); +#if DIAGNOSTIC + if (num < 1) + panic ("ext2_balloc: ufs_bmaparray returned indirect block\n"); +#endif + /* + * Fetch the first indirect block allocating if necessary. + */ + --num; + nb = ip->i_ib[indirs[0].in_off]; + if (nb == 0) { +#if 0 + pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); +#else + /* see the comment by ext2_blkpref. What we do here is + to pretend that it'd be good for a block holding indirect + pointers to be allocated near its predecessor in terms + of indirection, or the last direct block. + We shamelessly exploit the fact that i_ib immediately + follows i_db. 
+ Godmar thinks it make sense to allocate i_ib[0] immediately + after i_db[11], but it's not utterly clear whether this also + applies to i_ib[1] and i_ib[0] + */ + + pref = ext2_blkpref(ip, lbn, indirs[0].in_off + + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); +#endif + if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, + cred, &newb)) + return (error); + nb = newb; + bp = getblk(vp, indirs[1].in_lbn, fs->s_blocksize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); +#if defined(__FreeBSD__) + vfs_bio_clrbuf(bp); +#else + clrbuf(bp); +#endif + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) { + ext2_blkfree(ip, nb, fs->s_blocksize); + return (error); + } + ip->i_ib[indirs[0].in_off] = newb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->s_blocksize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bap = (daddr_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + brelse(bp); + continue; + } + if (pref == 0) +#if 1 + /* see the comment above and by ext2_blkpref + * I think this implements Linux policy, but + * does it really make sense to allocate to + * block containing pointers together ? + * Also, will it ever succeed ? + */ + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, + bp->b_lblkno); +#else + pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); +#endif + if (error = + ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, indirs[i].in_lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); +#if defined(__FreeBSD__) + vfs_bio_clrbuf(nbp); +#else + clrbuf(nbp); +#endif + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if (error = bwrite(nbp)) { + ext2_blkfree(ip, nb, fs->s_blocksize); + brelse(bp); + return (error); + } + bap[indirs[i - 1].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + } + /* + * Get the data block, allocating if necessary. + */ + if (nb == 0) { + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], + bp->b_lblkno); + if (error = ext2_alloc(ip, + lbn, pref, (int)fs->s_blocksize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) +#if defined(__FreeBSD__) + vfs_bio_clrbuf(nbp); +#else + clrbuf(nbp); +#endif + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + *bpp = nbp; + return (0); + } + brelse(bp); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->s_blocksize, NOCRED, &nbp); + if (error) { + brelse(nbp); + return (error); + } + } else { + nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + } + *bpp = nbp; + return (0); +} diff --git a/sys/gnu/ext2fs/ext2_extern.h b/sys/gnu/ext2fs/ext2_extern.h new file mode 100644 index 000000000000..92ec0427aa3d --- /dev/null +++ b/sys/gnu/ext2fs/ext2_extern.h @@ -0,0 +1,140 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 + */ + +struct buf; +struct fid; +struct fs; +struct inode; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct mbuf; +struct dinode; +struct ext2_group_desc; +struct ext2_inode; + +__BEGIN_DECLS +int ext2_alloc __P((struct inode *, + daddr_t, daddr_t, int, struct ucred *, daddr_t *)); +int ext2_balloc __P((struct inode *, + daddr_t, int, struct ucred *, struct buf **, int)); +int ext2_blkatoff __P((struct vop_blkatoff_args *)); +void ext2_blkfree __P((struct inode *, daddr_t, long)); +daddr_t ext2_blkpref __P((struct inode *, daddr_t, int, daddr_t *, daddr_t)); +int ext2_bmap __P((struct vop_bmap_args *)); +int ext2_fhtovp __P((struct mount *, struct fid *, struct mbuf *, + struct vnode **, int *, struct ucred **)); +int ext2_fsync __P((struct vop_fsync_args *)); +int ext2_init __P((void)); +int ext2_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); +int ext2_mountroot __P((void)); +int ext2_read __P((struct vop_read_args *)); +int ext2_reallocblks __P((struct vop_reallocblks_args *)); +int ext2_reclaim __P((struct vop_reclaim_args *)); +void ext2_setblock __P((struct ext2_sb_info *, u_char *, daddr_t)); +int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); +int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); +int ext2_truncate __P((struct vop_truncate_args *)); +int ext2_unmount __P((struct mount *, int, struct proc *)); +int ext2_update __P((struct vop_update_args *)); +int ext2_valloc __P((struct vop_valloc_args *)); +int ext2_vfree __P((struct vop_vfree_args *)); +int ext2_vget __P((struct mount *, ino_t, struct vnode **)); +int ext2_vptofh __P((struct vnode *, struct fid *)); +int ext2_write __P((struct vop_write_args *)); +int ext2_lookup __P((struct vop_lookup_args *)); +int ext2_readdir 
__P((struct vop_readdir_args *)); +void ext2_print_dinode __P((struct dinode *)); +void ext2_print_inode __P((struct inode *)); +int ext2_direnter __P((struct inode *, + struct vnode *, struct componentname *)); +int ext2_dirremove __P((struct vnode *, struct componentname *)); +int ext2_dirrewrite __P((struct inode *, + struct inode *, struct componentname *)); +int ext2_dirempty __P((struct inode *, ino_t, struct ucred *)); +int ext2_checkpath __P((struct inode *, struct inode *, struct ucred *)); +struct ext2_group_desc * get_group_desc __P((struct mount * , + unsigned int , struct buf ** )); +void ext2_discard_prealloc __P((struct inode *)); +int ext2_inactive __P((struct vop_inactive_args *)); +int ll_w_block __P((struct buf *, int )); +int ext2_di2ei __P((struct dinode *di, struct ext2_inode *ei)); +int ext2_ei2di __P((struct ext2_inode *ei, struct dinode *di)); +int ext2_new_block __P ((struct mount * mp, unsigned long goal, + long * prealloc_count, + long * prealloc_block)); +ino_t ext2_new_inode __P ((const struct inode * dir, int mode)); +void ext2_free_blocks (struct mount * mp, unsigned long block, + unsigned long count); +void ext2_free_inode (struct inode * inode); +int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); +int ext2_reload __P((struct mount *mountp, struct ucred *cred, + struct proc *p)); + +#if !defined(__FreeBSD__) +int bwrite(); /* FFS needs a bwrite routine. XXX */ +#endif + +/* this macros allows some of the ufs code to distinguish between + * an EXT2 and a non-ext2(FFS/LFS) vnode. 
+ */ +#define IS_EXT2_VNODE(vp) (vp->v_mount->mnt_stat.f_type == MOUNT_EXT2FS) + +#ifdef DIAGNOSTIC +void ext2_checkoverlap __P((struct buf *, struct inode *)); +#endif +__END_DECLS + +extern int (**ext2_vnodeop_p)(); +extern int (**ext2_specop_p)(); +#ifdef FIFO +extern int (**ext2_fifoop_p)(); +#define EXT2_FIFOOPS ext2_fifoop_p +#else +#define EXT2_FIFOOPS NULL +#endif diff --git a/sys/gnu/ext2fs/ext2_fs.h b/sys/gnu/ext2fs/ext2_fs.h new file mode 100644 index 000000000000..56a85756f75c --- /dev/null +++ b/sys/gnu/ext2fs/ext2_fs.h @@ -0,0 +1,340 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#ifndef _LINUX_EXT2_FS_H +#define _LINUX_EXT2_FS_H + +#include <sys/types.h> + +#ifdef i386 +#if defined(__FreeBSD__) +#include <machine/types.h> +#else +#include <i386/types.h> +#endif +#else +#error need processor specific types +#endif + +#define __u32 u_int32_t +#define u32 u_int32_t +#define __u16 u_int16_t +#define __u8 u_int8_t + +#define __s32 int32_t +#define __s16 int16_t +#define __s8 int8_t + +#define umode_t mode_t +#define loff_t off_t + +/* the Linux implementation of EXT2 stores some information about + * an inode in a ext2_inode_info structure which is part of the incore + * inode in Linux + * I decided to use the i_spare[11] fields instead - we'll see how this + * works out + */ + +#define i_block_group i_spare[0] +#define i_next_alloc_block i_spare[1] +#define i_next_alloc_goal i_spare[2] +#define i_prealloc_block i_spare[3] +#define i_prealloc_count i_spare[4] + +/* + * The second extended filesystem constants/structures + */ + +/* + 
* Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2FS_DEBUG_CACHE to produce cache debug messages + */ +#undef EXT2FS_DEBUG_CACHE + +/* + * Define EXT2FS_CHECK_CACHE to add some checks to the name cache code + */ +#undef EXT2FS_CHECK_CACHE + +/* + * Define EXT2FS_PRE_02B_COMPAT to convert ext 2 fs prior to 0.2b + */ +#undef EXT2FS_PRE_02B_COMPAT + +/* + * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files + */ +#define EXT2_PREALLOCATE + +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/03/19" +#define EXT2FS_VERSION "0.5a" + +/* + * Debug code + */ +#ifdef EXT2FS_DEBUG +# define ext2_debug(f, a...) { \ + printf ("EXT2-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printf (f, ## a); \ + } +#else +# define ext2_debug(f, a...) /**/ +#endif + +/* + * Special inodes numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +#define EXT2_ACL_DATA_INO 4 /* ACL inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT2_FIRST_INO 11 /* First non reserved inode */ + +/* + * The second extended file system magic number + */ +#define EXT2_PRE_02B_MAGIC 0xEF51 +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 + +#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / \ + sizeof (struct ext2_acl_entry)) +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) +#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) + +#define EXT2_INODE_SIZE 128 + /* ought to be sizeof (struct ext2_inode)) */ +#define 
EXT2_INODES_PER_BLOCK(s) ((s)->s_inodes_per_block) + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#define EXT2_FRAG_SIZE(s) ((s)->s_frag_size) +#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) + +/* + * ACL structures + */ +struct ext2_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext2_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext2_old_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ +}; + +struct ext2_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __u32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT2_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_group_desc)) +#define EXT2_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define 
EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) +#define EXT2_MAXSYMLINKLEN (EXT2_N_BLOCKS * sizeof (__u32)) + +/* + * Inode flags + */ +#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT2_UNRM_FL 0x00000002 /* Undelete */ +#define EXT2_COMPR_FL 0x00000004 /* Compress file */ +#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ + +/* + * ioctl commands + */ +#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT2_IOC_GETVERSION _IOR('v', 1, long) +#define EXT2_IOC_SETVERSION _IOW('v', 2, long) + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK_NORMAL 0x0001 /* Do some more checks */ +#define EXT2_MOUNT_CHECK_STRICT 0x0002 /* Do again more checks */ +#define EXT2_MOUNT_CHECK (EXT2_MOUNT_CHECK_NORMAL | \ + EXT2_MOUNT_CHECK_STRICT) +#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ + +#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +#define set_opt(o, opt) o |= EXT2_MOUNT_##opt +#define test_opt(sb, opt) ((sb)->u.ext2_sb.s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * 
Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext2_super_block { + __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + __u32 s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + __u32 s_blocks_per_group; /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_pad; + __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. 
time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + __u32 s_reserved[235]; /* Padding to the end of the block */ +}; + +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OS_MASIX 2 + +#define EXT2_CURRENT_REV 0 + +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Structure of a directory entry + */ +#define EXT2_NAME_LEN 255 + +struct ext2_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) + +#endif /* _LINUX_EXT2_FS_H */ diff --git a/sys/gnu/ext2fs/ext2_fs_i.h b/sys/gnu/ext2fs/ext2_fs_i.h new file mode 100644 index 000000000000..800b2d404024 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_fs_i.h @@ -0,0 +1,86 @@ +/* + * added for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + * + * Note that this started out to be ext2_fs_i.h. In reality it + * doesn't have anything to do with. I put the declaration of + * the on disk ext2 format here from ext2_fs.h because this is + * something that would name clash with other stuff. 
+ * This is used only in ext2_inode_cnv.c + */ +/* + * linux/include/linux/ext2_fs_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _EXT2_FS_I +#define _EXT2_FS_I + +/* + * Structure of an inode on the disk + */ +struct ext2_inode { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Creation time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __u32 i_version; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __u32 l_i_reserved2[2]; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +}; + +#endif /* _EXT2_FS_I */ diff --git a/sys/gnu/ext2fs/ext2_fs_sb.h b/sys/gnu/ext2fs/ext2_fs_sb.h new file mode 100644 index 000000000000..f475ce279d82 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_fs_sb.h @@ -0,0 +1,87 @@ 
+/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/include/linux/ext2_fs_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_SB +#define _LINUX_EXT2_FS_SB + +/* + * The following is not needed anymore since the descriptors buffer + * heads are now dynamically allocated + */ +/* #define EXT2_MAX_GROUP_DESC 8 */ + +#define EXT2_MAX_GROUP_LOADED 8 + +#if defined(LITES) || defined(__FreeBSD__) +#define buffer_head buf +#define MAXMNTLEN 512 +#endif + +/* + * second extended-fs super-block data in memory + */ +struct ext2_sb_info { + unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_frags_per_block;/* Number of fragments per block */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_frags_per_group;/* Number of fragments in a group */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_db_per_group; /* Number of descriptor blocks per group */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned short s_loaded_inode_bitmaps; + unsigned short s_loaded_block_bitmaps; + unsigned long s_inode_bitmap_number[EXT2_MAX_GROUP_LOADED]; + struct buffer_head * 
s_inode_bitmap[EXT2_MAX_GROUP_LOADED]; + unsigned long s_block_bitmap_number[EXT2_MAX_GROUP_LOADED]; + struct buffer_head * s_block_bitmap[EXT2_MAX_GROUP_LOADED]; + int s_rename_lock; +#if !defined(LITES) && !defined(__FreeBSD__) + struct wait_queue * s_rename_wait; +#endif + unsigned long s_mount_opt; + unsigned short s_resuid; + unsigned short s_resgid; + unsigned short s_mount_state; +#if defined(LITES) || defined(__FreeBSD__) + /* + stuff that FFS keeps in its super block or that linux + has in its non-ext2 specific super block and which is + generally considered useful + */ + unsigned long s_blocksize; + unsigned long s_blocksize_bits; + unsigned int s_bshift; /* = log2(s_blocksize) */ + quad_t s_qbmask; /* = s_blocksize - 1 */ + unsigned int s_fsbtodb; /* shift to get disk block */ + char s_rd_only; /* read-only */ + char s_dirt; /* fs modified flag */ + + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +#endif +}; + +#endif /* _LINUX_EXT2_FS_SB */ diff --git a/sys/gnu/ext2fs/ext2_inode.c b/sys/gnu/ext2fs/ext2_inode.c new file mode 100644 index 000000000000..f2d6fd7aae3f --- /dev/null +++ b/sys/gnu/ext2fs/ext2_inode.c @@ -0,0 +1,547 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ext2_inode.c 8.5 (Berkeley) 12/30/93 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#if !defined(__FreeBSD__) +#include <sys/trace.h> +#endif +#include <sys/resourcevar.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +static int ext2_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int, + long *)); + +int +ext2_init() +{ + return (ufs_init()); +} + +/* + * Update the access, modified, and inode change times as specified by the + * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is + * used to specify that the inode needs to be updated but that the times have + * already been set. The access and modified times are taken from the second + * and third parameters; the inode change time is always taken from the current + * time. If waitfor is set, then wait for the disk write of the inode to + * complete. 
+ */ +int +ext2_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_access; + struct timeval *a_modify; + int a_waitfor; + } */ *ap; +{ + register struct ext2_sb_info *fs; + struct buf *bp; + struct inode *ip; + int error; +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + ip = VTOI(ap->a_vp); + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { /* read-only mount: clear the pending-update flags and write nothing */ + ip->i_flag &= + ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + return (0); + } + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + return (0); /* none of the update flags is set, so there is nothing to write */ + if (ip->i_flag & IN_ACCESS) + ip->i_atime.ts_sec = ap->a_access->tv_sec; + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime.ts_sec = ap->a_modify->tv_sec; + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) { +#if !defined(__FreeBSD__) + get_time(&time); +#endif + ip->i_ctime.ts_sec = time.tv_sec; /* under __FreeBSD__ no local 'time' is declared, so the name resolves outside this function */ + } + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + fs = ip->i_e2fs; + if (error = bread(ip->i_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->s_blocksize, NOCRED, &bp)) { /* read the block that ino_to_fsba() yields for this inode number */ + brelse(bp); + return (error); + } + ext2_di2ei( &ip->i_din, (struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * + ino_to_fsbo(fs, ip->i_number))); /* convert ip->i_din into the buffer slot at byte offset EXT2_INODE_SIZE * ino_to_fsbo() */ +/* + if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + return (bwrite(bp)); + else { +*/ + bdwrite(bp); /* a_waitfor is ignored: the bwrite() alternative is commented out, so bdwrite() is always used */ + return (0); +/* + } +*/ +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode oip to at most length size, freeing the + * disk blocks.
+ */ +int +ext2_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = ap->a_vp; + register daddr_t lastblock; + register struct inode *oip; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; + off_t length = ap->a_length; + register struct ext2_sb_info *fs; + struct buf *bp; + int offset, size, level; + long count, nblocks, vflags, blocksreleased = 0; + struct timeval tv; + register int i; + int aflags, error, allerror; + off_t osize; +/* +printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); +*/ /* + * negative file sizes will totally break the code below and + * are not meaningful anyways. + */ + if (length < 0) + return EFBIG; + + oip = VTOI(ovp); +#if defined(__FreeBSD__) + tv = time; +#else + get_time(&tv); +#endif + if (ovp->v_type == VLNK && + oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { +#if DIAGNOSTIC + if (length != 0) + panic("ext2_truncate: partial truncate of symlink"); +#endif + bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); + oip->i_size = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + if (oip->i_size == length) { + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 0)); + } +#if QUOTA + if (error = getinoquota(oip)) + return (error); +#endif + vnode_pager_setsize(ovp, (u_long)length); + fs = oip->i_e2fs; + osize = oip->i_size; + ext2_discard_prealloc(oip); + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of oszie is 0, length will be at least 1. 
+ */ + if (osize < length) { + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ext2_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; +#if !defined(__FreeBSD__) + (void) vnode_pager_uncache(ovp); +#endif + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + /* + * Shorten the size of the file. If the file is not being + * truncated to a block boundry, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever become accessable again because + * of subsequent file growth. + */ + /* I don't understand the comment above */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ext2_balloc(oip, lbn, offset, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + size = blksize(fs, oip, lbn); +#if !defined(__FreeBSD__) + (void) vnode_pager_uncache(ovp); +#endif + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* + * Calculate index into inode's block list of + * last direct and indirect blocks (if any) + * which we want to keep. Lastblock is -1 when + * the file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->s_blocksize - 1) - 1; + lastiblock[SINGLE] = lastblock - NDADDR; + lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); + lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); + nblocks = btodb(fs->s_blocksize); + /* + * Update file and block pointers on disk before we start freeing + * blocks. If we crash before free'ing blocks below, the blocks + * will be returned to the free list. 
lastiblock values are also + * normalized to -1 for calls to ext2_indirtrunc below. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); + for (level = TRIPLE; level >= SINGLE; level--) + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + lastiblock[level] = -1; + } + for (i = NDADDR - 1; i > lastblock; i--) + oip->i_db[i] = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT)) + allerror = error; + /* + * Having written the new inode to disk, save its new configuration + * and put back the old block pointers long enough to process them. + * Note that we save the new block configuration so we can check it + * when we are done. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); + bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); + oip->i_size = osize; + vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; + allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); + + /* + * Indirect blocks first. + */ + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; + for (level = TRIPLE; level >= SINGLE; level--) { + bn = oip->i_ib[level]; + if (bn != 0) { + error = ext2_indirtrunc(oip, indir_lbn[level], + fsbtodb(fs, bn), lastiblock[level], level, &count); + if (error) + allerror = error; + blocksreleased += count; + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + ext2_blkfree(oip, bn, fs->s_frag_size); + blocksreleased += nblocks; + } + } + if (lastiblock[level] >= 0) + goto done; + } + + /* + * All whole direct blocks or frags. 
+ */ + for (i = NDADDR - 1; i > lastblock; i--) { + register long bsize; + + bn = oip->i_db[i]; + if (bn == 0) + continue; + oip->i_db[i] = 0; + bsize = blksize(fs, oip, i); + ext2_blkfree(oip, bn, bsize); + blocksreleased += btodb(bsize); + } + if (lastblock < 0) + goto done; + + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + bn = oip->i_db[lastblock]; + if (bn != 0) { + long oldspace, newspace; + + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + oldspace = blksize(fs, oip, lastblock); + oip->i_size = length; + newspace = blksize(fs, oip, lastblock); + if (newspace == 0) + panic("itrunc: newspace"); + if (oldspace - newspace > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn += numfrags(fs, newspace); + ext2_blkfree(oip, bn, oldspace - newspace); + blocksreleased += btodb(oldspace - newspace); + } + } +done: +#if DIAGNOSTIC + for (level = SINGLE; level <= TRIPLE; level++) + if (newblks[NDADDR + level] != oip->i_ib[level]) + panic("itrunc1"); + for (i = 0; i < NDADDR; i++) + if (newblks[i] != oip->i_db[i]) + panic("itrunc2"); + if (length == 0 && + (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) + panic("itrunc3"); +#endif /* DIAGNOSTIC */ + /* + * Put back the real size. + */ + oip->i_size = length; + oip->i_blocks -= blocksreleased; + if (oip->i_blocks < 0) /* sanity */ + oip->i_blocks = 0; + oip->i_flag |= IN_CHANGE; +#if QUOTA + (void) chkdq(oip, -blocksreleased, NOCRED, 0); +#endif + return (allerror); +} + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. 
+ * + * NB: triple indirect blocks are untested. + */ + +static int +ext2_indirtrunc(ip, lbn, dbn, lastbn, level, countp) + register struct inode *ip; + daddr_t lbn, lastbn; + daddr_t dbn; + int level; + long *countp; +{ + register int i; + struct buf *bp; + register struct ext2_sb_info *fs = ip->i_e2fs; + register daddr_t *bap; + struct vnode *vp; + daddr_t *copy, nb, nlbn, last; + long blkcount, factor; + int nblocks, blocksreleased = 0; + int error = 0, allerror = 0; + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = 1; + for (i = SINGLE; i < level; i++) + factor *= NINDIR(fs); /* after the loop factor is NINDIR(fs) raised to the power 'level' */ + last = lastbn; + if (lastbn > 0) + last /= factor; /* lastbn <= 0 is passed through unchanged; callers in this file pass -1 to release everything */ + nblocks = btodb(fs->s_blocksize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->s_blocksize, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + /* Braces must be here in case trace evaluates to nothing.
*/ +#if !defined(__FreeBSD__) + trace(TR_BREADHIT, pack(vp, fs->s_blocksize), lbn); +#endif + } else { +#if !defined(__FreeBSD__) + trace(TR_BREADMISS, pack(vp, fs->s_blocksize), lbn); + get_proc()->p_stats->p_ru.ru_inblock++; /* pay for read */ +#endif + bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ext2_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; /* use the disk address supplied by the caller, as explained in the comment above */ +#if defined(__FreeBSD__) + vfs_busy_pages(bp, 0); +#endif + VOP_STRATEGY(bp); + error = biowait(bp); + } + if (error) { + brelse(bp); + *countp = 0; /* nothing was released when the indirect block could not be read */ + return (error); + } + + bap = (daddr_t *)bp->b_data; + MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize); /* keep the original pointers; the buffer copy is zeroed next */ + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + if (last == -1) + bp->b_flags |= B_INVAL; /* last == -1 means every entry of this block was just zeroed */ + error = bwrite(bp); + if (error) + allerror = error; + bap = copy; /* from here on walk the saved, un-zeroed copy */ + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if (error = ext2_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + ext2_blkfree(ip, nb, fs->s_blocksize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block.
+ */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = bap[i]; /* i and nlbn keep the values the loop above ended with; i equals the previous 'last' provided that index is below NINDIR(fs) */ + if (nb != 0) { + if (error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + } + FREE(copy, M_TEMP); + *countp = blocksreleased; /* running total; each freed block added btodb(fs->s_blocksize) */ + return (allerror); +} + +/* + * discard preallocated blocks + */ +int +ext2_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + ext2_discard_prealloc(VTOI(ap->a_vp)); + return ufs_inactive(ap); /* remaining work is delegated to ufs_inactive() */ +} + diff --git a/sys/gnu/ext2fs/ext2_inode_cnv.c b/sys/gnu/ext2fs/ext2_inode_cnv.c new file mode 100644 index 000000000000..1ab48e9c6184 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_inode_cnv.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 1995 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights.
+ * + * Utah $Hdr$ + */ + +/* + * routines to convert on disk ext2 inodes in dinodes and back + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* these defs would destroy the ext2_fs_i #include */ +#undef i_atime +#undef i_blocks +#undef i_ctime +#undef i_db +#undef i_flags +#undef i_gen +#undef i_gid +#undef i_ib +#undef i_mode +#undef i_mtime +#undef i_nlink +#undef i_rdev +#undef i_shortlink +#undef i_size +#undef i_uid + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_i.h> + +void ext2_print_dinode( di ) + struct dinode *di; +{ + int i; + printf( /* "Inode: %5d" */ + " Type: %10s Mode: 0x%o Flags: 0x%x Version: %d\n", + "n/a", di->di_mode, di->di_flags, di->di_gen); + printf( "User: %5d Group: %5d Size: %d\n", + di->di_uid, di->di_gid, di->di_size); + printf( "Links: %3d Blockcount: %d\n", + di->di_nlink, di->di_blocks); + printf( "ctime: 0x%x", di->di_ctime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_ctime.ts_sec); +#endif + printf( "atime: 0x%x", di->di_atime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_atime.ts_sec); +#endif + printf( "mtime: 0x%x", di->di_mtime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_mtime.ts_sec); +#endif + printf( "BLOCKS: "); + for(i=0; i < (di->di_blocks <= 24 ? ((di->di_blocks+1)/2): 12); i++) + printf("%d ", di->di_db[i]); + printf("\n"); +} + +void ext2_print_inode( in ) + struct inode *in; +{ + printf( "Inode: %5d", in->i_number); + ext2_print_dinode(&in->i_din); +} + +/* + * raw ext2 inode to dinode + */ +int ext2_ei2di(ei, di) + struct ext2_inode *ei; + struct dinode *di; +{ + int i; + + di->di_nlink = ei->i_links_count; + /* Godmar thinks - if the link count is zero, then the inode is + unused - according to ext2 standards. Ufs marks this fact + by setting i_mode to zero - why ? + I can see that this might lead to problems in an undelete. 
+ */ + di->di_mode = ei->i_links_count ? ei->i_mode : 0; + di->di_size = ei->i_size; + di->di_atime.ts_sec = ei->i_atime; + di->di_mtime.ts_sec = ei->i_mtime; + di->di_ctime.ts_sec = ei->i_ctime; + di->di_flags = 0; + di->di_flags |= (ei->i_flags & EXT2_APPEND_FL) ? APPEND : 0; + di->di_flags |= (ei->i_flags & EXT2_IMMUTABLE_FL) ? IMMUTABLE : 0; + di->di_blocks = ei->i_blocks; + di->di_gen = ei->i_version; /* XXX is that true ??? */ + di->di_uid = ei->i_uid; + di->di_gid = ei->i_gid; + /* XXX use memcpy */ + for(i = 0; i < NDADDR; i++) + di->di_db[i] = ei->i_block[i]; + for(i = 0; i < NIADDR; i++) + di->di_ib[i] = ei->i_block[EXT2_NDIR_BLOCKS + i]; +} + +/* + * dinode to raw ext2 inode + */ +int ext2_di2ei(di, ei) + struct dinode *di; + struct ext2_inode *ei; +{ + int i; + + ei->i_mode = di->di_mode; + ei->i_links_count = di->di_nlink; + /* + Godmar thinks: if dtime is nonzero, ext2 says this inode + has been deleted, this would correspond to a zero link count + */ + ei->i_dtime = ei->i_links_count ? 0 : di->di_mtime.ts_sec; + ei->i_size = di->di_size; + ei->i_atime = di->di_atime.ts_sec; + ei->i_mtime = di->di_mtime.ts_sec; + ei->i_ctime = di->di_ctime.ts_sec; + ei->i_flags = di->di_flags; + ei->i_flags = 0; + ei->i_flags |= (di->di_flags & APPEND) ? EXT2_APPEND_FL: 0; + ei->i_flags |= (di->di_flags & IMMUTABLE) + ? EXT2_IMMUTABLE_FL: 0; + ei->i_blocks = di->di_blocks; + ei->i_version = di->di_gen; /* XXX is that true ??? 
*/ + ei->i_uid = di->di_uid; + ei->i_gid = di->di_gid; + /* XXX use memcpy */ + for(i = 0; i < NDADDR; i++) + ei->i_block[i] = di->di_db[i]; + for(i = 0; i < NIADDR; i++) + ei->i_block[EXT2_NDIR_BLOCKS + i] = di->di_ib[i]; +} diff --git a/sys/gnu/ext2fs/ext2_linux_balloc.c b/sys/gnu/ext2fs/ext2_linux_balloc.c new file mode 100644 index 000000000000..25b98913d706 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_linux_balloc.c @@ -0,0 +1,582 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <sys/stat.h> + +#ifdef i386 +#include <gnu/ext2fs/i386-bitops.h> +#else +#error Provide an bitops.h file, please ! 
+#endif + +unsigned long ext2_count_free __P((struct buffer_head *, unsigned int)); + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/* got rid of get_group_desc since it can already be found in + * ext2_linux_ialloc.c + */ + +static void read_block_bitmap (struct mount * mp, + unsigned int block_group, + unsigned long bitmap_nr) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_group_desc * gdp; + struct buffer_head * bh; + int error; + + gdp = get_group_desc (mp, block_group, NULL); + if(error = bread (VFSTOUFS(mp)->um_devvp, + fsbtodb(sb, gdp->bg_block_bitmap),sb->s_blocksize, NOCRED, &bh)) + panic ( "read_block_bitmap: " + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %lu", + block_group, (unsigned long) gdp->bg_block_bitmap); + sb->s_block_bitmap_number[bitmap_nr] = block_group; + sb->s_block_bitmap[bitmap_nr] = bh; +} + +/* + * load_block_bitmap loads the block bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. 
+ */ +static int load__block_bitmap (struct mount * mp, + unsigned int block_group) +{ + int i, j; + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + unsigned long block_bitmap_number; + struct buffer_head * block_bitmap; + int error; + + if (block_group >= sb->s_groups_count) + panic ( "load_block_bitmap: " + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + + if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->s_block_bitmap[block_group]) { + if (sb->s_block_bitmap_number[block_group] != + block_group) + panic ( "load_block_bitmap: " + "block_group != block_bitmap_number"); + else + return block_group; + } else { + read_block_bitmap (mp, block_group, block_group); + return block_group; + } + } + + for (i = 0; i < sb->s_loaded_block_bitmaps && + sb->s_block_bitmap_number[i] != block_group; i++) + ; + if (i < sb->s_loaded_block_bitmaps && + sb->s_block_bitmap_number[i] == block_group) { + block_bitmap_number = sb->s_block_bitmap_number[i]; + block_bitmap = sb->s_block_bitmap[i]; + for (j = i; j > 0; j--) { + sb->s_block_bitmap_number[j] = + sb->s_block_bitmap_number[j - 1]; + sb->s_block_bitmap[j] = + sb->s_block_bitmap[j - 1]; + } + sb->s_block_bitmap_number[0] = block_bitmap_number; + sb->s_block_bitmap[0] = block_bitmap; + } else { + if (sb->s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->s_loaded_block_bitmaps++; + else + brelse (sb->s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->s_loaded_block_bitmaps - 1; j > 0; j--) { + sb->s_block_bitmap_number[j] = + sb->s_block_bitmap_number[j - 1]; + sb->s_block_bitmap[j] = + sb->s_block_bitmap[j - 1]; + } + read_block_bitmap (mp, block_group, 0); + } + return 0; +} + +static inline int load_block_bitmap (struct mount * mp, + unsigned int block_group) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + if (sb->s_loaded_block_bitmaps > 0 && + sb->s_block_bitmap_number[0] == block_group) + return 0; + + if (sb->s_groups_count <= 
EXT2_MAX_GROUP_LOADED && + sb->s_block_bitmap_number[block_group] == block_group && + sb->s_block_bitmap[block_group]) + return block_group; + + return load__block_bitmap (mp, block_group); +} + +void ext2_free_blocks (struct mount * mp, unsigned long block, + unsigned long count) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + unsigned long i; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es = sb->s_es; + + if (!sb) { + printf ("ext2_free_blocks: nonexistent device"); + return; + } + lock_super (VFSTOUFS(mp)->um_devvp); + if (block < es->s_first_data_block || + (block + count) > es->s_blocks_count) { + printf ( "ext2_free_blocks: " + "Freeing blocks not in datazone - " + "block = %lu, count = %lu", block, count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return; + } + + ext2_debug ("freeing blocks %lu to %lu\n", block, block+count-1); + + block_group = (block - es->s_first_data_block) / + EXT2_BLOCKS_PER_GROUP(sb); + bit = (block - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb); + if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) + panic ( "ext2_free_blocks: " + "Freeing blocks across group boundary - " + "Block = %lu, count = %lu", + block, count); + bitmap_nr = load_block_bitmap (mp, block_group); + bh = sb->s_block_bitmap[bitmap_nr]; + gdp = get_group_desc (mp, block_group, &bh2); + + if (/* test_opt (sb, CHECK_STRICT) && assume always strict ! 
*/ + (in_range (gdp->bg_block_bitmap, block, count) || + in_range (gdp->bg_inode_bitmap, block, count) || + in_range (block, gdp->bg_inode_table, + sb->s_itb_per_group) || + in_range (block + count - 1, gdp->bg_inode_table, + sb->s_itb_per_group))) + panic ( "ext2_free_blocks: " + "Freeing blocks in system zones - " + "Block = %lu, count = %lu", + block, count); + + for (i = 0; i < count; i++) { + if (!clear_bit (bit + i, bh->b_data)) + printf ("ext2_free_blocks: " + "bit already cleared for block %lu", + block); + else { + gdp->bg_free_blocks_count++; + es->s_free_blocks_count++; + } + } + + mark_buffer_dirty(bh2); + mark_buffer_dirty(bh, 1); +/**** + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +****/ + sb->s_dirt = 1; + unlock_super (VFSTOUFS(mp)->um_devvp); + return; +} + +/* + * ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. + */ +int ext2_new_block (struct mount * mp, unsigned long goal, + long * prealloc_count, + long * prealloc_block) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct buffer_head * bh2; + char * p, * r; + int i, j, k, tmp; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es = sb->s_es; + +#ifdef EXT2FS_DEBUG + static int goal_hits = 0, goal_attempts = 0; +#endif + if (!sb) { + printf ("ext2_new_block: nonexistent device"); + return 0; + } + lock_super (VFSTOUFS(mp)->um_devvp); + + ext2_debug ("goal=%lu.\n", goal); + +repeat: + /* + * First, test whether the goal block is free. 
+ */ + if (goal < es->s_first_data_block || goal >= es->s_blocks_count) + goal = es->s_first_data_block; + i = (goal - es->s_first_data_block) / EXT2_BLOCKS_PER_GROUP(sb); + gdp = get_group_desc (mp, i, &bh2); + if (gdp->bg_free_blocks_count > 0) { + j = ((goal - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb)); +#ifdef EXT2FS_DEBUG + if (j) + goal_attempts++; +#endif + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + + ext2_debug ("goal is at %d:%d.\n", i, j); + + if (!test_bit(j, bh->b_data)) { +#ifdef EXT2FS_DEBUG + goal_hits++; + ext2_debug ("goal bit allocated.\n"); +#endif + goto got_block; + } + if (j) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + int end_goal = (j + 63) & ~63; + j = find_next_zero_bit(bh->b_data, end_goal, j); + if (j < end_goal) + goto got_block; + } + + ext2_debug ("Bit not found near goal\n"); + + /* + * There has been no free block found in the near vicinity + * of the goal: do a search forward through the block groups, + * searching in each group first for an entire free byte in + * the bitmap and then for any free bit. + * + * Search first in the remainder of the current group; then, + * cyclicly search through the rest of the groups. + */ + p = ((char *) bh->b_data) + (j >> 3); + r = memscan(p, 0, (EXT2_BLOCKS_PER_GROUP(sb) - j + 7) >> 3); + k = (r - ((char *) bh->b_data)) << 3; + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto search_back; + } + k = find_next_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb), + j); + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto got_block; + } + } + + ext2_debug ("Bit not found in block group %d.\n", i); + + /* + * Now search the rest of the groups. We assume that + * i and gdp correctly point to the last group visited. 
+ */ + for (k = 0; k < sb->s_groups_count; k++) { + i++; + if (i >= sb->s_groups_count) + i = 0; + gdp = get_group_desc (mp, i, &bh2); + if (gdp->bg_free_blocks_count > 0) + break; + } + if (k >= sb->s_groups_count) { + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + r = memscan(bh->b_data, 0, EXT2_BLOCKS_PER_GROUP(sb) >> 3); + j = (r - bh->b_data) << 3; + + if (j < EXT2_BLOCKS_PER_GROUP(sb)) + goto search_back; + else + j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb)); + if (j >= EXT2_BLOCKS_PER_GROUP(sb)) { + printf ( "ext2_new_block: " + "Free blocks count corrupted for block group %d", i); + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + +search_back: + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards up to 7 bits to find the + * start of this group of free blocks. + */ + for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh->b_data); k++, j--); + +got_block: + + ext2_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count); + + tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + es->s_first_data_block; + + if (/* test_opt (sb, CHECK_STRICT) && we are always strict. */ + (tmp == gdp->bg_block_bitmap || + tmp == gdp->bg_inode_bitmap || + in_range (tmp, gdp->bg_inode_table, sb->s_itb_per_group))) + panic ( "ext2_new_block: " + "Allocating block in system zone - " + "%dth block = %u in group %u", j, tmp, i); + + if (set_bit (j, bh->b_data)) { + printf ( "ext2_new_block: " + "bit already set for block %d", j); + goto repeat; + } + + ext2_debug ("found bit %d\n", j); + + /* + * Do block preallocation now if required. 
+ */ +#ifdef EXT2_PREALLOCATE + if (prealloc_block) { + *prealloc_count = 0; + *prealloc_block = tmp + 1; + for (k = 1; + k < 8 && (j + k) < EXT2_BLOCKS_PER_GROUP(sb); k++) { + if (set_bit (j + k, bh->b_data)) + break; + (*prealloc_count)++; + } + gdp->bg_free_blocks_count -= *prealloc_count; + es->s_free_blocks_count -= *prealloc_count; + ext2_debug ("Preallocated a further %lu bits.\n", + *prealloc_count); + } +#endif + + j = tmp; + + mark_buffer_dirty(bh); +/**** + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +****/ + if (j >= es->s_blocks_count) { + printf ( "ext2_new_block: " + "block >= blocks count - " + "block_group = %d, block=%d", i, j); + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + + ext2_debug ("allocating block %d. " + "Goal hits %d of %d.\n", j, goal_hits, goal_attempts); + + gdp->bg_free_blocks_count--; + mark_buffer_dirty(bh2, 1); + es->s_free_blocks_count--; + sb->s_dirt = 1; + unlock_super (VFSTOUFS(mp)->um_devvp); + return j; +} + +unsigned long ext2_count_free_blocks (struct mount * mp) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; +#ifdef EXT2FS_DEBUG + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (mp, i); + x = ext2_count_free (sb->s_block_bitmap[bitmap_nr], + sb->s_blocksize); + ext2_debug ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + ext2_debug( "stored = %lu, computed = %lu, %lu\n", + es->s_free_blocks_count, desc_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return bitmap_count; +#else + return sb->s_es->s_free_blocks_count; +#endif +} + + 
+static inline int block_in_use (unsigned long block, + struct ext2_sb_info * sb, + unsigned char * map) +{ + return test_bit ((block - sb->s_es->s_first_data_block) % + EXT2_BLOCKS_PER_GROUP(sb), map); +} + +void ext2_check_blocks_bitmap (struct mount * mp) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + unsigned long desc_blocks; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i, j; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + desc_blocks = (sb->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + + if (!test_bit (0, bh->b_data)) + printf ( "ext2_check_blocks_bitmap: " + "Superblock in group %d is marked free", i); + + for (j = 0; j < desc_blocks; j++) + if (!test_bit (j + 1, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Descriptor block #%d in group " + "%d is marked free", j, i); + + if (!block_in_use (gdp->bg_block_bitmap, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Block bitmap for group %d is marked free", + i); + + if (!block_in_use (gdp->bg_inode_bitmap, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Inode bitmap for group %d is marked free", + i); + + for (j = 0; j < sb->s_itb_per_group; j++) + if (!block_in_use (gdp->bg_inode_table + j, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Block #%d of the inode table in " + "group %d is marked free", j, i); + + x = ext2_count_free (bh, sb->s_blocksize); + if (gdp->bg_free_blocks_count != x) + printf ("ext2_check_blocks_bitmap: " + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_blocks_count, x); + bitmap_count += 
x; + } + if (es->s_free_blocks_count != bitmap_count) + printf ("ext2_check_blocks_bitmap: " + "Wrong free blocks count in super block, " + "stored = %lu, counted = %lu", + (unsigned long) es->s_free_blocks_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); +} + +/* + * this function is taken from + * linux/fs/ext2/bitmap.c + */ + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} + diff --git a/sys/gnu/ext2fs/ext2_linux_ialloc.c b/sys/gnu/ext2fs/ext2_linux_ialloc.c new file mode 100644 index 000000000000..62e79387da35 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_linux_ialloc.c @@ -0,0 +1,520 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/fs/ext2/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <sys/stat.h> + +#if (i386) +#include <gnu/ext2fs/i386-bitops.h> +#else +#error please provide bit operation functions +#endif + +/* this is supposed to mark a buffer dirty on ready for delayed writing + */ +void mark_buffer_dirty(struct buf *bh) +{ + bh->b_flags |= B_DELWRI; + bh->b_flags &= ~(B_READ | B_ERROR); +} + +/* + this should write a buffer immediately w/o releasing it + */ +int ll_w_block(struct buf * bp, int waitfor) +{ + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + bp->b_flags |= B_WRITEINPROG; + bp->b_vp->v_numoutput++; +#if defined(__FreeBSD__) + vfs_busy_pages(bp, 1); +#endif + VOP_STRATEGY(bp); + return waitfor ? biowait(bp) : 0; +} + +struct ext2_group_desc * get_group_desc (struct mount * mp, + unsigned int block_group, + struct buffer_head ** bh) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + unsigned long group_desc; + unsigned long desc; + struct ext2_group_desc * gdp; + + if (block_group >= sb->s_groups_count) + panic ("get_group_desc: " + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + + group_desc = block_group / EXT2_DESC_PER_BLOCK(sb); + desc = block_group % EXT2_DESC_PER_BLOCK(sb); + if (!sb->s_group_desc[group_desc]) + panic ( "get_group_desc:" + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, desc); + gdp = (struct ext2_group_desc *) + sb->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sb->s_group_desc[group_desc]; + return gdp + desc; +} + +static void read_inode_bitmap (struct mount * mp, + unsigned long block_group, + unsigned int bitmap_nr) +{ + 
struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_group_desc * gdp; + struct buffer_head * bh; + int error; + + gdp = get_group_desc (mp, block_group, NULL); + if (error = bread (VFSTOUFS(mp)->um_devvp, + fsbtodb(sb, gdp->bg_inode_bitmap), + sb->s_blocksize, + NOCRED, &bh)) + panic ( "read_inode_bitmap:" + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %lu", + block_group, (unsigned long) gdp->bg_inode_bitmap); + sb->s_inode_bitmap_number[bitmap_nr] = block_group; + sb->s_inode_bitmap[bitmap_nr] = bh; +} + +/* + * load_inode_bitmap loads the inode bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + */ +static int load_inode_bitmap (struct mount * mp, + unsigned int block_group) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + int i, j; + unsigned long inode_bitmap_number; + struct buffer_head * inode_bitmap; + + if (block_group >= sb->s_groups_count) + panic ("load_inode_bitmap:" + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + if (sb->s_loaded_inode_bitmaps > 0 && + sb->s_inode_bitmap_number[0] == block_group) + return 0; + if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->s_inode_bitmap[block_group]) { + if (sb->s_inode_bitmap_number[block_group] != + block_group) + panic ( "load_inode_bitmap:" + "block_group != inode_bitmap_number"); + else + return block_group; + } else { + read_inode_bitmap (mp, block_group, block_group); + return block_group; + } + } + + for (i = 0; i < sb->s_loaded_inode_bitmaps && + sb->s_inode_bitmap_number[i] != block_group; + i++) + ; + if (i < sb->s_loaded_inode_bitmaps && + sb->s_inode_bitmap_number[i] == block_group) { + 
inode_bitmap_number = sb->s_inode_bitmap_number[i]; + inode_bitmap = sb->s_inode_bitmap[i]; + for (j = i; j > 0; j--) { + sb->s_inode_bitmap_number[j] = + sb->s_inode_bitmap_number[j - 1]; + sb->s_inode_bitmap[j] = + sb->s_inode_bitmap[j - 1]; + } + sb->s_inode_bitmap_number[0] = inode_bitmap_number; + sb->s_inode_bitmap[0] = inode_bitmap; + } else { + if (sb->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->s_loaded_inode_bitmaps++; + else + brelse (sb->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->s_loaded_inode_bitmaps - 1; j > 0; j--) { + sb->s_inode_bitmap_number[j] = + sb->s_inode_bitmap_number[j - 1]; + sb->s_inode_bitmap[j] = + sb->s_inode_bitmap[j - 1]; + } + read_inode_bitmap (mp, block_group, 0); + } + return 0; +} + + +void ext2_free_inode (struct inode * inode) +{ + struct ext2_sb_info * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es; + + if (!inode) + return; + + if (inode->i_nlink) { + printf ("ext2_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + + ext2_debug ("freeing inode %lu\n", inode->i_number); + + sb = inode->i_e2fs; + lock_super (DEVVP(inode)); + if (inode->i_number < EXT2_FIRST_INO || + inode->i_number > sb->s_es->s_inodes_count) { + printf ("free_inode reserved inode or nonexistent inode"); + unlock_super (DEVVP(inode)); + return; + } + es = sb->s_es; + block_group = (inode->i_number - 1) / EXT2_INODES_PER_GROUP(sb); + bit = (inode->i_number - 1) % EXT2_INODES_PER_GROUP(sb); + bitmap_nr = load_inode_bitmap (ITOV(inode)->v_mount, block_group); + bh = sb->s_inode_bitmap[bitmap_nr]; + if (!clear_bit (bit, bh->b_data)) + printf ( "ext2_free_inode:" + "bit already cleared for inode %lu", inode->i_number); + else { + gdp = get_group_desc (ITOV(inode)->v_mount, block_group, &bh2); + gdp->bg_free_inodes_count++; + if (S_ISDIR(inode->i_mode)) + gdp->bg_used_dirs_count--; 
+ mark_buffer_dirty(bh2); + es->s_free_inodes_count++; + } + mark_buffer_dirty(bh); +/*** XXX + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +***/ + sb->s_dirt = 1; + unlock_super (DEVVP(inode)); +} + +#if linux +/* + * This function increments the inode version number + * + * This may be used one day by the NFS server + */ +static void inc_inode_version (struct inode * inode, + struct ext2_group_desc *gdp, + int mode) +{ + unsigned long inode_block; + struct buffer_head * bh; + struct ext2_inode * raw_inode; + + inode_block = gdp->bg_inode_table + (((inode->i_number - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) / + EXT2_INODES_PER_BLOCK(inode->i_sb)); + bh = bread (inode->i_sb->s_dev, inode_block, inode->i_sb->s_blocksize); + if (!bh) { + printf ("inc_inode_version Cannot load inode table block - " + "inode=%lu, inode_block=%lu\n", + inode->i_number, inode_block); + inode->u.ext2_i.i_version = 1; + return; + } + raw_inode = ((struct ext2_inode *) bh->b_data) + + (((inode->i_number - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) % + EXT2_INODES_PER_BLOCK(inode->i_sb)); + raw_inode->i_version++; + inode->u.ext2_i.i_version = raw_inode->i_version; + mark_buffer_dirty(bh, 1); + brelse (bh); +} + +#endif /* linux */ + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory\'s block + * group to find a free inode. 
+ */ +/* + * this functino has been reduced to the actual 'find the inode number' part + */ +ino_t ext2_new_inode (const struct inode * dir, int mode) +{ + struct ext2_sb_info * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + int i, j, avefreei; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_group_desc * tmp; + struct ext2_super_block * es; + + if (!dir) + return 0; + sb = dir->i_e2fs; + + lock_super (DEVVP(dir)); + es = sb->s_es; +repeat: + gdp = NULL; i=0; + + if (S_ISDIR(mode)) { + avefreei = es->s_free_inodes_count / + sb->s_groups_count; +/* I am not yet convinced that this next bit is necessary. + i = dir->u.ext2_i.i_block_group; + for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { + tmp = get_group_desc (sb, i, &bh2); + if ((tmp->bg_used_dirs_count << 8) < + tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + else + i = ++i % sb->u.ext2_sb.s_groups_count; + } +*/ + if (!gdp) { + for (j = 0; j < sb->s_groups_count; j++) { + tmp = get_group_desc(ITOV(dir)->v_mount,j,&bh2); + if (tmp->bg_free_inodes_count && + tmp->bg_free_inodes_count >= avefreei) { + if (!gdp || + (tmp->bg_free_blocks_count > + gdp->bg_free_blocks_count)) { + i = j; + gdp = tmp; + } + } + } + } + } + else + { + /* + * Try to place the inode in its parent directory + */ + i = dir->i_block_group; + tmp = get_group_desc (ITOV(dir)->v_mount, i, &bh2); + if (tmp->bg_free_inodes_count) + gdp = tmp; + else + { + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (j = 1; j < sb->s_groups_count; j <<= 1) { + i += j; + if (i >= sb->s_groups_count) + i -= sb->s_groups_count; + tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); + if (tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + if (!gdp) { + /* + * That failed: try linear search for a free inode + */ + i = dir->i_block_group + 1; + for (j = 2; j < sb->s_groups_count; j++) { + if (++i >= sb->s_groups_count) + i = 0; + tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); + if 
(tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + } + + if (!gdp) { + unlock_super (DEVVP(dir)); + return 0; + } + bitmap_nr = load_inode_bitmap (ITOV(dir)->v_mount, i); + bh = sb->s_inode_bitmap[bitmap_nr]; + if ((j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_INODES_PER_GROUP(sb))) < + EXT2_INODES_PER_GROUP(sb)) { + if (set_bit (j, bh->b_data)) { + printf ( "ext2_new_inode:" + "bit already set for inode %d", j); + goto repeat; + } +/* Linux now does the following: + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +*/ + mark_buffer_dirty(bh); + } else { + if (gdp->bg_free_inodes_count != 0) { + printf ( "ext2_new_inode:" + "Free inodes count corrupted in group %d", + i); + unlock_super (DEVVP(dir)); + return 0; + } + goto repeat; + } + j += i * EXT2_INODES_PER_GROUP(sb) + 1; + if (j < EXT2_FIRST_INO || j > es->s_inodes_count) { + printf ( "ext2_new_inode:" + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%d", i, j); + unlock_super (DEVVP(dir)); + return 0; + } + gdp->bg_free_inodes_count--; + if (S_ISDIR(mode)) + gdp->bg_used_dirs_count++; + mark_buffer_dirty(bh2); + es->s_free_inodes_count--; + /* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */ + sb->s_dirt = 1; + unlock_super (DEVVP(dir)); + return j; +} + +unsigned long ext2_count_free_inodes (struct mount * mp) +{ +#ifdef EXT2FS_DEBUG + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (mp, i); + x = ext2_count_free (sb->s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + 
ext2_debug ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + ext2_debug("stored = %lu, computed = %lu, %lu\n", + es->s_free_inodes_count, desc_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return desc_count; +#else + return VFSTOUFS(mp)->um_e2fsb->s_free_inodes_count; +#endif +} + +#ifdef LATER +void ext2_check_inodes_bitmap (struct mount * mp) +{ + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (sb, i); + x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + if (gdp->bg_free_inodes_count != x) + printf ( "ext2_check_inodes_bitmap:" + "Wrong free inodes count in group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + if (es->s_free_inodes_count != bitmap_count) + printf ( "ext2_check_inodes_bitmap:" + "Wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + (unsigned long) es->s_free_inodes_count, bitmap_count); + unlock_super (sb); +} +#endif + diff --git a/sys/gnu/ext2fs/ext2_lookup.c b/sys/gnu/ext2fs/ext2_lookup.c new file mode 100644 index 000000000000..79f30f288288 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_lookup.c @@ -0,0 +1,1083 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/buf.h> +#include <sys/file.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/dirent.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> + +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> + +/* + DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512) + while it is the native blocksize in ext2fs - thus, a #define + is no longer appropriate +*/ +#undef DIRBLKSIZ + +#if 1 +extern struct nchstats nchstats; +static int dirchk = 1; +#else +struct nchstats nchstats; +#if DIAGNOSTIC +int dirchk = 1; +#else +int dirchk = 0; +#endif +#endif + +/* + * the problem that is tackled below is the fact that FFS + * includes the terminating zero on disk while EXT2FS doesn't + * this implies that we need to introduce some padding. + * For instance, a filename "sbin" has normally a reclen 12 + * in EXT2, but 16 in FFS. + * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...' 
+ * If it wasn't for that, the complete ufs code for directories would + * have worked w/o changes (except for the difference in DIRBLKSIZ) + */ +static void +ext2_dirconv2ffs( e2dir, ffsdir) + struct ext2_dir_entry *e2dir; + struct dirent *ffsdir; +{ + struct dirent de; + + bzero(&de, sizeof(struct dirent)); + de.d_fileno = e2dir->inode; + de.d_namlen = e2dir->name_len; + +#ifndef NO_HARDWIRED_CONSTANTS + if(e2dir->name_len + 8 == e2dir->rec_len) + de.d_reclen += 4; + + de.d_type = DT_UNKNOWN; /* don't know more here */ + strncpy(de.d_name, e2dir->name, e2dir->name_len); + de.d_name[de.d_namlen] = '\0'; + /* Godmar thinks: since e2dir->rec_len can be big and means + nothing anyway, we compute our own reclen according to what + we think is right + */ + de.d_reclen = (de.d_namlen+8+1+3) & ~3; + bcopy(&de, ffsdir, de.d_reclen); +#endif + +#if 0 + printf("dirconv: ino %d rec old %d rec new %d nam %d name %s\n", + ffsdir->d_fileno, e2dir->rec_len, ffsdir->d_reclen, + ffsdir->d_namlen, ffsdir->d_name); +#endif +} + +/* + * Vnode op for reading directories. + * + * The routine below assumes that the on-disk format of a directory + * is the same as that defined by <sys/dirent.h>. If the on-disk + * format changes, then it will be necessary to do a conversion + * from the on-disk format that read returns to the format defined + * by <sys/dirent.h>. + */ +/* + * this is exactly what we do here - the problem is that the conversion + * will blow up some entries by four bytes, so it can't be done in place. + * This is too bad. Right now the conversion is done entry by entry, the + * converted entry is sent via uiomove. 
+ * + * XXX allocate a buffer, convert as many entries as possible, then send + * the whole buffer to uiomove + */ +int +ext2_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct uio *uio = ap->a_uio; + int count, lost, error; + + struct ext2_dir_entry *edp, *dp; + struct dirent dstdp; + struct uio auio; + struct iovec aiov; + caddr_t dirbuf; + int readcnt; + u_quad_t startoffset = uio->uio_offset; + u_char tmp; + int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize; + + count = uio->uio_resid; /* legyenek boldogok akik akarnak ... */ + uio->uio_resid = count; + uio->uio_iov->iov_len = count; + +#if 0 +printf("ext2_readdir called uio->uio_offset %d uio->uio_resid %d count %d \n", + (int)uio->uio_offset, (int)uio->uio_resid, (int)count); +#endif + + auio = *uio; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + aiov.iov_len = count; + MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); + aiov.iov_base = dirbuf; + error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + if (error == 0) { + readcnt = count - auio.uio_resid; + edp = (struct ext2_dir_entry *)&dirbuf[readcnt]; + for (dp = (struct ext2_dir_entry *)dirbuf; + !error && uio->uio_resid > 0 && dp < edp; ) { + ext2_dirconv2ffs(dp, &dstdp); + if (dp->rec_len > 0) { + if(dstdp.d_reclen <= uio->uio_resid) { + /* advance dp */ + dp = (struct ext2_dir_entry *) + ((char *)dp + dp->rec_len); + error = + uiomove(&dstdp, dstdp.d_reclen, uio); + } else + break; + } else { + error = EIO; + break; + } + } + /* we need to correct uio_offset */ + uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; + } + FREE(dirbuf, M_TEMP); + return (error); +} + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. 
+ * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does an vrele and vput + * instead of two vputs. + * + * Overall outline of ufs_lookup: + * + * check accessibility of directory + * look for name in cache, if found, then if at end of path + * and deleting or creating, drop it, else return name + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * inode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ +int +ext2_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vdp; /* vnode for directory being searched */ + register struct inode *dp; /* inode for directory being searched */ + struct buf *bp; /* a buffer of directory entries */ + register struct ext2_dir_entry *ep; /* the current directory entry */ + int entryoffsetinblock; /* offset of ep in bp's buffer */ + enum {NONE, 
COMPACT, FOUND} slotstatus; + doff_t slotoffset; /* offset of area with free space */ + int slotsize; /* size of area at slotoffset */ + int slotfreespace; /* amount of space free in slot */ + int slotneeded; /* size of the entry we're seeking */ + int numdirpasses; /* strategy for directory search */ + doff_t endsearch; /* offset to end directory search */ + doff_t prevoff; /* prev entry dp->i_offset */ + struct vnode *pdp; /* saved dp during symlink work */ + struct vnode *tdp; /* returned by VFS_VGET */ + doff_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int lockparent; /* 1 => lockparent flag is set */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int namlen, error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + + int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize; + + bp = NULL; + slotoffset = -1; + *vpp = NULL; + vdp = ap->a_dvp; + dp = VTOI(vdp); + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + + /* + * Check accessiblity of directory. + */ + if ((dp->i_mode & IFMT) != IFDIR) + return (ENOTDIR); + if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) + return (error); + + /* + * We now have a segment name to search for, and a directory to search. + * + * Before tediously performing a linear scan of the directory, + * check the name cache to see if the directory/name pair + * we are looking for is known already. + */ + if (error = cache_lookup(vdp, vpp, cnp)) { + int vpid; /* capability number of vnode */ + + if (error == ENOENT) + return (error); + /* + * Get the next vnode in the path. + * See comment below starting `Step through' for + * an explaination of the locking protocol. + */ + pdp = vdp; + dp = VTOI(*vpp); + vdp = *vpp; + vpid = vdp->v_id; + if (pdp == vdp) { /* lookup on "." 
*/ + VREF(vdp); + error = 0; + } else if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); + error = vget(vdp, 1); + if (!error && lockparent && (flags & ISLASTCN)) + error = VOP_LOCK(pdp); + } else { + error = vget(vdp, 1); + if (!lockparent || error || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + /* + * Check that the capability number did not change + * while we were waiting for the lock. + */ + if (!error) { + if (vpid == vdp->v_id) + return (0); + vput(vdp); + if (lockparent && pdp != vdp && (flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + if (error = VOP_LOCK(pdp)) + return (error); + vdp = pdp; + dp = VTOI(pdp); + *vpp = NULL; + } + + /* + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + slotstatus = FOUND; + slotfreespace = slotsize = slotneeded = 0; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) { + slotstatus = NONE; + slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen); + /* was + slotneeded = (sizeof(struct direct) - MAXNAMLEN + + cnp->cn_namelen + 3) &~ 3; */ + } + + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. 
+ */ + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + if (nameiop != LOOKUP || dp->i_diroff == 0 || + dp->i_diroff > dp->i_size) { + entryoffsetinblock = 0; + dp->i_offset = 0; + numdirpasses = 1; + } else { + dp->i_offset = dp->i_diroff; + if ((entryoffsetinblock = dp->i_offset & bmask) && + (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + prevoff = dp->i_offset; + endsearch = roundup(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (dp->i_offset < endsearch) { + /* + * If necessary, get the next directory block. + */ + if ((dp->i_offset & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if (error = + VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) + return (error); + entryoffsetinblock = 0; + } + /* + * If still looking for a slot, and at a DIRBLKSIZE + * boundary, have to start looking for free space again. + */ + if (slotstatus == NONE && + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { + slotoffset = -1; + slotfreespace = 0; + } + /* + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. Complete checks can be run by patching + * "dirchk" to be true. + */ + ep = (struct ext2_dir_entry *) + ((char *)bp->b_data + entryoffsetinblock); + if (ep->rec_len == 0 || + dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock)) { + int i; + ufs_dirbad(dp, dp->i_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + dp->i_offset += i; + entryoffsetinblock += i; + continue; + } + + /* + * If an appropriate sized slot has not yet been found, + * check to see if one is available. Also accumulate space + * in the current block so that we can determine if + * compaction is viable. 
+ */ + if (slotstatus != FOUND) { + int size = ep->rec_len; + + if (ep->inode != 0) + size -= EXT2_DIR_REC_LEN(ep->name_len); + if (size > 0) { + if (size >= slotneeded) { + slotstatus = FOUND; + slotoffset = dp->i_offset; + slotsize = ep->rec_len; + } else if (slotstatus == NONE) { + slotfreespace += size; + if (slotoffset == -1) + slotoffset = dp->i_offset; + if (slotfreespace >= slotneeded) { + slotstatus = COMPACT; + slotsize = dp->i_offset + + ep->rec_len - slotoffset; + } + } + } + } + + /* + * Check for a name match. + */ + if (ep->inode) { + namlen = ep->name_len; + if (namlen == cnp->cn_namelen && + !bcmp(cnp->cn_nameptr, ep->name, + (unsigned)namlen)) { + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + dp->i_ino = ep->inode; + dp->i_reclen = ep->rec_len; + brelse(bp); + goto found; + } + } + prevoff = dp->i_offset; + dp->i_offset += ep->rec_len; + entryoffsetinblock += ep->rec_len; + if (ep->inode) + enduseful = dp->i_offset; + } +/* notfound: */ + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + dp->i_offset = 0; + endsearch = dp->i_diroff; + goto searchloop; + } + if (bp != NULL) + brelse(bp); + /* + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && dp->i_nlink != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. If we didn't find a slot, + * then set dp->i_count to 0 indicating + * that the new slot belongs at the end of the + * directory. 
If we found a slot, then the new entry + * can be put in the range from dp->i_offset to + * dp->i_offset + dp->i_count. + */ + if (slotstatus == NONE) { + dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); + dp->i_count = 0; + enduseful = dp->i_offset; + } else { + dp->i_offset = slotoffset; + dp->i_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; + } + dp->i_endoff = roundup(enduseful, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(vdp, *vpp, cnp); + return (ENOENT); + +found: + if (numdirpasses == 2) + nchstats.ncs_pass2++; + /* + * Check that directory length properly reflects presence + * of this entry. + */ + if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len) + > dp->i_size) { + ufs_dirbad(dp, dp->i_offset, "i_size too small"); + dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + } + + /* + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. + */ + if ((flags & ISLASTCN) && nameiop == LOOKUP) + dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); + + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. 
+ * If the wantparent flag isn't set, we return only + * the directory (in ndp->ni_dvp), otherwise we go + * on and lock the inode, being careful with ".". + */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + /* + * Write access to directory required to delete files. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return pointer to current entry in dp->i_offset, + * and distance past previous entry (if there + * is a previous entry in this block) in dp->i_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + */ + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + if (dp->i_number == dp->i_ino) { + VREF(vdp); + *vpp = vdp; + return (0); + } + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((dp->i_mode & ISVTX) && + cred->cr_uid != 0 && + cred->cr_uid != dp->i_uid && + VTOI(tdp)->i_uid != cred->cr_uid) { + vput(tdp); + return (EPERM); + } + *vpp = tdp; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && wantparent && + (flags & ISLASTCN)) { + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == dp->i_ino) + return (EISDIR); + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + *vpp = tdp; + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * Step through the translation in the name. 
We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); /* race to get the inode */ + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) { + VOP_LOCK(pdp); + return (error); + } + if (lockparent && (flags & ISLASTCN) && + (error = VOP_LOCK(pdp))) { + vput(tdp); + return (error); + } + *vpp = tdp; + } else if (dp->i_number == dp->i_ino) { + VREF(vdp); /* we want ourself, ie "." */ + *vpp = vdp; + } else { + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + if (!lockparent || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + *vpp = tdp; + } + + /* + * Insert name into cache if appropriate. 
+ */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +/* + * changed so that it confirms to ext2_check_dir_entry + */ +int +ext2_dirbadentry(dp, de, entryoffsetinblock) + struct vnode *dp; + register struct ext2_dir_entry *de; + int entryoffsetinblock; +{ + register int i; + int namlen; + int DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize; + + char * error_msg = NULL; + + if (de->rec_len < EXT2_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (de->rec_len % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len)) + error_msg = "reclen is too small for name_len"; + else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ) + error_msg = "directory entry across blocks"; + /* else LATER + if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count) + error_msg = "inode out of bounds"; + */ + + if (error_msg != NULL) + printf( "bad directory entry: %s\n" + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d \n", + error_msg, entryoffsetinblock, + (unsigned long) de->inode, de->rec_len, de->name_len); + return error_msg == NULL ? 0 : 1; +} + +/* + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument ip is the inode which the new + * directory entry will refer to. Dvp is a pointer to the directory to + * be written, which was left locked by namei. Remaining parameters + * (dp->i_offset, dp->i_count) indicate how the space for the new + * entry is to be obtained. 
+ */ +int +ext2_direnter(ip, dvp, cnp) + struct inode *ip; + struct vnode *dvp; + register struct componentname *cnp; +{ + register struct ext2_dir_entry *ep, *nep; + register struct inode *dp; + struct buf *bp; + struct ext2_dir_entry newdir; + struct iovec aiov; + struct uio auio; + u_int dsize; + int error, loc, newentrysize, spacefree; + char *dirbuf; + int DIRBLKSIZ = ip->i_e2fs->s_blocksize; + + +#if DIAGNOSTIC + if ((cnp->cn_flags & SAVENAME) == 0) + panic("direnter: missing name"); +#endif + dp = VTOI(dvp); + newdir.inode = ip->i_number; + newdir.name_len = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1); + newentrysize = EXT2_DIR_REC_LEN(newdir.name_len); + if (dp->i_count == 0) { + /* + * If dp->i_count is 0, then namei could find no + * space in the directory. Here, dp->i_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. + */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ext2_direnter: newblk"); + auio.uio_offset = dp->i_offset; + newdir.rec_len = DIRBLKSIZ; + auio.uio_resid = newentrysize; + aiov.iov_len = newentrysize; + aiov.iov_base = (caddr_t)&newdir; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); + if (DIRBLKSIZ > + VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ext2_direnter: frag size"); + else if (!error) { + dp->i_size = roundup(dp->i_size, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE; + } + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space + * for the new entry in the range dp->i_offset to + * dp->i_offset + dp->i_count in the directory. 
+ * To use this space, we may have to compact the entries located + * there, by copying them together towards the beginning of the + * block, leaving the free space in one usable chunk at the end. + */ + + /* + * Increase size of directory if entry eats into new space. + * This should never push the size past a new multiple of + * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. + */ + if (dp->i_offset + dp->i_count > dp->i_size) + dp->i_size = dp->i_offset + dp->i_count; + /* + * Get the block containing the space for the new directory entry. + */ + if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + return (error); + /* + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region dp->i_offset to + * dp->i_offset + dp->i_count would yield the + * space. + */ + ep = (struct ext2_dir_entry *)dirbuf; + dsize = EXT2_DIR_REC_LEN(ep->name_len); + spacefree = ep->rec_len - dsize; + for (loc = ep->rec_len; loc < dp->i_count; ) { + nep = (struct ext2_dir_entry *)(dirbuf + loc); + if (ep->inode) { + /* trim the existing slot */ + ep->rec_len = dsize; + ep = (struct ext2_dir_entry *)((char *)ep + dsize); + } else { + /* overwrite; nothing there; header is ours */ + spacefree += dsize; + } + dsize = EXT2_DIR_REC_LEN(ep->name_len); + spacefree += nep->rec_len - dsize; + loc += nep->rec_len; + bcopy((caddr_t)nep, (caddr_t)ep, dsize); + } + /* + * Update the pointer fields in the previous entry (if any), + * copy in the new entry, and write out the block. 
+ */ + if (ep->inode == 0) { + if (spacefree + dsize < newentrysize) + panic("ext2_direnter: compact1"); + newdir.rec_len = spacefree + dsize; + } else { + if (spacefree < newentrysize) + panic("ext2_direnter: compact2"); + newdir.rec_len = spacefree; + ep->rec_len = dsize; + ep = (struct ext2_dir_entry *)((char *)ep + dsize); + } + bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) + error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, + cnp->cn_cred, cnp->cn_proc); + return (error); +} + +/* + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * dp->i_offset contains the offset into the directory of the + * entry to be eliminated. The dp->i_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry is not the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ +int +ext2_dirremove(dvp, cnp) + struct vnode *dvp; + struct componentname *cnp; +{ + register struct inode *dp; + struct ext2_dir_entry *ep; + struct buf *bp; + int error; + int DIRBLKSIZ = VTOI(dvp)->i_e2fs->s_blocksize; + + dp = VTOI(dvp); + if (dp->i_count == 0) { + /* + * First entry in block: set d_ino to zero. + */ + if (error = + VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->inode = 0; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); + } + /* + * Collapse new free space into previous entry. 
+ */ + if (error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), + (char **)&ep, &bp)) + return (error); + ep->rec_len += dp->i_reclen; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +int +ext2_dirrewrite(dp, ip, cnp) + struct inode *dp, *ip; + struct componentname *cnp; +{ + struct buf *bp; + struct ext2_dir_entry *ep; + struct vnode *vdp = ITOV(dp); + int error; + + if (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->inode = ip->i_number; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. + */ +int +ext2_dirempty(ip, parentino, cred) + register struct inode *ip; + ino_t parentino; + struct ucred *cred; +{ + register off_t off; + struct dirtemplate dbuf; + register struct ext2_dir_entry *dp = (struct ext2_dir_entry *)&dbuf; + int error, count, namlen; + int DIRBLKSIZ = ip->i_e2fs->s_blocksize; + +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) + + for (off = 0; off < ip->i_size; off += dp->rec_len) { + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, + UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->rec_len == 0) + return (0); + /* skip empty entries */ + if (dp->inode == 0) + continue; + /* accept only "." and ".." 
*/ + namlen = dp->name_len; + if (namlen > 2) + return (0); + if (dp->name[0] != '.') + return (0); + /* + * At this point namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (namlen == 1) + continue; + if (dp->name[1] == '.' && dp->inode == parentino) + continue; + return (0); + } + return (1); +} + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always vput before returning. + */ +int +ext2_checkpath(source, target, cred) + struct inode *source, *target; + struct ucred *cred; +{ + struct vnode *vp; + int error, rootino, namlen; + struct dirtemplate dirbuf; + + vp = ITOV(target); + if (target->i_number == source->i_number) { + error = EEXIST; + goto out; + } + rootino = ROOTINO; + error = 0; + if (target->i_number == rootino) + goto out; + + for (;;) { + if (vp->v_type != VDIR) { + error = ENOTDIR; + break; + } + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED, cred, (int *)0, (struct proc *)0); + if (error != 0) + break; + namlen = dirbuf.dotdot_namlen; + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirbuf.dotdot_ino == source->i_number) { + error = EINVAL; + break; + } + if (dirbuf.dotdot_ino == rootino) + break; + vput(vp); + if (error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp)) { + vp = NULL; + break; + } + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. 
not a directory\n"); + if (vp != NULL) + vput(vp); + return (error); +} + diff --git a/sys/gnu/ext2fs/ext2_readwrite.c b/sys/gnu/ext2fs/ext2_readwrite.c new file mode 100644 index 000000000000..be018317378f --- /dev/null +++ b/sys/gnu/ext2fs/ext2_readwrite.c @@ -0,0 +1,316 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#define BLKSIZE(a, b, c) blksize(a, b, c) +#define FS struct ext2_sb_info +#define I_FS i_e2fs +#define READ ext2_read +#define READ_S "ext2_read" +#define WRITE ext2_write +#define WRITE_S "ext2_write" + +/* + * Vnode op for reading. + */ +/* ARGSUSED */ +int +READ(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct inode *ip; + register struct uio *uio; + register FS *fs; + struct buf *bp; + daddr_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + int error; + u_short mode; + + vp = ap->a_vp; + ip = VTOI(vp); + mode = ip->i_mode; + uio = ap->a_uio; + +#if DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("%s: mode", READ_S); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("%s: short symlink", READ_S); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("%s: type %d", READ_S, vp->v_type); +#endif + fs = ip->I_FS; +#if 0 + if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize) + return (EFBIG); +#endif + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = BLKSIZE(fs, ip, lbn); + 
blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->s_frag_size - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) > ip->i_size) + error = bread(vp, lbn, size, NOCRED, &bp); + else if (doclusterread) + error = cluster_read(vp, + ip->i_size, lbn, size, NOCRED, &bp); + else if (lbn - 1 == vp->v_lastr) { + int nextsize = BLKSIZE(fs, ip, nextlbn); + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + if (error) + break; + vp->v_lastr = lbn; + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + if (error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio)) + break; + +#if !defined(__FreeBSD__) + if (S_ISREG(mode) && (xfersize + blkoffset == fs->s_frag_size || + uio->uio_offset == ip->i_size)) + bp->b_flags |= B_AGE; +#endif + brelse(bp); + } + if (bp != NULL) + brelse(bp); + ip->i_flag |= IN_ACCESS; + return (error); +} + +/* + * Vnode op for writing. 
+ */ +int +WRITE(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct uio *uio; + register struct inode *ip; + register FS *fs; + struct buf *bp; + struct proc *p; + daddr_t lbn; + off_t osize; + int blkoffset, error, flags, ioflag, resid, size, xfersize; + + ioflag = ap->a_ioflag; + uio = ap->a_uio; + vp = ap->a_vp; + ip = VTOI(vp); + +#if DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("%s: mode", WRITE_S); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + if ((ioflag & IO_SYNC) == 0) + panic("%s: nonsync dir write", WRITE_S); + break; + default: + panic("%s: type", WRITE_S); + } + + fs = ip->I_FS; +#if 0 + if (uio->uio_offset < 0 || + (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) + return (EFBIG); +#endif + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + p = uio->uio_procp; + if (vp->v_type == VREG && p && + uio->uio_offset + uio->uio_resid > + p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + psignal(p, SIGXFSZ); + return (EFBIG); + } + + resid = uio->uio_resid; + osize = ip->i_size; + flags = ioflag & IO_SYNC ? 
B_SYNC : 0; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->s_frag_size - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + +#if defined(__FreeBSD__) + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, (u_long)uio->uio_offset + xfersize); +#endif + + if (fs->s_frag_size > xfersize) + flags |= B_CLRBUF; + else + flags &= ~B_CLRBUF; + + error = ext2_balloc(ip, + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); + + if (error) + break; + if (uio->uio_offset + xfersize > ip->i_size) { + ip->i_size = uio->uio_offset + xfersize; +#if !defined(__FreeBSD__) + vnode_pager_setsize(vp, (u_long)ip->i_size); +#endif + } +#if !defined(__FreeBSD__) + (void)vnode_pager_uncache(vp); +#endif + + size = BLKSIZE(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + + if (ioflag & IO_SYNC) + (void)bwrite(bp); + else if (xfersize + blkoffset == fs->s_frag_size) { + if (doclusterwrite) { +#if defined(__FreeBSD__) + bp->b_flags |= B_CLUSTEROK; +#endif + cluster_write(bp, ip->i_size); + } else { +#if !defined(__FreeBSD__) + bp->b_flags |= B_AGE; +#endif + bawrite(bp); + } + } else { +#if defined(__FreeBSD__) + bp->b_flags |= B_CLUSTEROK; +#endif + bdwrite(bp); + } + + if (error || xfersize == 0) + break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. 
+ */ + if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) + ip->i_mode &= ~(ISUID | ISGID); + if (error) { + if (ioflag & IO_UNIT) { + (void)VOP_TRUNCATE(vp, osize, + ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { + struct timeval tv; +#if !defined(__FreeBSD__) + get_time(&tv); +#else + tv = time; +#endif + error = VOP_UPDATE(vp, &tv, &tv, 1); + } + return (error); +} diff --git a/sys/gnu/ext2fs/ext2_subr.c b/sys/gnu/ext2fs/ext2_subr.c new file mode 100644 index 000000000000..c27abe5f5fde --- /dev/null +++ b/sys/gnu/ext2fs/ext2_subr.c @@ -0,0 +1,128 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ext2_subr.c 8.2 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> + +#include <sys/systm.h> +#include <sys/vnode.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <sys/buf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. 
+ */ +int +ext2_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + struct inode *ip; + register struct ext2_sb_info *fs; + struct buf *bp; + daddr_t lbn; + int bsize, error; + + ip = VTOI(ap->a_vp); + fs = ip->i_e2fs; + lbn = lblkno(fs, ap->a_offset); + bsize = blksize(fs, ip, lbn); + + *ap->a_bpp = NULL; + if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + if (ap->a_res) + *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); + *ap->a_bpp = bp; + return (0); +} + +#if defined(KERNEL) && defined(DIAGNOSTIC) +void +ext2_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + register struct buf *ebp, *ep; + register daddr_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL) || + ep->b_vp == NULLVP) + continue; +#if !defined(__FreeBSD__) + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + continue; +#else + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL, NULL)) + continue; +#endif + if (vp != ip->i_devvp) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %d, end %d\n", + start, last, ep->b_blkno, + ep->b_blkno + btodb(ep->b_bcount) - 1); + panic("Disk buffer overlap"); + } +} +#endif /* DIAGNOSTIC */ + diff --git a/sys/gnu/ext2fs/ext2_vfsops.c b/sys/gnu/ext2fs/ext2_vfsops.c new file mode 100644 index 000000000000..596eb35bfe32 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_vfsops.c @@ -0,0 +1,1082 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1989, 1991, 1993, 
1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/file.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/stat.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> + +int ext2_sbupdate __P((struct ufsmount *, int)); + +struct vfsops ext2fs_vfsops = { + ext2_mount, + ufs_start, /* empty function */ + ext2_unmount, + ufs_root, /* root inode via vget */ + ufs_quotactl, /* does operations associated with quotas */ + ext2_statfs, + ext2_sync, + ext2_vget, + ext2_fhtovp, + ext2_vptofh, + ext2_init, +}; + +#if defined(__FreeBSD__) +VFS_SET(ext2fs_vfsops, ext2fs, MOUNT_EXT2FS, 0); +#define bsd_malloc malloc +#define bsd_free free +#endif + +extern u_long nextgennumber; + +/* + * Called by main() when ufs is going to be mounted as root. + * + * Name is updated by mount(8) after booting. + */ +#define ROOTNAME "root_device" + +int +ext2_mountroot() +{ +#if !defined(__FreeBSD__) + extern struct vnode *rootvp; +#endif + register struct ext2_sb_info *fs; + register struct mount *mp; +#if defined(__FreeBSD__) + struct proc *p = curproc; +#else + struct proc *p = get_proc(); /* XXX */ +#endif + struct ufsmount *ump; + u_int size; + int error; + + /* + * Get vnodes for swapdev and rootdev. 
+ */ + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("ext2_mountroot: can't setup bdevvp's"); + + mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &ext2fs_vfsops; + mp->mnt_flag = MNT_RDONLY; + if (error = ext2_mountfs(rootvp, mp, p)) { + bsd_free(mp, M_MOUNT); + return (error); + } + if (error = vfs_lock(mp)) { + (void)ext2_unmount(mp, 0, p); + bsd_free(mp, M_MOUNT); + return (error); + } +#if defined(__FreeBSD__) + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); +#else + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); +#endif + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); + fs->fs_fsmnt[0] = '/'; + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ext2_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + inittodr(fs->s_es->s_wtime); /* this helps to set the time */ + return (0); +} + +/* + * VFS Operations. + * + * mount system call + */ +int +ext2_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; /* this is actually a (struct ufs_args *) */ + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct ufs_args args; + struct ufsmount *ump = 0; + register struct ext2_sb_info *fs; + u_int size; + int error, flags; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) + return (error); + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. 
+ */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + error = 0; + if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (vfs_busy(mp)) + return (EBUSY); + error = ext2_flushfiles(mp, flags, p); + vfs_unbusy(mp); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); + if (error) + return (error); + if (fs->s_rd_only && (mp->mnt_flag & MNT_WANTRDWR)) + fs->s_rd_only = 0; + if (fs->s_rd_only == 0) { + /* don't say it's clean */ + fs->s_es->s_state &= ~EXT2_VALID_FS; + ext2_sbupdate(ump, MNT_WAIT); + } + if (args.fspec == 0) { + /* + * Process export requests. + */ + return (vfs_export(mp, &ump->um_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. + */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if (error = namei(ndp)) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev) { + vrele(devvp); + return (ENXIO); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) + error = ext2_mountfs(devvp, mp, p); + else { + if (devvp != ump->um_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ext2_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +/* + * checks that the data in the descriptor blocks make sense + * this is taken from 
ext2/super.c + */ +static int ext2_check_descriptors (struct ext2_sb_info * sb) +{ + int i; + int desc_block = 0; + unsigned long block = sb->s_es->s_first_data_block; + struct ext2_group_desc * gdp = NULL; + + /* ext2_debug ("Checking group descriptors"); */ + + for (i = 0; i < sb->s_groups_count; i++) + { + /* examine next descriptor block */ + if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext2_group_desc *) + sb->s_group_desc[desc_block++]->b_data; + if (gdp->bg_block_bitmap < block || + gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Block bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_block_bitmap); + return 0; + } + if (gdp->bg_inode_bitmap < block || + gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Inode bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_inode_bitmap); + return 0; + } + if (gdp->bg_inode_table < block || + gdp->bg_inode_table + sb->s_itb_per_group >= + block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Inode table for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_inode_table); + return 0; + } + block += EXT2_BLOCKS_PER_GROUP(sb); + gdp++; + } + return 1; +} + +/* + * this computes the fields of the ext2_sb_info structure from the + * data in the ext2_super_block structure read in + */ +static int compute_sb_data(devvp, es, fs) + struct vnode * devvp; + struct ext2_super_block * es; + struct ext2_sb_info * fs; +{ + int db_count, error; + int i, j; + int logic_sb_block = 1; /* XXX for now */ + +#if 1 +#define V(v) +#else +#define V(v) printf(#v"= %d\n", fs->v); +#endif + + fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; + V(s_blocksize) + fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; + V(s_bshift) + fs->s_fsbtodb = es->s_log_block_size + 1; + V(s_fsbtodb) + fs->s_qbmask = 
fs->s_blocksize - 1; + V(s_bmask) + fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); + V(s_blocksize_bits) + fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; + V(s_frag_size) + if (fs->s_frag_size) + fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; + V(s_frags_per_block) + fs->s_blocks_per_group = es->s_blocks_per_group; + V(s_blocks_per_group) + fs->s_frags_per_group = es->s_frags_per_group; + V(s_frags_per_group) + fs->s_inodes_per_group = es->s_inodes_per_group; + V(s_inodes_per_group) + fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; + V(s_inodes_per_block) + fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; + V(s_itb_per_group) + fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); + V(s_desc_per_block) + /* s_resuid / s_resgid ? */ + fs->s_groups_count = (es->s_blocks_count - + es->s_first_data_block + + EXT2_BLOCKS_PER_GROUP(fs) - 1) / + EXT2_BLOCKS_PER_GROUP(fs); + V(s_groups_count) + db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / + EXT2_DESC_PER_BLOCK(fs); + fs->s_db_per_group = db_count; + V(s_db_per_group) + + fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), + M_UFSMNT, M_WAITOK); + + /* adjust logic_sb_block */ + if(fs->s_blocksize > SBSIZE) + /* Godmar thinks: if the blocksize is greater than 1024, then + the superblock is logically part of block zero. 
+ */ + logic_sb_block = 0; + + for (i = 0; i < db_count; i++) { + error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), + fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); + if(error) { + for (j = 0; j < i; j++) + brelse(fs->s_group_desc[j]); + bsd_free(fs->s_group_desc, M_UFSMNT); + printf("EXT2-fs: unable to read group descriptors (%d)\n", error); + return EIO; + } + } + if(!ext2_check_descriptors(fs)) { + for (j = 0; j < db_count; j++) + brelse(fs->s_group_desc[j]); + bsd_free(fs->s_group_desc, M_UFSMNT); + printf("EXT2-fs: (ext2_check_descriptors failure) " + "unable to read group descriptors\n"); + return EIO; + } + + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { + fs->s_inode_bitmap_number[i] = 0; + fs->s_inode_bitmap[i] = NULL; + fs->s_block_bitmap_number[i] = 0; + fs->s_block_bitmap[i] = NULL; + } + fs->s_loaded_inode_bitmaps = 0; + fs->s_loaded_block_bitmaps = 0; + return 0; +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. + * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) invalidate all cached file data. + * 6) re-read inode data for all active vnodes. + */ +int +ext2_reload(mountp, cred, p) + register struct mount *mountp; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp, *nvp, *devvp; + struct inode *ip; + struct buf *bp; + struct ext2_super_block * es; + struct ext2_sb_info *fs; + int i, size, error; + + if ((mountp->mnt_flag & MNT_RDONLY) == 0) + return (EINVAL); + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOUFS(mountp)->um_devvp; + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + panic("ext2_reload: dirty1"); + /* + * Step 2: re-read superblock from disk. 
+ * constants have been adjusted for ext2 + */ + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + return (error); + es = (struct ext2_super_block *)bp->b_data; + if (es->s_magic != EXT2_SUPER_MAGIC) { + if(es->s_magic == EXT2_PRE_02B_MAGIC) + printf("This filesystem bears the magic number of a pre " + "0.2b version of ext2. This is not supported by " + "Lites.\n"); + else + printf("Wrong magic number: %x (expected %x for ext2 fs\n", + es->s_magic, EXT2_SUPER_MAGIC); + brelse(bp); + return (EIO); /* XXX needs translation */ + } + fs = VFSTOUFS(mountp)->um_e2fs; + bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); + + if(error = compute_sb_data(devvp, es, fs)) { + brelse(bp); + return error; + } +#ifdef UNKLAR + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; +#endif + brelse(bp); + +loop: + for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + nvp = vp->v_mntvnodes.le_next; + /* + * Step 4: invalidate all inactive vnodes. + */ + if (vp->v_usecount == 0) { + vgone(vp); + continue; + } + /* + * Step 5: invalidate all cached file data. + */ + if (vget(vp, 1)) + goto loop; + if (vinvalbuf(vp, 0, cred, p, 0, 0)) + panic("ext2_reload: dirty2"); + /* + * Step 6: re-read inode data for all active vnodes. 
+ */ + ip = VTOI(vp); + if (error = + bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->s_blocksize, NOCRED, &bp)) { + vput(vp); + return (error); + } + ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), + &ip->i_din); + brelse(bp); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } + return (0); +} + +/* + * Common code for mount and mountroot + */ +int +ext2_mountfs(devvp, mp, p) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; +{ + register struct ufsmount *ump; + struct buf *bp; + register struct ext2_sb_info *fs; + struct ext2_super_block * es; + dev_t dev = devvp->v_rdev; + struct partinfo dpart; + caddr_t base; + int havepart = 0; + int error, i, size; + int ronly; +#if !defined(__FreeBSD__) + extern struct vnode *rootvp; +#endif + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if (error = vfs_mountedon(devvp)) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); +#ifdef READONLY +/* turn on this to force it to be read-only */ + mp->mnt_flag |= MNT_RDONLY; +#endif + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) + return (error); + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) + size = DEV_BSIZE; + else { + havepart = 1; + size = dpart.disklab->d_secsize; + } + + bp = NULL; + ump = NULL; + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + goto out; + es = (struct ext2_super_block *)bp->b_data; + if (es->s_magic != EXT2_SUPER_MAGIC) { + if(es->s_magic == EXT2_PRE_02B_MAGIC) + printf("This filesystem bears the magic number of a pre " + "0.2b version of ext2. 
This is not supported by " + "Lites.\n"); + else + printf("Wrong magic number: %x (expected %x for EXT2FS)\n", + es->s_magic, EXT2_SUPER_MAGIC); + error = EINVAL; /* XXX needs translation */ + goto out; + } + ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); + bzero((caddr_t)ump, sizeof *ump); + /* I don't know whether this is the right strategy. Note that + we dynamically allocate both a ext2_sb_info and a ext2_super_block + while Linux keeps the super block in a locked buffer + */ + ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), + M_UFSMNT, M_WAITOK); + ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), + M_UFSMNT, M_WAITOK); + bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); + if(error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)) { + brelse(bp); + return error; + } + brelse(bp); + bp = NULL; + fs = ump->um_e2fs; + fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ + if (!(fs->s_es->s_state & EXT2_VALID_FS)) { + printf("WARNING: %s was not properly dismounted\n", + fs->fs_fsmnt); + } + /* if the fs is not mounted read-only, make sure the super block is + always written back on a sync() + */ + if (ronly == 0) { + fs->s_dirt = 1; /* mark it modified */ + fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ + } + mp->mnt_data = (qaddr_t)ump; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = MOUNT_EXT2FS; + mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; + mp->mnt_flag |= MNT_LOCAL; + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + /* setting those two parameters allows us to use + ufs_bmap w/o changse ! 
+ */ + ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); + ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; + ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; + devvp->v_specflags |= SI_MOUNTEDON; + if (ronly == 0) + ext2_sbupdate(ump, MNT_WAIT); + return (0); +out: + if (bp) + brelse(bp); + (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); + if (ump) { + bsd_free(ump->um_fs, M_UFSMNT); + bsd_free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +/* + * unmount system call + */ +int +ext2_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct ufsmount *ump; + register struct ext2_sb_info *fs; + int error, flags, ronly, i; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (mp->mnt_flag & MNT_ROOTFS) + return (EINVAL); + flags |= FORCECLOSE; + } + if (error = ext2_flushfiles(mp, flags, p)) + return (error); + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + ronly = fs->s_rd_only; + if (!ronly) { + fs->s_es->s_state |= EXT2_VALID_FS; /* was fs_clean = 1 */ + ext2_sbupdate(ump, MNT_WAIT); + } + /* release buffers containing group descriptors */ + for(i = 0; i < fs->s_db_per_group; i++) + brelse(fs->s_group_desc[i]); + /* release cached inode/block bitmaps */ + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_inode_bitmap[i]) + brelse (fs->s_inode_bitmap[i]); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_block_bitmap[i]) + brelse (fs->s_block_bitmap[i]); + + ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, + NOCRED, p); + vrele(ump->um_devvp); + bsd_free(fs->s_es, M_UFSMNT); + bsd_free(fs, M_UFSMNT); + bsd_free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Flush out all the files in a filesystem. 
+ */ +int +ext2_flushfiles(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ +#if !defined(__FreeBSD__) + extern int doforce; +#endif + register struct ufsmount *ump; + int i, error; + + if (!doforce) + flags &= ~FORCECLOSE; + ump = VFSTOUFS(mp); +#if QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP) + continue; + quotaoff(p, mp, i); + } + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } +#endif + error = vflush(mp, NULLVP, flags); + return (error); +} + +/* + * Get file system statistics. + * taken from ext2/super.c ext2_statfs + */ +int +ext2_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + unsigned long overhead; + unsigned long overhead_per_group; + + register struct ufsmount *ump; + register struct ext2_sb_info *fs; + register struct ext2_super_block *es; + + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + es = fs->s_es; + + if (es->s_magic != EXT2_SUPER_MAGIC) + panic("ext2_statfs - magic number spoiled"); + + /* + * Compute the overhead (FS structures) + */ + overhead_per_group = 1 /* super block */ + + fs->s_db_per_group + + 1 /* block bitmap */ + + 1 /* inode bitmap */ + + fs->s_itb_per_group; + overhead = es->s_first_data_block + + fs->s_groups_count * overhead_per_group; + + sbp->f_type = MOUNT_EXT2FS; + sbp->f_bsize = EXT2_FRAG_SIZE(fs); + sbp->f_iosize = EXT2_BLOCK_SIZE(fs); + sbp->f_blocks = es->s_blocks_count - overhead; + sbp->f_bfree = es->s_free_blocks_count; + sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; + sbp->f_files = es->s_inodes_count; + sbp->f_ffree = es->s_free_inodes_count; + if (sbp != &mp->mnt_stat) { + bcopy((caddr_t)mp->mnt_stat.f_mntonname, + (caddr_t)&sbp->f_mntonname[0], MNAMELEN); + bcopy((caddr_t)mp->mnt_stat.f_mntfromname, + (caddr_t)&sbp->f_mntfromname[0], 
MNAMELEN); + } + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. + */ +int +ext2_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + register struct inode *ip; + register struct ufsmount *ump = VFSTOUFS(mp); + register struct ext2_sb_info *fs; + int error, allerror = 0; + + fs = ump->um_e2fs; + /* + * Write back modified superblock. + * Consistency check that the superblock + * is still in the buffer cache. + */ + if (fs->s_dirt) { +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + if (fs->s_rd_only != 0) { /* XXX */ + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); + } + fs->s_dirt = 0; +#if !defined(__FreeBSD__) + get_time(&time); +#endif + fs->s_es->s_wtime = time.tv_sec; + allerror = ext2_sbupdate(ump, waitfor); + } + /* + * Write back each (modified) inode. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp)) + continue; + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) + continue; + if (vget(vp, 1)) + goto loop; + if (error = VOP_FSYNC(vp, cred, waitfor, p)) + allerror = error; + vput(vp); + } + /* + * Force stale file system control information to be flushed. + */ + if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) + allerror = error; +#if QUOTA + qsync(mp); +#endif + return (allerror); +} + +/* + * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it + * in from disk. 
If it is in core, wait for the lock bit to clear, then + * return the inode locked. Detection and handling of mount points must be + * done by the calling routine. + */ +int +ext2_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + register struct ext2_sb_info *fs; + register struct inode *ip; + struct ufsmount *ump; + struct buf *bp; + struct vnode *vp; + dev_t dev; + int i, type, error; + int used_blocks; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + if ((*vpp = ufs_ihashget(dev, ino)) != NULL) + return (0); + + /* Allocate a new vnode/inode. */ + if (error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) { + *vpp = NULL; + return (error); + } + /* I don't really know what this 'type' does. I suppose it's some kind + * of memory accounting. Let's just book this memory on FFS's account + * If I'm not mistaken, this stuff isn't implemented anyway in Lites + */ + type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */ + MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); + insmntque(vp, mp); + bzero((caddr_t)ip, sizeof(struct inode)); + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_e2fs = fs = ump->um_e2fs; + ip->i_dev = dev; + ip->i_number = ino; +#if QUOTA + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; +#endif + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ufs_ihashins(ip); + + /* Read in the disk contents for the inode, copy into the inode. */ +#if 0 +printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); +#endif + if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->s_blocksize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. 
With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + vput(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + /* convert ext2 inode to dinode */ + ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * + ino_to_fsbo(fs, ino)), &ip->i_din); + ip->i_block_group = ino_to_cg(fs, ino); + ip->i_next_alloc_block = 0; + ip->i_next_alloc_goal = 0; + ip->i_prealloc_count = 0; + ip->i_prealloc_block = 0; + /* now we want to make sure that block pointers for unused + blocks are zeroed out - ext2_balloc depends on this + although for regular files and directories only + */ + if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { + used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; + for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + } +/* + ext2_print_inode(ip); +*/ + brelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + if (error = ufs_vinit(mp, ext2_specop_p, EXT2_FIFOOPS, &vp)) { + vput(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + VREF(ip->i_devvp); + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. 
+ */ + if (ip->i_gen == 0) { +#if !defined(__FreeBSD__) + struct timeval time; + get_time(&time); +#endif + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ip->i_flag |= IN_MODIFIED; + } + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call ext2_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. exflagsp and credanonp + */ +int +ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct ufid *ufhp; + struct ext2_sb_info *fs; + + ufhp = (struct ufid *)fhp; + fs = VFSTOUFS(mp)->um_e2fs; + if (ufhp->ufid_ino < ROOTINO || + ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) + return (ESTALE); + return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +int +ext2_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct inode *ip; + register struct ufid *ufhp; + + ip = VTOI(vp); + ufhp = (struct ufid *)fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +/* + * Write a superblock and associated information back to disk. + */ +int +ext2_sbupdate(mp, waitfor) + struct ufsmount *mp; + int waitfor; +{ + register struct ext2_sb_info *fs = mp->um_e2fs; + register struct ext2_super_block *es = fs->s_es; + register struct buf *bp; + int blks; + caddr_t space; + int i, size, error = 0; +/* +printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); +*/ + bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); + bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + + /* write group descriptors back on disk */ + for(i = 0; i < fs->s_db_per_group; i++) + /* Godmar thinks: we must avoid using any of the b*write + * functions here: we want to keep the buffer locked + * so we use my 'housemade' write routine: + */ + error |= ll_w_block(fs->s_group_desc[i], waitfor == MNT_WAIT); + + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_inode_bitmap[i]) + ll_w_block (fs->s_inode_bitmap[i], 1); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_block_bitmap[i]) + ll_w_block (fs->s_block_bitmap[i], 1); + + return (error); +} diff --git a/sys/gnu/ext2fs/ext2_vnops.c b/sys/gnu/ext2fs/ext2_vnops.c new file mode 100644 index 000000000000..1bf911322b87 --- /dev/null +++ b/sys/gnu/ext2fs/ext2_vnops.c @@ -0,0 +1,338 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ext2_vnops.c 8.7 (Berkeley) 2/3/94 + */ + +#if !defined(__FreeBSD__) +#include "fifo.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#if !defined(__FreeBSD__) +#include <ufs/ufs/lockf.h> +#else +#include <lockf.h> +#include <sys/signalvar.h> +#endif +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +/* Global vfs data structures for ufs. */ +int (**ext2_vnodeop_p)(); +struct vnodeopv_entry_desc ext2_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, ext2_lookup }, /* lookup */ + { &vop_create_desc, ufs_create }, /* create */ + { &vop_mknod_desc, ufs_mknod }, /* mknod */ + { &vop_open_desc, ufs_open }, /* open */ + { &vop_close_desc, ufs_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ext2_read }, /* read */ + { &vop_write_desc, ext2_write }, /* write */ + { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ + { &vop_select_desc, ufs_select }, /* select */ + { &vop_mmap_desc, ufs_mmap }, /* mmap */ + { &vop_fsync_desc, ext2_fsync }, /* fsync */ + { &vop_seek_desc, ufs_seek }, /* seek */ + { &vop_remove_desc, ufs_remove }, /* remove */ + { &vop_link_desc, ufs_link }, /* link */ + { &vop_rename_desc, ufs_rename }, /* rename */ + { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ + 
{ &vop_symlink_desc, ufs_symlink }, /* symlink */ + { &vop_readdir_desc, ext2_readdir }, /* readdir */ + { &vop_readlink_desc, ufs_readlink }, /* readlink */ + { &vop_abortop_desc, ufs_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, ufs_bmap }, /* bmap */ + { &vop_strategy_desc, ufs_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ + { &vop_advlock_desc, ufs_advlock }, /* advlock */ + { &vop_blkatoff_desc, ext2_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, ext2_valloc }, /* valloc */ + { &vop_reallocblks_desc, ext2_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, ext2_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_vnodeop_opv_desc = + { &ext2_vnodeop_p, ext2_vnodeop_entries }; + +int (**ext2_specop_p)(); +struct vnodeopv_entry_desc ext2_specop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, ufsspec_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsspec_read }, /* read */ + { &vop_write_desc, ufsspec_write }, /* write */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, 
ext2_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_specop_opv_desc = + { &ext2_specop_p, ext2_specop_entries }; + +#if FIFO +int (**ext2_fifoop_p)(); +struct vnodeopv_entry_desc ext2_fifoop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, ufsfifo_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, 
/* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsfifo_read }, /* read */ + { &vop_write_desc, ufsfifo_write }, /* write */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, ext2_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_fifoop_opv_desc = + { &ext2_fifoop_p, ext2_fifoop_entries }; +#endif /* FIFO */ + +#if defined(__FreeBSD__) + VNODEOP_SET(ext2fs_vnodeop_opv_desc); + VNODEOP_SET(ext2fs_specop_opv_desc); + 
VNODEOP_SET(ext2fs_fifoop_opv_desc); +#endif + +/* + * Enabling cluster read/write operations. + */ +#ifdef DEBUG +#include <sys/sysctl.h> +int doclusterread = 1; +struct ctldebug debug11 = { "doclusterread", &doclusterread }; +int doclusterwrite = 1; +struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; +#endif + +#if defined(__FreeBSD__) +#define doclusterwrite 1 +#define doclusterread 1 +#else +/* doclusterwrite is being tested + note that reallocblks is called when it's on, but this is not implemented */ +#define doclusterwrite 0 +/* doclusterread should work with new pagemove */ +#define doclusterread 1 +#endif + +#include <gnu/ext2fs/ext2_readwrite.c> + +/* + * Synch an open file. + */ +/* ARGSUSED */ +int +ext2_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct buf *bp; + struct timeval tv; + struct buf *nbp; + int s; + + /* + * Clean memory object. + * XXX add this to all file systems. + * XXX why is all this fs specific? + */ +#if !defined(__FreeBSD__) + vn_pager_sync(vp, ap->a_waitfor); +#endif + + /* + * Flush all dirty buffers associated with a vnode. + */ + ext2_discard_prealloc(VTOI(vp)); + +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ext2_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. 
+ */ + if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) + (void) bawrite(bp); + else + (void) bwrite(bp); + goto loop; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; +#if !defined(__FreeBSD__) + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); +#else + tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "extfsn", 0); +#endif + } +#if DIAGNOSTIC + if (vp->v_dirtyblkhd.lh_first) { + vprint("ext2_fsync: dirty", vp); + goto loop; + } +#endif + } + splx(s); +#if defined(__FreeBSD__) + tv = time; +#else + get_time(&tv); +#endif + return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); +} diff --git a/sys/gnu/ext2fs/fs.h b/sys/gnu/ext2fs/fs.h new file mode 100644 index 000000000000..28071d4754b2 --- /dev/null +++ b/sys/gnu/ext2fs/fs.h @@ -0,0 +1,157 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.7 (Berkeley) 4/19/94 + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * The first boot and super blocks are given in absolute disk addresses. + * The byte-offset forms are preferred, as they don't imply a sector size. 
+ */ +#define BBSIZE 1024 +#define SBSIZE 1024 +#define BBOFF ((off_t)(0)) +#define SBOFF ((off_t)(BBOFF + BBSIZE)) +#define BBLOCK ((daddr_t)(0)) +#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + */ +#define MAXMNTLEN 512 + +/* + * Macros for access to superblock array structures + */ + +/* + * Convert cylinder group to base address of its global summary info. + */ +#define fs_cs(fs, cgindx) (((struct ext2_group_desc *) \ + (fs->s_group_desc[cgindx / EXT2_DESC_PER_BLOCK(fs)]->b_data)) \ + [cgindx % EXT2_DESC_PER_BLOCK(fs)]) + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << ((fs)->s_fsbtodb)) +#define dbtofsb(fs, b) ((b) >> ((fs)->s_fsbtodb)) + +/* get group containing inode */ +#define ino_to_cg(fs, x) (((x) - 1) / EXT2_INODES_PER_GROUP(fs)) + +/* get block containing inode from its number x */ +#define ino_to_fsba(fs, x) fs_cs(fs, ino_to_cg(fs, x)).bg_inode_table + \ + (((x)-1) % EXT2_INODES_PER_GROUP(fs))/EXT2_INODES_PER_BLOCK(fs) + +/* get offset for inode in block */ +#define ino_to_fsbo(fs, x) ((x-1) % EXT2_INODES_PER_BLOCK(fs)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) (((d) - fs->s_es->s_first_data_block) / \ + EXT2_BLOCKS_PER_GROUP(fs)) +#define dtogd(fs, d) (((d) - fs->s_es->s_first_data_block) % \ + EXT2_BLOCKS_PER_GROUP(fs)) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->s_qbmask) + +#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs->s_bshift)) + +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs->s_bshift)) + +/* no fragments -> logical block number equal # of frags */ +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs->s_bshift)) + +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + roundup(size, fs->s_frag_size) + /* was (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) */ + +/* + * Determining the size of a file block in the file system. + * easy w/o fragments + */ +#define blksize(fs, ip, lbn) ((fs)->s_frag_size) + +/* + * INOPB is the number of inodes in a secondary storage block. + */ +#define INOPB(fs) EXT2_INODES_PER_BLOCK(fs) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) (EXT2_ADDR_PER_BLOCK(fs)) + +extern int inside[], around[]; +extern u_char *fragtbl[]; + +/* a few remarks about superblock locking/unlocking + * Linux provides special routines for doing so + * I haven't figured out yet what BSD does + * I think I'll try a VOP_LOCK/VOP_UNLOCK on the device vnode + */ +#define DEVVP(inode) (VFSTOUFS(ITOV(inode)->v_mount)->um_devvp) +#define lock_super(devvp) VOP_LOCK(devvp) +#define unlock_super(devvp) VOP_UNLOCK(devvp) + diff --git a/sys/gnu/ext2fs/i386-bitops.h b/sys/gnu/ext2fs/i386-bitops.h new file mode 100644 index 000000000000..a66679ea3df8 --- /dev/null +++ b/sys/gnu/ext2fs/i386-bitops.h @@ -0,0 +1,159 @@ +/* + * this is mixture of i386/bitops.h and asm/string.h + * taken from the Linux source tree + * + * XXX replace with Mach routines or reprogram in C + */ +#ifndef _I386_BITOPS_H +#define _I386_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. 
All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +/* + * Some hacks to defeat gcc over-optimizations.. + */ +struct __dummy { unsigned long a[100]; }; +#define ADDR (*(struct __dummy *) addr) + +extern __inline__ int set_bit(int nr, void * addr) +{ + int oldbit; + + __asm__ __volatile__("btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"ir" (nr)); + return oldbit; +} + +extern __inline__ int clear_bit(int nr, void * addr) +{ + int oldbit; + + __asm__ __volatile__("btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"ir" (nr)); + return oldbit; +} + +extern __inline__ int change_bit(int nr, void * addr) +{ + int oldbit; + + __asm__ __volatile__("btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"ir" (nr)); + return oldbit; +} + +/* + * This routine doesn't need to be atomic, but it's faster to code it + * this way. + */ +extern __inline__ int test_bit(int nr, void * addr) +{ + int oldbit; + + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"ir" (nr)); + return oldbit; +} + +/* + * Find-bit routines.. 
+ */ +extern inline int find_first_zero_bit(void * addr, unsigned size) +{ + int res; + + if (!size) + return 0; + __asm__(" + cld + movl $-1,%%eax + xorl %%edx,%%edx + repe; scasl + je 1f + xorl -4(%%edi),%%eax + subl $4,%%edi + bsfl %%eax,%%edx +1: subl %%ebx,%%edi + shll $3,%%edi + addl %%edi,%%edx" + :"=d" (res) + :"c" ((size + 31) >> 5), "D" (addr), "b" (addr) + :"ax", "cx", "di"); + return res; +} + +extern inline int find_next_zero_bit (void * addr, int size, int offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in first byte + */ + __asm__(" + bsfl %1,%0 + jne 1f + movl $32, %0 +1: " + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); + return (offset + set + res); +} + +/* + * ffz = Find First Zero in word. Undefined if no zero exists, + * so code should check against ~0UL first.. + */ +extern inline unsigned long ffz(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"r" (~word)); + return word; +} + +/* + * memscan() taken from linux asm/string.h + */ +/* + * find the first occurrence of byte 'c', or 1 past the area if none + */ +extern inline char * memscan(void * addr, unsigned char c, int size) +{ + if (!size) + return addr; + __asm__("cld + repnz; scasb + jnz 1f + dec %%edi +1: " + : "=D" (addr), "=c" (size) + : "0" (addr), "1" (size), "a" (c)); + return addr; +} + +#endif /* _I386_BITOPS_H */ diff --git a/sys/gnu/fs/ext2fs/COPYRIGHT.INFO b/sys/gnu/fs/ext2fs/COPYRIGHT.INFO new file mode 100644 index 000000000000..bbb021451bee --- /dev/null +++ b/sys/gnu/fs/ext2fs/COPYRIGHT.INFO @@ -0,0 +1,30 @@ +Most of the files in this directory are written by Godmar Back or modified +by him using the CSRG sources. 
Those files are covered by the Berkeley-style +copyright. However, the following files are covered by GPL. Since the policy +of the FreeBSD project is to keep the files with the more restrictive +copyright in the gnu tree and it is a good idea to keep the filesystem code +all together, the EXT2FS in its entirety resides under the gnu tree. Note +that only the files below are under the GPL. In the eventuality that these +files are redesigned or rewritten, this tree can be moved back into the less +restrictive FreeBSD tree. + + ext2_fs.h + ext2_fs_i.h + ext2_fs_sb.h + ext2_linux_balloc.c + ext2_linux_ialloc.c + i386-bitops.h + +PS. + THANKS GODMAR!!! + +Note that this port has been modified by John Dyson and others on +the FreeBSD team, and it is best to send the bug reports to the FreeBSD +team. If there are any non-FreeBSD-specific bugs, fixes will be sent to +Godmar to help him fix the original code base. It is also our intention +to send Godmar any FreeBSD-specific porting changes so that he can keep +control of his code.... + +John +dyson@freebsd.org + diff --git a/sys/gnu/fs/ext2fs/ext2_alloc.c b/sys/gnu/fs/ext2fs/ext2_alloc.c new file mode 100644 index 000000000000..6a0f5d3c6360 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_alloc.c @@ -0,0 +1,572 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ext2_alloc.c 8.8 (Berkeley) 2/21/94 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/syslog.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +extern u_long nextgennumber; + +static void ext2_fserr __P((struct ext2_sb_info *, u_int, char *)); + +/* + * Linux calls this functions at the following locations: + * (1) the inode is freed + * (2) a preallocation miss occurs + * (3) truncate is called + * (4) release_file is called and f_mode & 2 + * + * I call it in ext2_inactive, ext2_truncate, ext2_vfree and in (2) + * the call in vfree might be redundant + */ +void ext2_discard_prealloc (struct inode * ip) +{ +#ifdef EXT2_PREALLOCATE + if (ip->i_prealloc_count) { + int i = ip->i_prealloc_count; + ip->i_prealloc_count = 0; + ext2_free_blocks (ITOV(ip)->v_mount, + ip->i_prealloc_block, + i); + } +#endif +} + +/* + * Allocate a block in the file system. + * + * this takes the framework from ffs_alloc. To implement the + * actual allocation, it calls ext2_new_block, the ported version + * of the same Linux routine. 
+ * + * we note that this is always called in connection with ext2_blkpref + * + * preallocation is done as Linux does it + */ +int +ext2_alloc(ip, lbn, bpref, size, cred, bnp) + register struct inode *ip; + daddr_t lbn, bpref; + int size; + struct ucred *cred; + daddr_t *bnp; +{ + register struct ext2_sb_info *fs; + daddr_t bno; + int cg, error; + + *bnp = 0; + fs = ip->i_e2fs; +#if DIAGNOSTIC + if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->s_blocksize, size, fs->fs_fsmnt); + panic("ext2_alloc: bad size"); + } + if (cred == NOCRED) + panic("ext2_alloc: missing credential\n"); +#endif /* DIAGNOSTIC */ + if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) + goto nospace; + if (cred->cr_uid != 0 && + fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) + goto nospace; +#if QUOTA + if (error = chkdq(ip, (long)btodb(size), cred, 0)) + return (error); +#endif + if (bpref >= fs->s_es->s_blocks_count) + bpref = 0; + /* call the Linux code */ +#ifdef EXT2_PREALLOCATE + /* To have a preallocation hit, we must + * - have at least one block preallocated + * - and our preferred block must have that block number or one below + */ + if (ip->i_prealloc_count && + (bpref == ip->i_prealloc_block || + bpref + 1 == ip->i_prealloc_block)) + { + bno = ip->i_prealloc_block++; + ip->i_prealloc_count--; + /* ext2_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); */ + + /* Linux gets, clears, and releases the buffer at this + point - we don't have to that; we leave it to the caller + */ + } else { + ext2_discard_prealloc (ip); + /* ext2_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); */ + if (S_ISREG(ip->i_mode)) + bno = ext2_new_block + (ITOV(ip)->v_mount, bpref, + &ip->i_prealloc_count, + &ip->i_prealloc_block); + else + bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, + bpref, 0, 0); + } +#else + bno = 
(daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); +#endif + + if (bno > 0) { + /* set next_alloc fields as done in block_getblk */ + ip->i_next_alloc_block = lbn; + ip->i_next_alloc_goal = bno; + + ip->i_blocks += btodb(size); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); + } +#if QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, (long)-btodb(size), cred, FORCE); +#endif +nospace: + ext2_fserr(fs, cred->cr_uid, "file system full"); + uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. The allocator attempts + * to find a range of sequential blocks starting as close as possible to + * an fs_rotdelay offset from the end of the allocation for the logical + * block immediately preceeding the current range. If successful, the + * physical block numbers in the buffer pointers and in the inode are + * changed to reflect the new allocation. If unsuccessful, the allocation + * is left unchanged. The success in doing the reallocation is returned. + * Note that the error return is not reflected back to the user. Rather + * the previous block allocation will be used. 
+ */ +#include <sys/sysctl.h> +static int doasyncfree = 1; +#ifdef OPT_DEBUG +struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +#endif /* OPT_DEBUG */ +int +ext2_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ +#ifndef FANCY_REALLOC +/* printf("ext2_reallocblks not implemented\n"); */ +return ENOSPC; +#else + + struct ext2_sb_info *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, len, start_lvl, end_lvl, pref, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_e2fs; +#ifdef UNKLAR + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); +#endif + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#if DIAGNOSTIC + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ext2_reallocblks: non-cluster"); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. 
+ */ + pref = ext2_blkpref(ip, start_lbn, soff, sbap); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#if DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ext2_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->s_blocksize, NOCRED, &ebp)) + goto fail; + ebap = (daddr_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref, + len, (u_long (*)())ext2_clusteralloc)) == 0) + goto fail; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) { + if (i == ssize) + bap = ebap; +#if DIAGNOSTIC + if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) + panic("ext2_reallocblks: alloc mismatch"); +#endif + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. 
+ */ + if (sbap != &ip->i_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { +#if !defined(__FreeBSD__) + struct timeval time; + get_time(&time); +#endif + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + VOP_UPDATE(vp, &time, &time, MNT_WAIT); + } + if (ssize < len) + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) { + ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->s_blocksize); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + } + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_db[0]) + brelse(sbp); + return (ENOSPC); + +#endif /* FANCY_REALLOC */ +} + +/* + * Allocate an inode in the file system. + * + * we leave the actual allocation strategy to the (modified) + * ext2_new_inode(), to make sure we get the policies right + */ +int +ext2_valloc(ap) + struct vop_valloc_args /* { + struct vnode *a_pvp; + int a_mode; + struct ucred *a_cred; + struct vnode **a_vpp; + } */ *ap; +{ + register struct vnode *pvp = ap->a_pvp; + register struct inode *pip; + register struct ext2_sb_info *fs; + register struct inode *ip; + mode_t mode = ap->a_mode; + ino_t ino, ipref; + int i, error; +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + *ap->a_vpp = NULL; + pip = VTOI(pvp); + fs = pip->i_e2fs; + if (fs->s_es->s_free_inodes_count == 0) + goto noinodes; + + /* call the Linux routine - it returns the inode number only */ + ino = ext2_new_inode(pip, mode); + + if (ino == 0) + goto noinodes; + error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp); + if (error) { + VOP_VFREE(pvp, ino, mode); + return (error); + } + ip = VTOI(*ap->a_vpp); + + /* + the question is whether using VGET was such good idea at all - + Linux doesn't read the old inode in when it's allocating a + new one. 
I will set at least i_size & i_blocks the zero. + */ + ip->i_mode = 0; + ip->i_size = 0; + ip->i_blocks = 0; + ip->i_flags = 0; + /* now we want to make sure that the block pointers are zeroed out */ + for(i = 0; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + + /* + * Set up a new generation number for this inode. + * XXX check if this makes sense in ext2 + */ +#if !defined(__FreeBSD__) + get_time(&time); +#endif + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; +/* +printf("ext2_valloc: allocated inode %d\n", ino); +*/ + return (0); +noinodes: + ext2_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Select the desired position for the next block in a file. + * + * we try to mimic what Remy does in inode_getblk/block_getblk + * + * we note: blocknr == 0 means that we're about to allocate either + * a direct block or a pointer block at the first level of indirection + * (In other words, stuff that will go in i_db[] or i_ib[]) + * + * blocknr != 0 means that we're allocating a block that is none + * of the above. 
Then, blocknr tells us the number of the block + * that will hold the pointer + */ +daddr_t +ext2_blkpref(ip, lbn, indx, bap, blocknr) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; + daddr_t blocknr; +{ + register struct ext2_sb_info *fs; + int tmp; + + /* if the next block is actually what we thought it is, + then set the goal to what we thought it should be + */ + if(ip->i_next_alloc_block == lbn) + return ip->i_next_alloc_goal; + + /* now check whether we were provided with an array that basically + tells us previous blocks to which we want to stay closeby + */ + if(bap) + for (tmp = indx - 1; tmp >= 0; tmp--) + if (bap[tmp]) + return bap[tmp]; + + /* else let's fall back to the blocknr, or, if there is none, + follow the rule that a block should be allocated near it's inode + */ + return blocknr ? blocknr : + (daddr_t)(ip->i_block_group * + EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + + ip->i_e2fs->s_es->s_first_data_block; +} + +/* + * Free a block or fragment. + * + * pass on to the Linux code + */ +void +ext2_blkfree(ip, bno, size) + register struct inode *ip; + daddr_t bno; + long size; +{ + register struct ext2_sb_info *fs; + + fs = ip->i_e2fs; + /* + * call Linux code with mount *, block number, count + */ + ext2_free_blocks(ITOV(ip)->v_mount, bno, size / fs->s_frag_size); +} + +/* + * Free an inode. 
+ * + * the maintenance of the actual bitmaps is again up to the linux code + */ +int +ext2_vfree(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ + register struct ext2_sb_info *fs; + register struct inode *pip; + ino_t ino = ap->a_ino; + int mode; + + pip = VTOI(ap->a_pvp); + fs = pip->i_e2fs; + if ((u_int)ino >= fs->s_inodes_per_group * fs->s_groups_count) + panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", + pip->i_dev, ino, fs->fs_fsmnt); + +/* ext2_debug("ext2_vfree (%d, %d) called\n", pip->i_number, ap->a_mode); + */ + ext2_discard_prealloc(pip); + + /* we need to make sure that ext2_free_inode can adjust the + used_dir_counts in the group summary information - I'd + really like to know what the rationale behind this + 'set i_mode to zero to denote an unused inode' is + */ + mode = pip->i_mode; + pip->i_mode = ap->a_mode; + ext2_free_inode(pip); + pip->i_mode = mode; + return (0); +} + +/* + * Fserr prints the name of a file system with an error diagnostic. + * + * The form of the error message is: + * fs: error message + */ +static void +ext2_fserr(fs, uid, cp) + struct ext2_sb_info *fs; + u_int uid; + char *cp; +{ + + log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); +} diff --git a/sys/gnu/fs/ext2fs/ext2_balloc.c b/sys/gnu/fs/ext2fs/ext2_balloc.c new file mode 100644 index 000000000000..44d75ae34a2d --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_balloc.c @@ -0,0 +1,335 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/vnode.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +/* + * Balloc defines the structure of file system storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + */ +int +ext2_balloc(ip, bn, size, cred, bpp, flags) + register struct inode *ip; + register daddr_t bn; + int size; + struct ucred *cred; + struct buf **bpp; + int flags; +{ + register struct ext2_sb_info *fs; + register daddr_t nb; + struct buf *bp, *nbp; + struct vnode *vp = ITOV(ip); + struct indir indirs[NIADDR + 2]; + daddr_t newb, lbn, *bap, pref; + int osize, nsize, num, i, error; +/* +ext2_debug("ext2_balloc called (%d, %d, %d)\n", + ip->i_number, (int)bn, (int)size); +*/ + *bpp = NULL; + if (bn < 0) + return (EFBIG); + fs = ip->i_e2fs; + lbn = bn; + + /* + * check if this is a sequential block allocation. + * If so, increment next_alloc fields to allow ext2_blkpref + * to make a good guess + */ + if (lbn == ip->i_next_alloc_block + 1) { + ip->i_next_alloc_block++; + ip->i_next_alloc_goal++; + } + + /* + * The first NDADDR blocks are direct blocks + */ + if (bn < NDADDR) { + nb = ip->i_db[bn]; + /* no new block is to be allocated, and no need to expand + the file */ + if (nb != 0 && ip->i_size >= (bn + 1) * fs->s_blocksize) { + error = bread(vp, bn, fs->s_blocksize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. 
+ */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, bn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + } else { + /* Godmar thinks: this shouldn't happen w/o fragments */ + printf("nsize %d(%d) > osize %d(%d) nb %d\n", + (int)nsize, (int)size, (int)osize, + (int)ip->i_size, (int)nb); + panic("ext2_balloc: " + "Something is terribly wrong\n"); +/* + * please note there haven't been any changes from here on - + * FFS seems to work. + */ + } + } else { + if (ip->i_size < (bn + 1) * fs->s_blocksize) + nsize = fragroundup(fs, size); + else + nsize = fs->s_blocksize; + error = ext2_alloc(ip, bn, + ext2_blkpref(ip, bn, (int)bn, &ip->i_db[0], 0), + nsize, cred, &newb); + if (error) + return (error); + bp = getblk(vp, bn, nsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) +#if defined(__FreeBSD__) + vfs_bio_clrbuf(bp); +#else + clrbuf(bp); +#endif + } + ip->i_db[bn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. + */ + pref = 0; + if (error = ufs_getlbns(vp, bn, indirs, &num)) + return(error); +#if DIAGNOSTIC + if (num < 1) + panic ("ext2_balloc: ufs_bmaparray returned indirect block\n"); +#endif + /* + * Fetch the first indirect block allocating if necessary. + */ + --num; + nb = ip->i_ib[indirs[0].in_off]; + if (nb == 0) { +#if 0 + pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); +#else + /* see the comment by ext2_blkpref. What we do here is + to pretend that it'd be good for a block holding indirect + pointers to be allocated near its predecessor in terms + of indirection, or the last direct block. + We shamelessly exploit the fact that i_ib immediately + follows i_db. 
+ Godmar thinks it make sense to allocate i_ib[0] immediately + after i_db[11], but it's not utterly clear whether this also + applies to i_ib[1] and i_ib[0] + */ + + pref = ext2_blkpref(ip, lbn, indirs[0].in_off + + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); +#endif + if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, + cred, &newb)) + return (error); + nb = newb; + bp = getblk(vp, indirs[1].in_lbn, fs->s_blocksize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); +#if defined(__FreeBSD__) + vfs_bio_clrbuf(bp); +#else + clrbuf(bp); +#endif + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) { + ext2_blkfree(ip, nb, fs->s_blocksize); + return (error); + } + ip->i_ib[indirs[0].in_off] = newb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->s_blocksize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bap = (daddr_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + brelse(bp); + continue; + } + if (pref == 0) +#if 1 + /* see the comment above and by ext2_blkpref + * I think this implements Linux policy, but + * does it really make sense to allocate to + * block containing pointers together ? + * Also, will it ever succeed ? + */ + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, + bp->b_lblkno); +#else + pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); +#endif + if (error = + ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, indirs[i].in_lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); +#if defined(__FreeBSD__) + vfs_bio_clrbuf(nbp); +#else + clrbuf(nbp); +#endif + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if (error = bwrite(nbp)) { + ext2_blkfree(ip, nb, fs->s_blocksize); + brelse(bp); + return (error); + } + bap[indirs[i - 1].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + } + /* + * Get the data block, allocating if necessary. + */ + if (nb == 0) { + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], + bp->b_lblkno); + if (error = ext2_alloc(ip, + lbn, pref, (int)fs->s_blocksize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) +#if defined(__FreeBSD__) + vfs_bio_clrbuf(nbp); +#else + clrbuf(nbp); +#endif + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + *bpp = nbp; + return (0); + } + brelse(bp); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->s_blocksize, NOCRED, &nbp); + if (error) { + brelse(nbp); + return (error); + } + } else { + nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + } + *bpp = nbp; + return (0); +} diff --git a/sys/gnu/fs/ext2fs/ext2_extern.h b/sys/gnu/fs/ext2fs/ext2_extern.h new file mode 100644 index 000000000000..92ec0427aa3d --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_extern.h @@ -0,0 +1,140 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 + */ + +struct buf; +struct fid; +struct fs; +struct inode; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct mbuf; +struct dinode; +struct ext2_group_desc; +struct ext2_inode; + +__BEGIN_DECLS +int ext2_alloc __P((struct inode *, + daddr_t, daddr_t, int, struct ucred *, daddr_t *)); +int ext2_balloc __P((struct inode *, + daddr_t, int, struct ucred *, struct buf **, int)); +int ext2_blkatoff __P((struct vop_blkatoff_args *)); +void ext2_blkfree __P((struct inode *, daddr_t, long)); +daddr_t ext2_blkpref __P((struct inode *, daddr_t, int, daddr_t *, daddr_t)); +int ext2_bmap __P((struct vop_bmap_args *)); +int ext2_fhtovp __P((struct mount *, struct fid *, struct mbuf *, + struct vnode **, int *, struct ucred **)); +int ext2_fsync __P((struct vop_fsync_args *)); +int ext2_init __P((void)); +int ext2_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); +int ext2_mountroot __P((void)); +int ext2_read __P((struct vop_read_args *)); +int ext2_reallocblks __P((struct vop_reallocblks_args *)); +int ext2_reclaim __P((struct vop_reclaim_args *)); +void ext2_setblock __P((struct ext2_sb_info *, u_char *, daddr_t)); +int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); +int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); +int ext2_truncate __P((struct vop_truncate_args *)); +int ext2_unmount __P((struct mount *, int, struct proc *)); +int ext2_update __P((struct vop_update_args *)); +int ext2_valloc __P((struct vop_valloc_args *)); +int ext2_vfree __P((struct vop_vfree_args *)); +int ext2_vget __P((struct mount *, ino_t, struct vnode **)); +int ext2_vptofh __P((struct vnode *, struct fid *)); +int ext2_write __P((struct vop_write_args *)); +int ext2_lookup __P((struct vop_lookup_args *)); +int ext2_readdir 
__P((struct vop_readdir_args *)); +void ext2_print_dinode __P((struct dinode *)); +void ext2_print_inode __P((struct inode *)); +int ext2_direnter __P((struct inode *, + struct vnode *, struct componentname *)); +int ext2_dirremove __P((struct vnode *, struct componentname *)); +int ext2_dirrewrite __P((struct inode *, + struct inode *, struct componentname *)); +int ext2_dirempty __P((struct inode *, ino_t, struct ucred *)); +int ext2_checkpath __P((struct inode *, struct inode *, struct ucred *)); +struct ext2_group_desc * get_group_desc __P((struct mount * , + unsigned int , struct buf ** )); +void ext2_discard_prealloc __P((struct inode *)); +int ext2_inactive __P((struct vop_inactive_args *)); +int ll_w_block __P((struct buf *, int )); +int ext2_di2ei __P((struct dinode *di, struct ext2_inode *ei)); +int ext2_ei2di __P((struct ext2_inode *ei, struct dinode *di)); +int ext2_new_block __P ((struct mount * mp, unsigned long goal, + long * prealloc_count, + long * prealloc_block)); +ino_t ext2_new_inode __P ((const struct inode * dir, int mode)); +void ext2_free_blocks (struct mount * mp, unsigned long block, + unsigned long count); +void ext2_free_inode (struct inode * inode); +int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); +int ext2_reload __P((struct mount *mountp, struct ucred *cred, + struct proc *p)); + +#if !defined(__FreeBSD__) +int bwrite(); /* FFS needs a bwrite routine. XXX */ +#endif + +/* this macros allows some of the ufs code to distinguish between + * an EXT2 and a non-ext2(FFS/LFS) vnode. 
+ */ +#define IS_EXT2_VNODE(vp) (vp->v_mount->mnt_stat.f_type == MOUNT_EXT2FS) + +#ifdef DIAGNOSTIC +void ext2_checkoverlap __P((struct buf *, struct inode *)); +#endif +__END_DECLS + +extern int (**ext2_vnodeop_p)(); +extern int (**ext2_specop_p)(); +#ifdef FIFO +extern int (**ext2_fifoop_p)(); +#define EXT2_FIFOOPS ext2_fifoop_p +#else +#define EXT2_FIFOOPS NULL +#endif diff --git a/sys/gnu/fs/ext2fs/ext2_fs.h b/sys/gnu/fs/ext2fs/ext2_fs.h new file mode 100644 index 000000000000..56a85756f75c --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_fs.h @@ -0,0 +1,340 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#ifndef _LINUX_EXT2_FS_H +#define _LINUX_EXT2_FS_H + +#include <sys/types.h> + +#ifdef i386 +#if defined(__FreeBSD__) +#include <machine/types.h> +#else +#include <i386/types.h> +#endif +#else +#error need processor specific types +#endif + +#define __u32 u_int32_t +#define u32 u_int32_t +#define __u16 u_int16_t +#define __u8 u_int8_t + +#define __s32 int32_t +#define __s16 int16_t +#define __s8 int8_t + +#define umode_t mode_t +#define loff_t off_t + +/* the Linux implementation of EXT2 stores some information about + * an inode in a ext2_inode_info structure which is part of the incore + * inode in Linux + * I decided to use the i_spare[11] fields instead - we'll see how this + * works out + */ + +#define i_block_group i_spare[0] +#define i_next_alloc_block i_spare[1] +#define i_next_alloc_goal i_spare[2] +#define i_prealloc_block i_spare[3] +#define i_prealloc_count i_spare[4] + +/* + * The second extended filesystem constants/structures + 
*/ + +/* + * Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2FS_DEBUG_CACHE to produce cache debug messages + */ +#undef EXT2FS_DEBUG_CACHE + +/* + * Define EXT2FS_CHECK_CACHE to add some checks to the name cache code + */ +#undef EXT2FS_CHECK_CACHE + +/* + * Define EXT2FS_PRE_02B_COMPAT to convert ext 2 fs prior to 0.2b + */ +#undef EXT2FS_PRE_02B_COMPAT + +/* + * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files + */ +#define EXT2_PREALLOCATE + +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/03/19" +#define EXT2FS_VERSION "0.5a" + +/* + * Debug code + */ +#ifdef EXT2FS_DEBUG +# define ext2_debug(f, a...) { \ + printf ("EXT2-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printf (f, ## a); \ + } +#else +# define ext2_debug(f, a...) /**/ +#endif + +/* + * Special inodes numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +#define EXT2_ACL_DATA_INO 4 /* ACL inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT2_FIRST_INO 11 /* First non reserved inode */ + +/* + * The second extended file system magic number + */ +#define EXT2_PRE_02B_MAGIC 0xEF51 +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 + +#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / \ + sizeof (struct ext2_acl_entry)) +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) +#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) + +#define EXT2_INODE_SIZE 128 + /* ought to be sizeof (struct ext2_inode)) */ +#define 
EXT2_INODES_PER_BLOCK(s) ((s)->s_inodes_per_block) + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#define EXT2_FRAG_SIZE(s) ((s)->s_frag_size) +#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) + +/* + * ACL structures + */ +struct ext2_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext2_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext2_old_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ +}; + +struct ext2_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __u32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT2_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_group_desc)) +#define EXT2_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define 
EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) +#define EXT2_MAXSYMLINKLEN (EXT2_N_BLOCKS * sizeof (__u32)) + +/* + * Inode flags + */ +#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT2_UNRM_FL 0x00000002 /* Undelete */ +#define EXT2_COMPR_FL 0x00000004 /* Compress file */ +#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ + +/* + * ioctl commands + */ +#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT2_IOC_GETVERSION _IOR('v', 1, long) +#define EXT2_IOC_SETVERSION _IOW('v', 2, long) + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK_NORMAL 0x0001 /* Do some more checks */ +#define EXT2_MOUNT_CHECK_STRICT 0x0002 /* Do again more checks */ +#define EXT2_MOUNT_CHECK (EXT2_MOUNT_CHECK_NORMAL | \ + EXT2_MOUNT_CHECK_STRICT) +#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ + +#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +#define set_opt(o, opt) o |= EXT2_MOUNT_##opt +#define test_opt(sb, opt) ((sb)->u.ext2_sb.s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * 
Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext2_super_block { + __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + __u32 s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + __u32 s_blocks_per_group; /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_pad; + __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. 
time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + __u32 s_reserved[235]; /* Padding to the end of the block */ +}; + +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OS_MASIX 2 + +#define EXT2_CURRENT_REV 0 + +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Structure of a directory entry + */ +#define EXT2_NAME_LEN 255 + +struct ext2_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) + +#endif /* _LINUX_EXT2_FS_H */ diff --git a/sys/gnu/fs/ext2fs/ext2_fs_sb.h b/sys/gnu/fs/ext2fs/ext2_fs_sb.h new file mode 100644 index 000000000000..f475ce279d82 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_fs_sb.h @@ -0,0 +1,87 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/include/linux/ext2_fs_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_SB +#define _LINUX_EXT2_FS_SB + +/* + * The following is not needed anymore since the descriptors buffer + * heads are now dynamically allocated + */ +/* #define EXT2_MAX_GROUP_DESC 8 */ + +#define EXT2_MAX_GROUP_LOADED 8 + +#if defined(LITES) || defined(__FreeBSD__) +#define 
buffer_head buf +#define MAXMNTLEN 512 +#endif + +/* + * second extended-fs super-block data in memory + */ +struct ext2_sb_info { + unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_frags_per_block;/* Number of fragments per block */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_frags_per_group;/* Number of fragments in a group */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_db_per_group; /* Number of descriptor blocks per group */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned short s_loaded_inode_bitmaps; + unsigned short s_loaded_block_bitmaps; + unsigned long s_inode_bitmap_number[EXT2_MAX_GROUP_LOADED]; + struct buffer_head * s_inode_bitmap[EXT2_MAX_GROUP_LOADED]; + unsigned long s_block_bitmap_number[EXT2_MAX_GROUP_LOADED]; + struct buffer_head * s_block_bitmap[EXT2_MAX_GROUP_LOADED]; + int s_rename_lock; +#if !defined(LITES) && !defined(__FreeBSD__) + struct wait_queue * s_rename_wait; +#endif + unsigned long s_mount_opt; + unsigned short s_resuid; + unsigned short s_resgid; + unsigned short s_mount_state; +#if defined(LITES) || defined(__FreeBSD__) + /* + stuff that FFS keeps in its super block or that linux + has in its non-ext2 specific super block and which is + generally considered useful + */ + unsigned long s_blocksize; + unsigned long s_blocksize_bits; + unsigned int s_bshift; /* = log2(s_blocksize) */ + quad_t s_qbmask; /* = s_blocksize - 1 */ + unsigned int s_fsbtodb; /* shift to get disk block */ + char 
s_rd_only; /* read-only */ + char s_dirt; /* fs modified flag */ + + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +#endif +}; + +#endif /* _LINUX_EXT2_FS_SB */ diff --git a/sys/gnu/fs/ext2fs/ext2_inode.c b/sys/gnu/fs/ext2fs/ext2_inode.c new file mode 100644 index 000000000000..f2d6fd7aae3f --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_inode.c @@ -0,0 +1,547 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ext2_inode.c 8.5 (Berkeley) 12/30/93 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#if !defined(__FreeBSD__) +#include <sys/trace.h> +#endif +#include <sys/resourcevar.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +static int ext2_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int, + long *)); + +int +ext2_init() +{ + return (ufs_init()); +} + +/* + * Update the access, modified, and inode change times as specified by the + * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is + * used to specify that the inode needs to be updated but that the times have + * already been set. The access and modified times are taken from the second + * and third parameters; the inode change time is always taken from the current + * time. If waitfor is set, then wait for the disk write of the inode to + * complete. 
+ */ +int +ext2_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_access; + struct timeval *a_modify; + int a_waitfor; + } */ *ap; +{ + register struct ext2_sb_info *fs; + struct buf *bp; + struct inode *ip; + int error; +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + ip = VTOI(ap->a_vp); + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { + ip->i_flag &= + ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + return (0); + } + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + return (0); + if (ip->i_flag & IN_ACCESS) + ip->i_atime.ts_sec = ap->a_access->tv_sec; + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime.ts_sec = ap->a_modify->tv_sec; + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) { +#if !defined(__FreeBSD__) + get_time(&time); +#endif + ip->i_ctime.ts_sec = time.tv_sec; + } + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + fs = ip->i_e2fs; + if (error = bread(ip->i_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->s_blocksize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + ext2_di2ei( &ip->i_din, (struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * + ino_to_fsbo(fs, ip->i_number))); +/* + if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + return (bwrite(bp)); + else { +*/ + bdwrite(bp); + return (0); +/* + } +*/ +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode oip to at most length size, freeing the + * disk blocks. 
+ */ +int +ext2_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = ap->a_vp; + register daddr_t lastblock; + register struct inode *oip; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; + off_t length = ap->a_length; + register struct ext2_sb_info *fs; + struct buf *bp; + int offset, size, level; + long count, nblocks, vflags, blocksreleased = 0; + struct timeval tv; + register int i; + int aflags, error, allerror; + off_t osize; +/* +printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); +*/ /* + * negative file sizes will totally break the code below and + * are not meaningful anyways. + */ + if (length < 0) + return EFBIG; + + oip = VTOI(ovp); +#if defined(__FreeBSD__) + tv = time; +#else + get_time(&tv); +#endif + if (ovp->v_type == VLNK && + oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { +#if DIAGNOSTIC + if (length != 0) + panic("ext2_truncate: partial truncate of symlink"); +#endif + bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); + oip->i_size = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + if (oip->i_size == length) { + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 0)); + } +#if QUOTA + if (error = getinoquota(oip)) + return (error); +#endif + vnode_pager_setsize(ovp, (u_long)length); + fs = oip->i_e2fs; + osize = oip->i_size; + ext2_discard_prealloc(oip); + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of oszie is 0, length will be at least 1. 
+ */ + if (osize < length) { + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ext2_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; +#if !defined(__FreeBSD__) + (void) vnode_pager_uncache(ovp); +#endif + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + /* + * Shorten the size of the file. If the file is not being + * truncated to a block boundry, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever become accessable again because + * of subsequent file growth. + */ + /* I don't understand the comment above */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ext2_balloc(oip, lbn, offset, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + size = blksize(fs, oip, lbn); +#if !defined(__FreeBSD__) + (void) vnode_pager_uncache(ovp); +#endif + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* + * Calculate index into inode's block list of + * last direct and indirect blocks (if any) + * which we want to keep. Lastblock is -1 when + * the file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->s_blocksize - 1) - 1; + lastiblock[SINGLE] = lastblock - NDADDR; + lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); + lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); + nblocks = btodb(fs->s_blocksize); + /* + * Update file and block pointers on disk before we start freeing + * blocks. If we crash before free'ing blocks below, the blocks + * will be returned to the free list. 
lastiblock values are also + * normalized to -1 for calls to ext2_indirtrunc below. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); + for (level = TRIPLE; level >= SINGLE; level--) + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + lastiblock[level] = -1; + } + for (i = NDADDR - 1; i > lastblock; i--) + oip->i_db[i] = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT)) + allerror = error; + /* + * Having written the new inode to disk, save its new configuration + * and put back the old block pointers long enough to process them. + * Note that we save the new block configuration so we can check it + * when we are done. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); + bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); + oip->i_size = osize; + vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; + allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); + + /* + * Indirect blocks first. + */ + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; + for (level = TRIPLE; level >= SINGLE; level--) { + bn = oip->i_ib[level]; + if (bn != 0) { + error = ext2_indirtrunc(oip, indir_lbn[level], + fsbtodb(fs, bn), lastiblock[level], level, &count); + if (error) + allerror = error; + blocksreleased += count; + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + ext2_blkfree(oip, bn, fs->s_frag_size); + blocksreleased += nblocks; + } + } + if (lastiblock[level] >= 0) + goto done; + } + + /* + * All whole direct blocks or frags. 
+ */ + for (i = NDADDR - 1; i > lastblock; i--) { + register long bsize; + + bn = oip->i_db[i]; + if (bn == 0) + continue; + oip->i_db[i] = 0; + bsize = blksize(fs, oip, i); + ext2_blkfree(oip, bn, bsize); + blocksreleased += btodb(bsize); + } + if (lastblock < 0) + goto done; + + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + bn = oip->i_db[lastblock]; + if (bn != 0) { + long oldspace, newspace; + + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + oldspace = blksize(fs, oip, lastblock); + oip->i_size = length; + newspace = blksize(fs, oip, lastblock); + if (newspace == 0) + panic("itrunc: newspace"); + if (oldspace - newspace > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn += numfrags(fs, newspace); + ext2_blkfree(oip, bn, oldspace - newspace); + blocksreleased += btodb(oldspace - newspace); + } + } +done: +#if DIAGNOSTIC + for (level = SINGLE; level <= TRIPLE; level++) + if (newblks[NDADDR + level] != oip->i_ib[level]) + panic("itrunc1"); + for (i = 0; i < NDADDR; i++) + if (newblks[i] != oip->i_db[i]) + panic("itrunc2"); + if (length == 0 && + (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) + panic("itrunc3"); +#endif /* DIAGNOSTIC */ + /* + * Put back the real size. + */ + oip->i_size = length; + oip->i_blocks -= blocksreleased; + if (oip->i_blocks < 0) /* sanity */ + oip->i_blocks = 0; + oip->i_flag |= IN_CHANGE; +#if QUOTA + (void) chkdq(oip, -blocksreleased, NOCRED, 0); +#endif + return (allerror); +} + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. 
+ * + * NB: triple indirect blocks are untested. + */ + +static int +ext2_indirtrunc(ip, lbn, dbn, lastbn, level, countp) + register struct inode *ip; + daddr_t lbn, lastbn; + daddr_t dbn; + int level; + long *countp; +{ + register int i; + struct buf *bp; + register struct ext2_sb_info *fs = ip->i_e2fs; + register daddr_t *bap; + struct vnode *vp; + daddr_t *copy, nb, nlbn, last; + long blkcount, factor; + int nblocks, blocksreleased = 0; + int error = 0, allerror = 0; + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = 1; + for (i = SINGLE; i < level; i++) + factor *= NINDIR(fs); + last = lastbn; + if (lastbn > 0) + last /= factor; + nblocks = btodb(fs->s_blocksize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->s_blocksize, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + /* Braces must be here in case trace evaluates to nothing. 
*/ +#if !defined(__FreeBSD__) + trace(TR_BREADHIT, pack(vp, fs->s_blocksize), lbn); +#endif + } else { +#if !defined(__FreeBSD__) + trace(TR_BREADMISS, pack(vp, fs->s_blocksize), lbn); + get_proc()->p_stats->p_ru.ru_inblock++; /* pay for read */ +#endif + bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ext2_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; +#if defined(__FreeBSD__) + vfs_busy_pages(bp, 0); +#endif + VOP_STRATEGY(bp); + error = biowait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + + bap = (daddr_t *)bp->b_data; + MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + if (last == -1) + bp->b_flags |= B_INVAL; + error = bwrite(bp); + if (error) + allerror = error; + bap = copy; + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if (error = ext2_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + ext2_blkfree(ip, nb, fs->s_blocksize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. 
+ */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = bap[i]; + if (nb != 0) { + if (error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + } + FREE(copy, M_TEMP); + *countp = blocksreleased; + return (allerror); +} + +/* + * discard preallocated blocks + */ +int +ext2_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + ext2_discard_prealloc(VTOI(ap->a_vp)); + return ufs_inactive(ap); +} + diff --git a/sys/gnu/fs/ext2fs/ext2_inode_cnv.c b/sys/gnu/fs/ext2fs/ext2_inode_cnv.c new file mode 100644 index 000000000000..1ab48e9c6184 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_inode_cnv.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 1995 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. 
+ * + * Utah $Hdr$ + */ + +/* + * routines to convert on disk ext2 inodes in dinodes and back + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* these defs would destroy the ext2_fs_i #include */ +#undef i_atime +#undef i_blocks +#undef i_ctime +#undef i_db +#undef i_flags +#undef i_gen +#undef i_gid +#undef i_ib +#undef i_mode +#undef i_mtime +#undef i_nlink +#undef i_rdev +#undef i_shortlink +#undef i_size +#undef i_uid + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_i.h> + +void ext2_print_dinode( di ) + struct dinode *di; +{ + int i; + printf( /* "Inode: %5d" */ + " Type: %10s Mode: 0x%o Flags: 0x%x Version: %d\n", + "n/a", di->di_mode, di->di_flags, di->di_gen); + printf( "User: %5d Group: %5d Size: %d\n", + di->di_uid, di->di_gid, di->di_size); + printf( "Links: %3d Blockcount: %d\n", + di->di_nlink, di->di_blocks); + printf( "ctime: 0x%x", di->di_ctime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_ctime.ts_sec); +#endif + printf( "atime: 0x%x", di->di_atime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_atime.ts_sec); +#endif + printf( "mtime: 0x%x", di->di_mtime.ts_sec); +#if !defined(__FreeBSD__) + print_time(" -- %s\n", di->di_mtime.ts_sec); +#endif + printf( "BLOCKS: "); + for(i=0; i < (di->di_blocks <= 24 ? ((di->di_blocks+1)/2): 12); i++) + printf("%d ", di->di_db[i]); + printf("\n"); +} + +void ext2_print_inode( in ) + struct inode *in; +{ + printf( "Inode: %5d", in->i_number); + ext2_print_dinode(&in->i_din); +} + +/* + * raw ext2 inode to dinode + */ +int ext2_ei2di(ei, di) + struct ext2_inode *ei; + struct dinode *di; +{ + int i; + + di->di_nlink = ei->i_links_count; + /* Godmar thinks - if the link count is zero, then the inode is + unused - according to ext2 standards. Ufs marks this fact + by setting i_mode to zero - why ? + I can see that this might lead to problems in an undelete. 
+ */ + di->di_mode = ei->i_links_count ? ei->i_mode : 0; + di->di_size = ei->i_size; + di->di_atime.ts_sec = ei->i_atime; + di->di_mtime.ts_sec = ei->i_mtime; + di->di_ctime.ts_sec = ei->i_ctime; + di->di_flags = 0; + di->di_flags |= (ei->i_flags & EXT2_APPEND_FL) ? APPEND : 0; + di->di_flags |= (ei->i_flags & EXT2_IMMUTABLE_FL) ? IMMUTABLE : 0; + di->di_blocks = ei->i_blocks; + di->di_gen = ei->i_version; /* XXX is that true ??? */ + di->di_uid = ei->i_uid; + di->di_gid = ei->i_gid; + /* XXX use memcpy */ + for(i = 0; i < NDADDR; i++) + di->di_db[i] = ei->i_block[i]; + for(i = 0; i < NIADDR; i++) + di->di_ib[i] = ei->i_block[EXT2_NDIR_BLOCKS + i]; +} + +/* + * dinode to raw ext2 inode + */ +int ext2_di2ei(di, ei) + struct dinode *di; + struct ext2_inode *ei; +{ + int i; + + ei->i_mode = di->di_mode; + ei->i_links_count = di->di_nlink; + /* + Godmar thinks: if dtime is nonzero, ext2 says this inode + has been deleted, this would correspond to a zero link count + */ + ei->i_dtime = ei->i_links_count ? 0 : di->di_mtime.ts_sec; + ei->i_size = di->di_size; + ei->i_atime = di->di_atime.ts_sec; + ei->i_mtime = di->di_mtime.ts_sec; + ei->i_ctime = di->di_ctime.ts_sec; + ei->i_flags = di->di_flags; + ei->i_flags = 0; + ei->i_flags |= (di->di_flags & APPEND) ? EXT2_APPEND_FL: 0; + ei->i_flags |= (di->di_flags & IMMUTABLE) + ? EXT2_IMMUTABLE_FL: 0; + ei->i_blocks = di->di_blocks; + ei->i_version = di->di_gen; /* XXX is that true ??? 
*/ + ei->i_uid = di->di_uid; + ei->i_gid = di->di_gid; + /* XXX use memcpy */ + for(i = 0; i < NDADDR; i++) + ei->i_block[i] = di->di_db[i]; + for(i = 0; i < NIADDR; i++) + ei->i_block[EXT2_NDIR_BLOCKS + i] = di->di_ib[i]; +} diff --git a/sys/gnu/fs/ext2fs/ext2_linux_balloc.c b/sys/gnu/fs/ext2fs/ext2_linux_balloc.c new file mode 100644 index 000000000000..25b98913d706 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_linux_balloc.c @@ -0,0 +1,582 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/fs/ext2/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <sys/stat.h> + +#ifdef i386 +#include <gnu/ext2fs/i386-bitops.h> +#else +#error Provide an bitops.h file, please ! 
+#endif + +unsigned long ext2_count_free __P((struct buffer_head *, unsigned int)); + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/* got rid of get_group_desc since it can already be found in + * ext2_linux_ialloc.c + */ + +static void read_block_bitmap (struct mount * mp, + unsigned int block_group, + unsigned long bitmap_nr) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_group_desc * gdp; + struct buffer_head * bh; + int error; + + gdp = get_group_desc (mp, block_group, NULL); + if(error = bread (VFSTOUFS(mp)->um_devvp, + fsbtodb(sb, gdp->bg_block_bitmap),sb->s_blocksize, NOCRED, &bh)) + panic ( "read_block_bitmap: " + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %lu", + block_group, (unsigned long) gdp->bg_block_bitmap); + sb->s_block_bitmap_number[bitmap_nr] = block_group; + sb->s_block_bitmap[bitmap_nr] = bh; +} + +/* + * load_block_bitmap loads the block bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. 
+ */ +static int load__block_bitmap (struct mount * mp, + unsigned int block_group) +{ + int i, j; + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + unsigned long block_bitmap_number; + struct buffer_head * block_bitmap; + int error; + + if (block_group >= sb->s_groups_count) + panic ( "load_block_bitmap: " + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + + if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->s_block_bitmap[block_group]) { + if (sb->s_block_bitmap_number[block_group] != + block_group) + panic ( "load_block_bitmap: " + "block_group != block_bitmap_number"); + else + return block_group; + } else { + read_block_bitmap (mp, block_group, block_group); + return block_group; + } + } + + for (i = 0; i < sb->s_loaded_block_bitmaps && + sb->s_block_bitmap_number[i] != block_group; i++) + ; + if (i < sb->s_loaded_block_bitmaps && + sb->s_block_bitmap_number[i] == block_group) { + block_bitmap_number = sb->s_block_bitmap_number[i]; + block_bitmap = sb->s_block_bitmap[i]; + for (j = i; j > 0; j--) { + sb->s_block_bitmap_number[j] = + sb->s_block_bitmap_number[j - 1]; + sb->s_block_bitmap[j] = + sb->s_block_bitmap[j - 1]; + } + sb->s_block_bitmap_number[0] = block_bitmap_number; + sb->s_block_bitmap[0] = block_bitmap; + } else { + if (sb->s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->s_loaded_block_bitmaps++; + else + brelse (sb->s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->s_loaded_block_bitmaps - 1; j > 0; j--) { + sb->s_block_bitmap_number[j] = + sb->s_block_bitmap_number[j - 1]; + sb->s_block_bitmap[j] = + sb->s_block_bitmap[j - 1]; + } + read_block_bitmap (mp, block_group, 0); + } + return 0; +} + +static inline int load_block_bitmap (struct mount * mp, + unsigned int block_group) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + if (sb->s_loaded_block_bitmaps > 0 && + sb->s_block_bitmap_number[0] == block_group) + return 0; + + if (sb->s_groups_count <= 
EXT2_MAX_GROUP_LOADED && + sb->s_block_bitmap_number[block_group] == block_group && + sb->s_block_bitmap[block_group]) + return block_group; + + return load__block_bitmap (mp, block_group); +} + +void ext2_free_blocks (struct mount * mp, unsigned long block, + unsigned long count) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + unsigned long i; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es = sb->s_es; + + if (!sb) { + printf ("ext2_free_blocks: nonexistent device"); + return; + } + lock_super (VFSTOUFS(mp)->um_devvp); + if (block < es->s_first_data_block || + (block + count) > es->s_blocks_count) { + printf ( "ext2_free_blocks: " + "Freeing blocks not in datazone - " + "block = %lu, count = %lu", block, count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return; + } + + ext2_debug ("freeing blocks %lu to %lu\n", block, block+count-1); + + block_group = (block - es->s_first_data_block) / + EXT2_BLOCKS_PER_GROUP(sb); + bit = (block - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb); + if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) + panic ( "ext2_free_blocks: " + "Freeing blocks across group boundary - " + "Block = %lu, count = %lu", + block, count); + bitmap_nr = load_block_bitmap (mp, block_group); + bh = sb->s_block_bitmap[bitmap_nr]; + gdp = get_group_desc (mp, block_group, &bh2); + + if (/* test_opt (sb, CHECK_STRICT) && assume always strict ! 
*/ + (in_range (gdp->bg_block_bitmap, block, count) || + in_range (gdp->bg_inode_bitmap, block, count) || + in_range (block, gdp->bg_inode_table, + sb->s_itb_per_group) || + in_range (block + count - 1, gdp->bg_inode_table, + sb->s_itb_per_group))) + panic ( "ext2_free_blocks: " + "Freeing blocks in system zones - " + "Block = %lu, count = %lu", + block, count); + + for (i = 0; i < count; i++) { + if (!clear_bit (bit + i, bh->b_data)) + printf ("ext2_free_blocks: " + "bit already cleared for block %lu", + block); + else { + gdp->bg_free_blocks_count++; + es->s_free_blocks_count++; + } + } + + mark_buffer_dirty(bh2); + mark_buffer_dirty(bh, 1); +/**** + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +****/ + sb->s_dirt = 1; + unlock_super (VFSTOUFS(mp)->um_devvp); + return; +} + +/* + * ext2_new_block uses a goal block to assist allocation. If the goal is + * free, or there is a free block within 32 blocks of the goal, that block + * is allocated. Otherwise a forward search is made for a free block; within + * each block group the search first looks for an entire free byte in the block + * bitmap, and then for any free bit if that fails. + */ +int ext2_new_block (struct mount * mp, unsigned long goal, + long * prealloc_count, + long * prealloc_block) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct buffer_head * bh2; + char * p, * r; + int i, j, k, tmp; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es = sb->s_es; + +#ifdef EXT2FS_DEBUG + static int goal_hits = 0, goal_attempts = 0; +#endif + if (!sb) { + printf ("ext2_new_block: nonexistent device"); + return 0; + } + lock_super (VFSTOUFS(mp)->um_devvp); + + ext2_debug ("goal=%lu.\n", goal); + +repeat: + /* + * First, test whether the goal block is free. 
+ */ + if (goal < es->s_first_data_block || goal >= es->s_blocks_count) + goal = es->s_first_data_block; + i = (goal - es->s_first_data_block) / EXT2_BLOCKS_PER_GROUP(sb); + gdp = get_group_desc (mp, i, &bh2); + if (gdp->bg_free_blocks_count > 0) { + j = ((goal - es->s_first_data_block) % EXT2_BLOCKS_PER_GROUP(sb)); +#ifdef EXT2FS_DEBUG + if (j) + goal_attempts++; +#endif + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + + ext2_debug ("goal is at %d:%d.\n", i, j); + + if (!test_bit(j, bh->b_data)) { +#ifdef EXT2FS_DEBUG + goal_hits++; + ext2_debug ("goal bit allocated.\n"); +#endif + goto got_block; + } + if (j) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + int end_goal = (j + 63) & ~63; + j = find_next_zero_bit(bh->b_data, end_goal, j); + if (j < end_goal) + goto got_block; + } + + ext2_debug ("Bit not found near goal\n"); + + /* + * There has been no free block found in the near vicinity + * of the goal: do a search forward through the block groups, + * searching in each group first for an entire free byte in + * the bitmap and then for any free bit. + * + * Search first in the remainder of the current group; then, + * cyclicly search through the rest of the groups. + */ + p = ((char *) bh->b_data) + (j >> 3); + r = memscan(p, 0, (EXT2_BLOCKS_PER_GROUP(sb) - j + 7) >> 3); + k = (r - ((char *) bh->b_data)) << 3; + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto search_back; + } + k = find_next_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb), + j); + if (k < EXT2_BLOCKS_PER_GROUP(sb)) { + j = k; + goto got_block; + } + } + + ext2_debug ("Bit not found in block group %d.\n", i); + + /* + * Now search the rest of the groups. We assume that + * i and gdp correctly point to the last group visited. 
+ */ + for (k = 0; k < sb->s_groups_count; k++) { + i++; + if (i >= sb->s_groups_count) + i = 0; + gdp = get_group_desc (mp, i, &bh2); + if (gdp->bg_free_blocks_count > 0) + break; + } + if (k >= sb->s_groups_count) { + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + r = memscan(bh->b_data, 0, EXT2_BLOCKS_PER_GROUP(sb) >> 3); + j = (r - bh->b_data) << 3; + + if (j < EXT2_BLOCKS_PER_GROUP(sb)) + goto search_back; + else + j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_BLOCKS_PER_GROUP(sb)); + if (j >= EXT2_BLOCKS_PER_GROUP(sb)) { + printf ( "ext2_new_block: " + "Free blocks count corrupted for block group %d", i); + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + +search_back: + /* + * We have succeeded in finding a free byte in the block + * bitmap. Now search backwards up to 7 bits to find the + * start of this group of free blocks. + */ + for (k = 0; k < 7 && j > 0 && !test_bit (j - 1, bh->b_data); k++, j--); + +got_block: + + ext2_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count); + + tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + es->s_first_data_block; + + if (/* test_opt (sb, CHECK_STRICT) && we are always strict. */ + (tmp == gdp->bg_block_bitmap || + tmp == gdp->bg_inode_bitmap || + in_range (tmp, gdp->bg_inode_table, sb->s_itb_per_group))) + panic ( "ext2_new_block: " + "Allocating block in system zone - " + "%dth block = %u in group %u", j, tmp, i); + + if (set_bit (j, bh->b_data)) { + printf ( "ext2_new_block: " + "bit already set for block %d", j); + goto repeat; + } + + ext2_debug ("found bit %d\n", j); + + /* + * Do block preallocation now if required. 
+ */ +#ifdef EXT2_PREALLOCATE + if (prealloc_block) { + *prealloc_count = 0; + *prealloc_block = tmp + 1; + for (k = 1; + k < 8 && (j + k) < EXT2_BLOCKS_PER_GROUP(sb); k++) { + if (set_bit (j + k, bh->b_data)) + break; + (*prealloc_count)++; + } + gdp->bg_free_blocks_count -= *prealloc_count; + es->s_free_blocks_count -= *prealloc_count; + ext2_debug ("Preallocated a further %lu bits.\n", + *prealloc_count); + } +#endif + + j = tmp; + + mark_buffer_dirty(bh); +/**** + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +****/ + if (j >= es->s_blocks_count) { + printf ( "ext2_new_block: " + "block >= blocks count - " + "block_group = %d, block=%d", i, j); + unlock_super (VFSTOUFS(mp)->um_devvp); + return 0; + } + + ext2_debug ("allocating block %d. " + "Goal hits %d of %d.\n", j, goal_hits, goal_attempts); + + gdp->bg_free_blocks_count--; + mark_buffer_dirty(bh2, 1); + es->s_free_blocks_count--; + sb->s_dirt = 1; + unlock_super (VFSTOUFS(mp)->um_devvp); + return j; +} + +unsigned long ext2_count_free_blocks (struct mount * mp) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; +#ifdef EXT2FS_DEBUG + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (mp, i); + x = ext2_count_free (sb->s_block_bitmap[bitmap_nr], + sb->s_blocksize); + ext2_debug ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_blocks_count, x); + bitmap_count += x; + } + ext2_debug( "stored = %lu, computed = %lu, %lu\n", + es->s_free_blocks_count, desc_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return bitmap_count; +#else + return sb->s_es->s_free_blocks_count; +#endif +} + + 
+static inline int block_in_use (unsigned long block, + struct ext2_sb_info * sb, + unsigned char * map) +{ + return test_bit ((block - sb->s_es->s_first_data_block) % + EXT2_BLOCKS_PER_GROUP(sb), map); +} + +void ext2_check_blocks_bitmap (struct mount * mp) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct buffer_head * bh; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + unsigned long desc_blocks; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i, j; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + desc_blocks = (sb->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / + EXT2_DESC_PER_BLOCK(sb); + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_blocks_count; + bitmap_nr = load_block_bitmap (mp, i); + bh = sb->s_block_bitmap[bitmap_nr]; + + if (!test_bit (0, bh->b_data)) + printf ( "ext2_check_blocks_bitmap: " + "Superblock in group %d is marked free", i); + + for (j = 0; j < desc_blocks; j++) + if (!test_bit (j + 1, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Descriptor block #%d in group " + "%d is marked free", j, i); + + if (!block_in_use (gdp->bg_block_bitmap, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Block bitmap for group %d is marked free", + i); + + if (!block_in_use (gdp->bg_inode_bitmap, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Inode bitmap for group %d is marked free", + i); + + for (j = 0; j < sb->s_itb_per_group; j++) + if (!block_in_use (gdp->bg_inode_table + j, sb, bh->b_data)) + printf ("ext2_check_blocks_bitmap: " + "Block #%d of the inode table in " + "group %d is marked free", j, i); + + x = ext2_count_free (bh, sb->s_blocksize); + if (gdp->bg_free_blocks_count != x) + printf ("ext2_check_blocks_bitmap: " + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_blocks_count, x); + bitmap_count += 
x; + } + if (es->s_free_blocks_count != bitmap_count) + printf ("ext2_check_blocks_bitmap: " + "Wrong free blocks count in super block, " + "stored = %lu, counted = %lu", + (unsigned long) es->s_free_blocks_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); +} + +/* + * this function is taken from + * linux/fs/ext2/bitmap.c + */ + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} + diff --git a/sys/gnu/fs/ext2fs/ext2_linux_ialloc.c b/sys/gnu/fs/ext2fs/ext2_linux_ialloc.c new file mode 100644 index 000000000000..62e79387da35 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_linux_ialloc.c @@ -0,0 +1,520 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * linux/fs/ext2/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@dcs.ed.ac.uk), 1993 + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <sys/stat.h> + +#if (i386) +#include <gnu/ext2fs/i386-bitops.h> +#else +#error please provide bit operation functions +#endif + +/* this is supposed to mark a buffer dirty on ready for delayed writing + */ +void mark_buffer_dirty(struct buf *bh) +{ + bh->b_flags |= B_DELWRI; + bh->b_flags &= ~(B_READ | B_ERROR); +} + +/* + this should write a buffer immediately w/o releasing it + */ +int ll_w_block(struct buf * bp, int waitfor) +{ + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + bp->b_flags |= B_WRITEINPROG; + bp->b_vp->v_numoutput++; +#if defined(__FreeBSD__) + vfs_busy_pages(bp, 1); +#endif + VOP_STRATEGY(bp); + return waitfor ? biowait(bp) : 0; +} + +struct ext2_group_desc * get_group_desc (struct mount * mp, + unsigned int block_group, + struct buffer_head ** bh) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + unsigned long group_desc; + unsigned long desc; + struct ext2_group_desc * gdp; + + if (block_group >= sb->s_groups_count) + panic ("get_group_desc: " + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + + group_desc = block_group / EXT2_DESC_PER_BLOCK(sb); + desc = block_group % EXT2_DESC_PER_BLOCK(sb); + if (!sb->s_group_desc[group_desc]) + panic ( "get_group_desc:" + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, desc); + gdp = (struct ext2_group_desc *) + sb->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sb->s_group_desc[group_desc]; + return gdp + desc; +} + +static void read_inode_bitmap (struct mount * mp, + unsigned long block_group, + unsigned int bitmap_nr) +{ + 
struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_group_desc * gdp; + struct buffer_head * bh; + int error; + + gdp = get_group_desc (mp, block_group, NULL); + if (error = bread (VFSTOUFS(mp)->um_devvp, + fsbtodb(sb, gdp->bg_inode_bitmap), + sb->s_blocksize, + NOCRED, &bh)) + panic ( "read_inode_bitmap:" + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %lu", + block_group, (unsigned long) gdp->bg_inode_bitmap); + sb->s_inode_bitmap_number[bitmap_nr] = block_group; + sb->s_inode_bitmap[bitmap_nr] = bh; +} + +/* + * load_inode_bitmap loads the inode bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + */ +static int load_inode_bitmap (struct mount * mp, + unsigned int block_group) +{ + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + int i, j; + unsigned long inode_bitmap_number; + struct buffer_head * inode_bitmap; + + if (block_group >= sb->s_groups_count) + panic ("load_inode_bitmap:" + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sb->s_groups_count); + if (sb->s_loaded_inode_bitmaps > 0 && + sb->s_inode_bitmap_number[0] == block_group) + return 0; + if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + if (sb->s_inode_bitmap[block_group]) { + if (sb->s_inode_bitmap_number[block_group] != + block_group) + panic ( "load_inode_bitmap:" + "block_group != inode_bitmap_number"); + else + return block_group; + } else { + read_inode_bitmap (mp, block_group, block_group); + return block_group; + } + } + + for (i = 0; i < sb->s_loaded_inode_bitmaps && + sb->s_inode_bitmap_number[i] != block_group; + i++) + ; + if (i < sb->s_loaded_inode_bitmaps && + sb->s_inode_bitmap_number[i] == block_group) { + 
inode_bitmap_number = sb->s_inode_bitmap_number[i]; + inode_bitmap = sb->s_inode_bitmap[i]; + for (j = i; j > 0; j--) { + sb->s_inode_bitmap_number[j] = + sb->s_inode_bitmap_number[j - 1]; + sb->s_inode_bitmap[j] = + sb->s_inode_bitmap[j - 1]; + } + sb->s_inode_bitmap_number[0] = inode_bitmap_number; + sb->s_inode_bitmap[0] = inode_bitmap; + } else { + if (sb->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->s_loaded_inode_bitmaps++; + else + brelse (sb->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->s_loaded_inode_bitmaps - 1; j > 0; j--) { + sb->s_inode_bitmap_number[j] = + sb->s_inode_bitmap_number[j - 1]; + sb->s_inode_bitmap[j] = + sb->s_inode_bitmap[j - 1]; + } + read_inode_bitmap (mp, block_group, 0); + } + return 0; +} + + +void ext2_free_inode (struct inode * inode) +{ + struct ext2_sb_info * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_super_block * es; + + if (!inode) + return; + + if (inode->i_nlink) { + printf ("ext2_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + + ext2_debug ("freeing inode %lu\n", inode->i_number); + + sb = inode->i_e2fs; + lock_super (DEVVP(inode)); + if (inode->i_number < EXT2_FIRST_INO || + inode->i_number > sb->s_es->s_inodes_count) { + printf ("free_inode reserved inode or nonexistent inode"); + unlock_super (DEVVP(inode)); + return; + } + es = sb->s_es; + block_group = (inode->i_number - 1) / EXT2_INODES_PER_GROUP(sb); + bit = (inode->i_number - 1) % EXT2_INODES_PER_GROUP(sb); + bitmap_nr = load_inode_bitmap (ITOV(inode)->v_mount, block_group); + bh = sb->s_inode_bitmap[bitmap_nr]; + if (!clear_bit (bit, bh->b_data)) + printf ( "ext2_free_inode:" + "bit already cleared for inode %lu", inode->i_number); + else { + gdp = get_group_desc (ITOV(inode)->v_mount, block_group, &bh2); + gdp->bg_free_inodes_count++; + if (S_ISDIR(inode->i_mode)) + gdp->bg_used_dirs_count--; 
+ mark_buffer_dirty(bh2); + es->s_free_inodes_count++; + } + mark_buffer_dirty(bh); +/*** XXX + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +***/ + sb->s_dirt = 1; + unlock_super (DEVVP(inode)); +} + +#if linux +/* + * This function increments the inode version number + * + * This may be used one day by the NFS server + */ +static void inc_inode_version (struct inode * inode, + struct ext2_group_desc *gdp, + int mode) +{ + unsigned long inode_block; + struct buffer_head * bh; + struct ext2_inode * raw_inode; + + inode_block = gdp->bg_inode_table + (((inode->i_number - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) / + EXT2_INODES_PER_BLOCK(inode->i_sb)); + bh = bread (inode->i_sb->s_dev, inode_block, inode->i_sb->s_blocksize); + if (!bh) { + printf ("inc_inode_version Cannot load inode table block - " + "inode=%lu, inode_block=%lu\n", + inode->i_number, inode_block); + inode->u.ext2_i.i_version = 1; + return; + } + raw_inode = ((struct ext2_inode *) bh->b_data) + + (((inode->i_number - 1) % + EXT2_INODES_PER_GROUP(inode->i_sb)) % + EXT2_INODES_PER_BLOCK(inode->i_sb)); + raw_inode->i_version++; + inode->u.ext2_i.i_version = raw_inode->i_version; + mark_buffer_dirty(bh, 1); + brelse (bh); +} + +#endif /* linux */ + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory\'s block + * group to find a free inode. 
+ */ +/* + * this functino has been reduced to the actual 'find the inode number' part + */ +ino_t ext2_new_inode (const struct inode * dir, int mode) +{ + struct ext2_sb_info * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + int i, j, avefreei; + int bitmap_nr; + struct ext2_group_desc * gdp; + struct ext2_group_desc * tmp; + struct ext2_super_block * es; + + if (!dir) + return 0; + sb = dir->i_e2fs; + + lock_super (DEVVP(dir)); + es = sb->s_es; +repeat: + gdp = NULL; i=0; + + if (S_ISDIR(mode)) { + avefreei = es->s_free_inodes_count / + sb->s_groups_count; +/* I am not yet convinced that this next bit is necessary. + i = dir->u.ext2_i.i_block_group; + for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { + tmp = get_group_desc (sb, i, &bh2); + if ((tmp->bg_used_dirs_count << 8) < + tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + else + i = ++i % sb->u.ext2_sb.s_groups_count; + } +*/ + if (!gdp) { + for (j = 0; j < sb->s_groups_count; j++) { + tmp = get_group_desc(ITOV(dir)->v_mount,j,&bh2); + if (tmp->bg_free_inodes_count && + tmp->bg_free_inodes_count >= avefreei) { + if (!gdp || + (tmp->bg_free_blocks_count > + gdp->bg_free_blocks_count)) { + i = j; + gdp = tmp; + } + } + } + } + } + else + { + /* + * Try to place the inode in its parent directory + */ + i = dir->i_block_group; + tmp = get_group_desc (ITOV(dir)->v_mount, i, &bh2); + if (tmp->bg_free_inodes_count) + gdp = tmp; + else + { + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (j = 1; j < sb->s_groups_count; j <<= 1) { + i += j; + if (i >= sb->s_groups_count) + i -= sb->s_groups_count; + tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); + if (tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + if (!gdp) { + /* + * That failed: try linear search for a free inode + */ + i = dir->i_block_group + 1; + for (j = 2; j < sb->s_groups_count; j++) { + if (++i >= sb->s_groups_count) + i = 0; + tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); + if 
(tmp->bg_free_inodes_count) { + gdp = tmp; + break; + } + } + } + } + + if (!gdp) { + unlock_super (DEVVP(dir)); + return 0; + } + bitmap_nr = load_inode_bitmap (ITOV(dir)->v_mount, i); + bh = sb->s_inode_bitmap[bitmap_nr]; + if ((j = find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_INODES_PER_GROUP(sb))) < + EXT2_INODES_PER_GROUP(sb)) { + if (set_bit (j, bh->b_data)) { + printf ( "ext2_new_inode:" + "bit already set for inode %d", j); + goto repeat; + } +/* Linux now does the following: + mark_buffer_dirty(bh, 1); + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } +*/ + mark_buffer_dirty(bh); + } else { + if (gdp->bg_free_inodes_count != 0) { + printf ( "ext2_new_inode:" + "Free inodes count corrupted in group %d", + i); + unlock_super (DEVVP(dir)); + return 0; + } + goto repeat; + } + j += i * EXT2_INODES_PER_GROUP(sb) + 1; + if (j < EXT2_FIRST_INO || j > es->s_inodes_count) { + printf ( "ext2_new_inode:" + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%d", i, j); + unlock_super (DEVVP(dir)); + return 0; + } + gdp->bg_free_inodes_count--; + if (S_ISDIR(mode)) + gdp->bg_used_dirs_count++; + mark_buffer_dirty(bh2); + es->s_free_inodes_count--; + /* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */ + sb->s_dirt = 1; + unlock_super (DEVVP(dir)); + return j; +} + +unsigned long ext2_count_free_inodes (struct mount * mp) +{ +#ifdef EXT2FS_DEBUG + struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (VFSTOUFS(mp)->um_devvp); + es = sb->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->s_groups_count; i++) { + gdp = get_group_desc (mp, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (mp, i); + x = ext2_count_free (sb->s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + 
ext2_debug ("group %d: stored = %d, counted = %lu\n", + i, gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + ext2_debug("stored = %lu, computed = %lu, %lu\n", + es->s_free_inodes_count, desc_count, bitmap_count); + unlock_super (VFSTOUFS(mp)->um_devvp); + return desc_count; +#else + return VFSTOUFS(mp)->um_e2fsb->s_free_inodes_count; +#endif +} + +#ifdef LATER +void ext2_check_inodes_bitmap (struct mount * mp) +{ + struct ext2_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext2_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + gdp = get_group_desc (sb, i, NULL); + desc_count += gdp->bg_free_inodes_count; + bitmap_nr = load_inode_bitmap (sb, i); + x = ext2_count_free (sb->u.ext2_sb.s_inode_bitmap[bitmap_nr], + EXT2_INODES_PER_GROUP(sb) / 8); + if (gdp->bg_free_inodes_count != x) + printf ( "ext2_check_inodes_bitmap:" + "Wrong free inodes count in group %d, " + "stored = %d, counted = %lu", i, + gdp->bg_free_inodes_count, x); + bitmap_count += x; + } + if (es->s_free_inodes_count != bitmap_count) + printf ( "ext2_check_inodes_bitmap:" + "Wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + (unsigned long) es->s_free_inodes_count, bitmap_count); + unlock_super (sb); +} +#endif + diff --git a/sys/gnu/fs/ext2fs/ext2_lookup.c b/sys/gnu/fs/ext2fs/ext2_lookup.c new file mode 100644 index 000000000000..79f30f288288 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_lookup.c @@ -0,0 +1,1083 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/buf.h> +#include <sys/file.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/dirent.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> + +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> + +/* + DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512) + while it is the native blocksize in ext2fs - thus, a #define + is no longer appropriate +*/ +#undef DIRBLKSIZ + +#if 1 +extern struct nchstats nchstats; +static int dirchk = 1; +#else +struct nchstats nchstats; +#if DIAGNOSTIC +int dirchk = 1; +#else +int dirchk = 0; +#endif +#endif + +/* + * the problem that is tackled below is the fact that FFS + * includes the terminating zero on disk while EXT2FS doesn't + * this implies that we need to introduce some padding. + * For instance, a filename "sbin" has normally a reclen 12 + * in EXT2, but 16 in FFS. + * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...' 
+ * If it wasn't for that, the complete ufs code for directories would + * have worked w/o changes (except for the difference in DIRBLKSIZ) + */ +static void +ext2_dirconv2ffs( e2dir, ffsdir) + struct ext2_dir_entry *e2dir; + struct dirent *ffsdir; +{ + struct dirent de; + + bzero(&de, sizeof(struct dirent)); + de.d_fileno = e2dir->inode; + de.d_namlen = e2dir->name_len; + +#ifndef NO_HARDWIRED_CONSTANTS + if(e2dir->name_len + 8 == e2dir->rec_len) + de.d_reclen += 4; + + de.d_type = DT_UNKNOWN; /* don't know more here */ + strncpy(de.d_name, e2dir->name, e2dir->name_len); + de.d_name[de.d_namlen] = '\0'; + /* Godmar thinks: since e2dir->rec_len can be big and means + nothing anyway, we compute our own reclen according to what + we think is right + */ + de.d_reclen = (de.d_namlen+8+1+3) & ~3; + bcopy(&de, ffsdir, de.d_reclen); +#endif + +#if 0 + printf("dirconv: ino %d rec old %d rec new %d nam %d name %s\n", + ffsdir->d_fileno, e2dir->rec_len, ffsdir->d_reclen, + ffsdir->d_namlen, ffsdir->d_name); +#endif +} + +/* + * Vnode op for reading directories. + * + * The routine below assumes that the on-disk format of a directory + * is the same as that defined by <sys/dirent.h>. If the on-disk + * format changes, then it will be necessary to do a conversion + * from the on-disk format that read returns to the format defined + * by <sys/dirent.h>. + */ +/* + * this is exactly what we do here - the problem is that the conversion + * will blow up some entries by four bytes, so it can't be done in place. + * This is too bad. Right now the conversion is done entry by entry, the + * converted entry is sent via uiomove. 
+ * + * XXX allocate a buffer, convert as many entries as possible, then send + * the whole buffer to uiomove + */ +int +ext2_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct uio *uio = ap->a_uio; + int count, lost, error; + + struct ext2_dir_entry *edp, *dp; + struct dirent dstdp; + struct uio auio; + struct iovec aiov; + caddr_t dirbuf; + int readcnt; + u_quad_t startoffset = uio->uio_offset; + u_char tmp; + int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize; + + count = uio->uio_resid; /* legyenek boldogok akik akarnak ... */ + uio->uio_resid = count; + uio->uio_iov->iov_len = count; + +#if 0 +printf("ext2_readdir called uio->uio_offset %d uio->uio_resid %d count %d \n", + (int)uio->uio_offset, (int)uio->uio_resid, (int)count); +#endif + + auio = *uio; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + aiov.iov_len = count; + MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); + aiov.iov_base = dirbuf; + error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + if (error == 0) { + readcnt = count - auio.uio_resid; + edp = (struct ext2_dir_entry *)&dirbuf[readcnt]; + for (dp = (struct ext2_dir_entry *)dirbuf; + !error && uio->uio_resid > 0 && dp < edp; ) { + ext2_dirconv2ffs(dp, &dstdp); + if (dp->rec_len > 0) { + if(dstdp.d_reclen <= uio->uio_resid) { + /* advance dp */ + dp = (struct ext2_dir_entry *) + ((char *)dp + dp->rec_len); + error = + uiomove(&dstdp, dstdp.d_reclen, uio); + } else + break; + } else { + error = EIO; + break; + } + } + /* we need to correct uio_offset */ + uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; + } + FREE(dirbuf, M_TEMP); + return (error); +} + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. 
+ * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does an vrele and vput + * instead of two vputs. + * + * Overall outline of ufs_lookup: + * + * check accessibility of directory + * look for name in cache, if found, then if at end of path + * and deleting or creating, drop it, else return name + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * inode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ +int +ext2_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vdp; /* vnode for directory being searched */ + register struct inode *dp; /* inode for directory being searched */ + struct buf *bp; /* a buffer of directory entries */ + register struct ext2_dir_entry *ep; /* the current directory entry */ + int entryoffsetinblock; /* offset of ep in bp's buffer */ + enum {NONE, 
COMPACT, FOUND} slotstatus; + doff_t slotoffset; /* offset of area with free space */ + int slotsize; /* size of area at slotoffset */ + int slotfreespace; /* amount of space free in slot */ + int slotneeded; /* size of the entry we're seeking */ + int numdirpasses; /* strategy for directory search */ + doff_t endsearch; /* offset to end directory search */ + doff_t prevoff; /* prev entry dp->i_offset */ + struct vnode *pdp; /* saved dp during symlink work */ + struct vnode *tdp; /* returned by VFS_VGET */ + doff_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int lockparent; /* 1 => lockparent flag is set */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int namlen, error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + + int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize; + + bp = NULL; + slotoffset = -1; + *vpp = NULL; + vdp = ap->a_dvp; + dp = VTOI(vdp); + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + + /* + * Check accessiblity of directory. + */ + if ((dp->i_mode & IFMT) != IFDIR) + return (ENOTDIR); + if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) + return (error); + + /* + * We now have a segment name to search for, and a directory to search. + * + * Before tediously performing a linear scan of the directory, + * check the name cache to see if the directory/name pair + * we are looking for is known already. + */ + if (error = cache_lookup(vdp, vpp, cnp)) { + int vpid; /* capability number of vnode */ + + if (error == ENOENT) + return (error); + /* + * Get the next vnode in the path. + * See comment below starting `Step through' for + * an explaination of the locking protocol. + */ + pdp = vdp; + dp = VTOI(*vpp); + vdp = *vpp; + vpid = vdp->v_id; + if (pdp == vdp) { /* lookup on "." 
*/ + VREF(vdp); + error = 0; + } else if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); + error = vget(vdp, 1); + if (!error && lockparent && (flags & ISLASTCN)) + error = VOP_LOCK(pdp); + } else { + error = vget(vdp, 1); + if (!lockparent || error || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + /* + * Check that the capability number did not change + * while we were waiting for the lock. + */ + if (!error) { + if (vpid == vdp->v_id) + return (0); + vput(vdp); + if (lockparent && pdp != vdp && (flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + if (error = VOP_LOCK(pdp)) + return (error); + vdp = pdp; + dp = VTOI(pdp); + *vpp = NULL; + } + + /* + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + slotstatus = FOUND; + slotfreespace = slotsize = slotneeded = 0; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) { + slotstatus = NONE; + slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen); + /* was + slotneeded = (sizeof(struct direct) - MAXNAMLEN + + cnp->cn_namelen + 3) &~ 3; */ + } + + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. 
+ */ + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + if (nameiop != LOOKUP || dp->i_diroff == 0 || + dp->i_diroff > dp->i_size) { + entryoffsetinblock = 0; + dp->i_offset = 0; + numdirpasses = 1; + } else { + dp->i_offset = dp->i_diroff; + if ((entryoffsetinblock = dp->i_offset & bmask) && + (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + prevoff = dp->i_offset; + endsearch = roundup(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (dp->i_offset < endsearch) { + /* + * If necessary, get the next directory block. + */ + if ((dp->i_offset & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if (error = + VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) + return (error); + entryoffsetinblock = 0; + } + /* + * If still looking for a slot, and at a DIRBLKSIZE + * boundary, have to start looking for free space again. + */ + if (slotstatus == NONE && + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { + slotoffset = -1; + slotfreespace = 0; + } + /* + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. Complete checks can be run by patching + * "dirchk" to be true. + */ + ep = (struct ext2_dir_entry *) + ((char *)bp->b_data + entryoffsetinblock); + if (ep->rec_len == 0 || + dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock)) { + int i; + ufs_dirbad(dp, dp->i_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + dp->i_offset += i; + entryoffsetinblock += i; + continue; + } + + /* + * If an appropriate sized slot has not yet been found, + * check to see if one is available. Also accumulate space + * in the current block so that we can determine if + * compaction is viable. 
+ */ + if (slotstatus != FOUND) { + int size = ep->rec_len; + + if (ep->inode != 0) + size -= EXT2_DIR_REC_LEN(ep->name_len); + if (size > 0) { + if (size >= slotneeded) { + slotstatus = FOUND; + slotoffset = dp->i_offset; + slotsize = ep->rec_len; + } else if (slotstatus == NONE) { + slotfreespace += size; + if (slotoffset == -1) + slotoffset = dp->i_offset; + if (slotfreespace >= slotneeded) { + slotstatus = COMPACT; + slotsize = dp->i_offset + + ep->rec_len - slotoffset; + } + } + } + } + + /* + * Check for a name match. + */ + if (ep->inode) { + namlen = ep->name_len; + if (namlen == cnp->cn_namelen && + !bcmp(cnp->cn_nameptr, ep->name, + (unsigned)namlen)) { + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + dp->i_ino = ep->inode; + dp->i_reclen = ep->rec_len; + brelse(bp); + goto found; + } + } + prevoff = dp->i_offset; + dp->i_offset += ep->rec_len; + entryoffsetinblock += ep->rec_len; + if (ep->inode) + enduseful = dp->i_offset; + } +/* notfound: */ + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + dp->i_offset = 0; + endsearch = dp->i_diroff; + goto searchloop; + } + if (bp != NULL) + brelse(bp); + /* + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && dp->i_nlink != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. If we didn't find a slot, + * then set dp->i_count to 0 indicating + * that the new slot belongs at the end of the + * directory. 
If we found a slot, then the new entry + * can be put in the range from dp->i_offset to + * dp->i_offset + dp->i_count. + */ + if (slotstatus == NONE) { + dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); + dp->i_count = 0; + enduseful = dp->i_offset; + } else { + dp->i_offset = slotoffset; + dp->i_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; + } + dp->i_endoff = roundup(enduseful, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(vdp, *vpp, cnp); + return (ENOENT); + +found: + if (numdirpasses == 2) + nchstats.ncs_pass2++; + /* + * Check that directory length properly reflects presence + * of this entry. + */ + if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len) + > dp->i_size) { + ufs_dirbad(dp, dp->i_offset, "i_size too small"); + dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + } + + /* + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. + */ + if ((flags & ISLASTCN) && nameiop == LOOKUP) + dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); + + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. 
+ * If the wantparent flag isn't set, we return only + * the directory (in ndp->ni_dvp), otherwise we go + * on and lock the inode, being careful with ".". + */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + /* + * Write access to directory required to delete files. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return pointer to current entry in dp->i_offset, + * and distance past previous entry (if there + * is a previous entry in this block) in dp->i_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + */ + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + if (dp->i_number == dp->i_ino) { + VREF(vdp); + *vpp = vdp; + return (0); + } + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((dp->i_mode & ISVTX) && + cred->cr_uid != 0 && + cred->cr_uid != dp->i_uid && + VTOI(tdp)->i_uid != cred->cr_uid) { + vput(tdp); + return (EPERM); + } + *vpp = tdp; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && wantparent && + (flags & ISLASTCN)) { + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == dp->i_ino) + return (EISDIR); + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + *vpp = tdp; + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * Step through the translation in the name. 
We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); /* race to get the inode */ + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) { + VOP_LOCK(pdp); + return (error); + } + if (lockparent && (flags & ISLASTCN) && + (error = VOP_LOCK(pdp))) { + vput(tdp); + return (error); + } + *vpp = tdp; + } else if (dp->i_number == dp->i_ino) { + VREF(vdp); /* we want ourself, ie "." */ + *vpp = vdp; + } else { + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + if (!lockparent || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + *vpp = tdp; + } + + /* + * Insert name into cache if appropriate. 
+ */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +/* + * changed so that it confirms to ext2_check_dir_entry + */ +int +ext2_dirbadentry(dp, de, entryoffsetinblock) + struct vnode *dp; + register struct ext2_dir_entry *de; + int entryoffsetinblock; +{ + register int i; + int namlen; + int DIRBLKSIZ = VTOI(dp)->i_e2fs->s_blocksize; + + char * error_msg = NULL; + + if (de->rec_len < EXT2_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (de->rec_len % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (de->rec_len < EXT2_DIR_REC_LEN(de->name_len)) + error_msg = "reclen is too small for name_len"; + else if (entryoffsetinblock + de->rec_len > DIRBLKSIZ) + error_msg = "directory entry across blocks"; + /* else LATER + if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count) + error_msg = "inode out of bounds"; + */ + + if (error_msg != NULL) + printf( "bad directory entry: %s\n" + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d \n", + error_msg, entryoffsetinblock, + (unsigned long) de->inode, de->rec_len, de->name_len); + return error_msg == NULL ? 0 : 1; +} + +/* + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument ip is the inode which the new + * directory entry will refer to. Dvp is a pointer to the directory to + * be written, which was left locked by namei. Remaining parameters + * (dp->i_offset, dp->i_count) indicate how the space for the new + * entry is to be obtained. 
+ */ +int +ext2_direnter(ip, dvp, cnp) + struct inode *ip; + struct vnode *dvp; + register struct componentname *cnp; +{ + register struct ext2_dir_entry *ep, *nep; + register struct inode *dp; + struct buf *bp; + struct ext2_dir_entry newdir; + struct iovec aiov; + struct uio auio; + u_int dsize; + int error, loc, newentrysize, spacefree; + char *dirbuf; + int DIRBLKSIZ = ip->i_e2fs->s_blocksize; + + +#if DIAGNOSTIC + if ((cnp->cn_flags & SAVENAME) == 0) + panic("direnter: missing name"); +#endif + dp = VTOI(dvp); + newdir.inode = ip->i_number; + newdir.name_len = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1); + newentrysize = EXT2_DIR_REC_LEN(newdir.name_len); + if (dp->i_count == 0) { + /* + * If dp->i_count is 0, then namei could find no + * space in the directory. Here, dp->i_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. + */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ext2_direnter: newblk"); + auio.uio_offset = dp->i_offset; + newdir.rec_len = DIRBLKSIZ; + auio.uio_resid = newentrysize; + aiov.iov_len = newentrysize; + aiov.iov_base = (caddr_t)&newdir; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); + if (DIRBLKSIZ > + VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ext2_direnter: frag size"); + else if (!error) { + dp->i_size = roundup(dp->i_size, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE; + } + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space + * for the new entry in the range dp->i_offset to + * dp->i_offset + dp->i_count in the directory. 
+ * To use this space, we may have to compact the entries located + * there, by copying them together towards the beginning of the + * block, leaving the free space in one usable chunk at the end. + */ + + /* + * Increase size of directory if entry eats into new space. + * This should never push the size past a new multiple of + * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. + */ + if (dp->i_offset + dp->i_count > dp->i_size) + dp->i_size = dp->i_offset + dp->i_count; + /* + * Get the block containing the space for the new directory entry. + */ + if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + return (error); + /* + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region dp->i_offset to + * dp->i_offset + dp->i_count would yield the + * space. + */ + ep = (struct ext2_dir_entry *)dirbuf; + dsize = EXT2_DIR_REC_LEN(ep->name_len); + spacefree = ep->rec_len - dsize; + for (loc = ep->rec_len; loc < dp->i_count; ) { + nep = (struct ext2_dir_entry *)(dirbuf + loc); + if (ep->inode) { + /* trim the existing slot */ + ep->rec_len = dsize; + ep = (struct ext2_dir_entry *)((char *)ep + dsize); + } else { + /* overwrite; nothing there; header is ours */ + spacefree += dsize; + } + dsize = EXT2_DIR_REC_LEN(ep->name_len); + spacefree += nep->rec_len - dsize; + loc += nep->rec_len; + bcopy((caddr_t)nep, (caddr_t)ep, dsize); + } + /* + * Update the pointer fields in the previous entry (if any), + * copy in the new entry, and write out the block. 
+ */ + if (ep->inode == 0) { + if (spacefree + dsize < newentrysize) + panic("ext2_direnter: compact1"); + newdir.rec_len = spacefree + dsize; + } else { + if (spacefree < newentrysize) + panic("ext2_direnter: compact2"); + newdir.rec_len = spacefree; + ep->rec_len = dsize; + ep = (struct ext2_dir_entry *)((char *)ep + dsize); + } + bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) + error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, + cnp->cn_cred, cnp->cn_proc); + return (error); +} + +/* + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * dp->i_offset contains the offset into the directory of the + * entry to be eliminated. The dp->i_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry is not the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ +int +ext2_dirremove(dvp, cnp) + struct vnode *dvp; + struct componentname *cnp; +{ + register struct inode *dp; + struct ext2_dir_entry *ep; + struct buf *bp; + int error; + int DIRBLKSIZ = VTOI(dvp)->i_e2fs->s_blocksize; + + dp = VTOI(dvp); + if (dp->i_count == 0) { + /* + * First entry in block: set d_ino to zero. + */ + if (error = + VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->inode = 0; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); + } + /* + * Collapse new free space into previous entry. 
+ */ + if (error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), + (char **)&ep, &bp)) + return (error); + ep->rec_len += dp->i_reclen; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +int +ext2_dirrewrite(dp, ip, cnp) + struct inode *dp, *ip; + struct componentname *cnp; +{ + struct buf *bp; + struct ext2_dir_entry *ep; + struct vnode *vdp = ITOV(dp); + int error; + + if (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->inode = ip->i_number; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. + */ +int +ext2_dirempty(ip, parentino, cred) + register struct inode *ip; + ino_t parentino; + struct ucred *cred; +{ + register off_t off; + struct dirtemplate dbuf; + register struct ext2_dir_entry *dp = (struct ext2_dir_entry *)&dbuf; + int error, count, namlen; + int DIRBLKSIZ = ip->i_e2fs->s_blocksize; + +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) + + for (off = 0; off < ip->i_size; off += dp->rec_len) { + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, + UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->rec_len == 0) + return (0); + /* skip empty entries */ + if (dp->inode == 0) + continue; + /* accept only "." and ".." 
*/ + namlen = dp->name_len; + if (namlen > 2) + return (0); + if (dp->name[0] != '.') + return (0); + /* + * At this point namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (namlen == 1) + continue; + if (dp->name[1] == '.' && dp->inode == parentino) + continue; + return (0); + } + return (1); +} + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always vput before returning. + */ +int +ext2_checkpath(source, target, cred) + struct inode *source, *target; + struct ucred *cred; +{ + struct vnode *vp; + int error, rootino, namlen; + struct dirtemplate dirbuf; + + vp = ITOV(target); + if (target->i_number == source->i_number) { + error = EEXIST; + goto out; + } + rootino = ROOTINO; + error = 0; + if (target->i_number == rootino) + goto out; + + for (;;) { + if (vp->v_type != VDIR) { + error = ENOTDIR; + break; + } + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED, cred, (int *)0, (struct proc *)0); + if (error != 0) + break; + namlen = dirbuf.dotdot_namlen; + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirbuf.dotdot_ino == source->i_number) { + error = EINVAL; + break; + } + if (dirbuf.dotdot_ino == rootino) + break; + vput(vp); + if (error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp)) { + vp = NULL; + break; + } + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. 
not a directory\n"); + if (vp != NULL) + vput(vp); + return (error); +} + diff --git a/sys/gnu/fs/ext2fs/ext2_readwrite.c b/sys/gnu/fs/ext2fs/ext2_readwrite.c new file mode 100644 index 000000000000..be018317378f --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_readwrite.c @@ -0,0 +1,316 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 + */ + +#if !defined(__FreeBSD__) +#include "diagnostic.h" +#endif + +#define BLKSIZE(a, b, c) blksize(a, b, c) +#define FS struct ext2_sb_info +#define I_FS i_e2fs +#define READ ext2_read +#define READ_S "ext2_read" +#define WRITE ext2_write +#define WRITE_S "ext2_write" + +/* + * Vnode op for reading. + */ +/* ARGSUSED */ +int +READ(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct inode *ip; + register struct uio *uio; + register FS *fs; + struct buf *bp; + daddr_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + int error; + u_short mode; + + vp = ap->a_vp; + ip = VTOI(vp); + mode = ip->i_mode; + uio = ap->a_uio; + +#if DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("%s: mode", READ_S); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("%s: short symlink", READ_S); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("%s: type %d", READ_S, vp->v_type); +#endif + fs = ip->I_FS; +#if 0 + if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize) + return (EFBIG); +#endif + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = BLKSIZE(fs, ip, lbn); + 
blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->s_frag_size - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) > ip->i_size) + error = bread(vp, lbn, size, NOCRED, &bp); + else if (doclusterread) + error = cluster_read(vp, + ip->i_size, lbn, size, NOCRED, &bp); + else if (lbn - 1 == vp->v_lastr) { + int nextsize = BLKSIZE(fs, ip, nextlbn); + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + if (error) + break; + vp->v_lastr = lbn; + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + if (error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio)) + break; + +#if !defined(__FreeBSD__) + if (S_ISREG(mode) && (xfersize + blkoffset == fs->s_frag_size || + uio->uio_offset == ip->i_size)) + bp->b_flags |= B_AGE; +#endif + brelse(bp); + } + if (bp != NULL) + brelse(bp); + ip->i_flag |= IN_ACCESS; + return (error); +} + +/* + * Vnode op for writing. 
+ */ +int +WRITE(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct uio *uio; + register struct inode *ip; + register FS *fs; + struct buf *bp; + struct proc *p; + daddr_t lbn; + off_t osize; + int blkoffset, error, flags, ioflag, resid, size, xfersize; + + ioflag = ap->a_ioflag; + uio = ap->a_uio; + vp = ap->a_vp; + ip = VTOI(vp); + +#if DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("%s: mode", WRITE_S); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + if ((ioflag & IO_SYNC) == 0) + panic("%s: nonsync dir write", WRITE_S); + break; + default: + panic("%s: type", WRITE_S); + } + + fs = ip->I_FS; +#if 0 + if (uio->uio_offset < 0 || + (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) + return (EFBIG); +#endif + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + p = uio->uio_procp; + if (vp->v_type == VREG && p && + uio->uio_offset + uio->uio_resid > + p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + psignal(p, SIGXFSZ); + return (EFBIG); + } + + resid = uio->uio_resid; + osize = ip->i_size; + flags = ioflag & IO_SYNC ? 
B_SYNC : 0; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->s_frag_size - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + +#if defined(__FreeBSD__) + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, (u_long)uio->uio_offset + xfersize); +#endif + + if (fs->s_frag_size > xfersize) + flags |= B_CLRBUF; + else + flags &= ~B_CLRBUF; + + error = ext2_balloc(ip, + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); + + if (error) + break; + if (uio->uio_offset + xfersize > ip->i_size) { + ip->i_size = uio->uio_offset + xfersize; +#if !defined(__FreeBSD__) + vnode_pager_setsize(vp, (u_long)ip->i_size); +#endif + } +#if !defined(__FreeBSD__) + (void)vnode_pager_uncache(vp); +#endif + + size = BLKSIZE(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + + if (ioflag & IO_SYNC) + (void)bwrite(bp); + else if (xfersize + blkoffset == fs->s_frag_size) { + if (doclusterwrite) { +#if defined(__FreeBSD__) + bp->b_flags |= B_CLUSTEROK; +#endif + cluster_write(bp, ip->i_size); + } else { +#if !defined(__FreeBSD__) + bp->b_flags |= B_AGE; +#endif + bawrite(bp); + } + } else { +#if defined(__FreeBSD__) + bp->b_flags |= B_CLUSTEROK; +#endif + bdwrite(bp); + } + + if (error || xfersize == 0) + break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. 
+ */ + if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) + ip->i_mode &= ~(ISUID | ISGID); + if (error) { + if (ioflag & IO_UNIT) { + (void)VOP_TRUNCATE(vp, osize, + ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { + struct timeval tv; +#if !defined(__FreeBSD__) + get_time(&tv); +#else + tv = time; +#endif + error = VOP_UPDATE(vp, &tv, &tv, 1); + } + return (error); +} diff --git a/sys/gnu/fs/ext2fs/ext2_subr.c b/sys/gnu/fs/ext2fs/ext2_subr.c new file mode 100644 index 000000000000..c27abe5f5fde --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_subr.c @@ -0,0 +1,128 @@ +/* + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ext2_subr.c 8.2 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> + +#include <sys/systm.h> +#include <sys/vnode.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <sys/buf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. 
+ */ +int +ext2_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + struct inode *ip; + register struct ext2_sb_info *fs; + struct buf *bp; + daddr_t lbn; + int bsize, error; + + ip = VTOI(ap->a_vp); + fs = ip->i_e2fs; + lbn = lblkno(fs, ap->a_offset); + bsize = blksize(fs, ip, lbn); + + *ap->a_bpp = NULL; + if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + if (ap->a_res) + *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); + *ap->a_bpp = bp; + return (0); +} + +#if defined(KERNEL) && defined(DIAGNOSTIC) +void +ext2_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + register struct buf *ebp, *ep; + register daddr_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL) || + ep->b_vp == NULLVP) + continue; +#if !defined(__FreeBSD__) + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + continue; +#else + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL, NULL)) + continue; +#endif + if (vp != ip->i_devvp) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %d, end %d\n", + start, last, ep->b_blkno, + ep->b_blkno + btodb(ep->b_bcount) - 1); + panic("Disk buffer overlap"); + } +} +#endif /* DIAGNOSTIC */ + diff --git a/sys/gnu/fs/ext2fs/ext2_vfsops.c b/sys/gnu/fs/ext2fs/ext2_vfsops.c new file mode 100644 index 000000000000..596eb35bfe32 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_vfsops.c @@ -0,0 +1,1082 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1989, 
1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 + */ + +#if !defined(__FreeBSD__) +#include "quota.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/file.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/stat.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> + +int ext2_sbupdate __P((struct ufsmount *, int)); + +struct vfsops ext2fs_vfsops = { + ext2_mount, + ufs_start, /* empty function */ + ext2_unmount, + ufs_root, /* root inode via vget */ + ufs_quotactl, /* does operations associated with quotas */ + ext2_statfs, + ext2_sync, + ext2_vget, + ext2_fhtovp, + ext2_vptofh, + ext2_init, +}; + +#if defined(__FreeBSD__) +VFS_SET(ext2fs_vfsops, ext2fs, MOUNT_EXT2FS, 0); +#define bsd_malloc malloc +#define bsd_free free +#endif + +extern u_long nextgennumber; + +/* + * Called by main() when ufs is going to be mounted as root. + * + * Name is updated by mount(8) after booting. + */ +#define ROOTNAME "root_device" + +int +ext2_mountroot() +{ +#if !defined(__FreeBSD__) + extern struct vnode *rootvp; +#endif + register struct ext2_sb_info *fs; + register struct mount *mp; +#if defined(__FreeBSD__) + struct proc *p = curproc; +#else + struct proc *p = get_proc(); /* XXX */ +#endif + struct ufsmount *ump; + u_int size; + int error; + + /* + * Get vnodes for swapdev and rootdev. 
+ */ + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("ext2_mountroot: can't setup bdevvp's"); + + mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &ext2fs_vfsops; + mp->mnt_flag = MNT_RDONLY; + if (error = ext2_mountfs(rootvp, mp, p)) { + bsd_free(mp, M_MOUNT); + return (error); + } + if (error = vfs_lock(mp)) { + (void)ext2_unmount(mp, 0, p); + bsd_free(mp, M_MOUNT); + return (error); + } +#if defined(__FreeBSD__) + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); +#else + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); +#endif + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); + fs->fs_fsmnt[0] = '/'; + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ext2_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + inittodr(fs->s_es->s_wtime); /* this helps to set the time */ + return (0); +} + +/* + * VFS Operations. + * + * mount system call + */ +int +ext2_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; /* this is actually a (struct ufs_args *) */ + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct ufs_args args; + struct ufsmount *ump = 0; + register struct ext2_sb_info *fs; + u_int size; + int error, flags; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) + return (error); + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. 
+ */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + error = 0; + if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (vfs_busy(mp)) + return (EBUSY); + error = ext2_flushfiles(mp, flags, p); + vfs_unbusy(mp); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); + if (error) + return (error); + if (fs->s_rd_only && (mp->mnt_flag & MNT_WANTRDWR)) + fs->s_rd_only = 0; + if (fs->s_rd_only == 0) { + /* don't say it's clean */ + fs->s_es->s_state &= ~EXT2_VALID_FS; + ext2_sbupdate(ump, MNT_WAIT); + } + if (args.fspec == 0) { + /* + * Process export requests. + */ + return (vfs_export(mp, &ump->um_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. + */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if (error = namei(ndp)) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev) { + vrele(devvp); + return (ENXIO); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) + error = ext2_mountfs(devvp, mp, p); + else { + if (devvp != ump->um_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ext2_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +/* + * checks that the data in the descriptor blocks make sense + * this is taken from 
ext2/super.c + */ +static int ext2_check_descriptors (struct ext2_sb_info * sb) +{ + int i; + int desc_block = 0; + unsigned long block = sb->s_es->s_first_data_block; + struct ext2_group_desc * gdp = NULL; + + /* ext2_debug ("Checking group descriptors"); */ + + for (i = 0; i < sb->s_groups_count; i++) + { + /* examine next descriptor block */ + if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext2_group_desc *) + sb->s_group_desc[desc_block++]->b_data; + if (gdp->bg_block_bitmap < block || + gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Block bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_block_bitmap); + return 0; + } + if (gdp->bg_inode_bitmap < block || + gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Inode bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_inode_bitmap); + return 0; + } + if (gdp->bg_inode_table < block || + gdp->bg_inode_table + sb->s_itb_per_group >= + block + EXT2_BLOCKS_PER_GROUP(sb)) + { + printf ("ext2_check_descriptors: " + "Inode table for group %d" + " not in group (block %lu)!", + i, (unsigned long) gdp->bg_inode_table); + return 0; + } + block += EXT2_BLOCKS_PER_GROUP(sb); + gdp++; + } + return 1; +} + +/* + * this computes the fields of the ext2_sb_info structure from the + * data in the ext2_super_block structure read in + */ +static int compute_sb_data(devvp, es, fs) + struct vnode * devvp; + struct ext2_super_block * es; + struct ext2_sb_info * fs; +{ + int db_count, error; + int i, j; + int logic_sb_block = 1; /* XXX for now */ + +#if 1 +#define V(v) +#else +#define V(v) printf(#v"= %d\n", fs->v); +#endif + + fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; + V(s_blocksize) + fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; + V(s_bshift) + fs->s_fsbtodb = es->s_log_block_size + 1; + V(s_fsbtodb) + fs->s_qbmask = 
fs->s_blocksize - 1; + V(s_bmask) + fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); + V(s_blocksize_bits) + fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; + V(s_frag_size) + if (fs->s_frag_size) + fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; + V(s_frags_per_block) + fs->s_blocks_per_group = es->s_blocks_per_group; + V(s_blocks_per_group) + fs->s_frags_per_group = es->s_frags_per_group; + V(s_frags_per_group) + fs->s_inodes_per_group = es->s_inodes_per_group; + V(s_inodes_per_group) + fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; + V(s_inodes_per_block) + fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; + V(s_itb_per_group) + fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); + V(s_desc_per_block) + /* s_resuid / s_resgid ? */ + fs->s_groups_count = (es->s_blocks_count - + es->s_first_data_block + + EXT2_BLOCKS_PER_GROUP(fs) - 1) / + EXT2_BLOCKS_PER_GROUP(fs); + V(s_groups_count) + db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / + EXT2_DESC_PER_BLOCK(fs); + fs->s_db_per_group = db_count; + V(s_db_per_group) + + fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), + M_UFSMNT, M_WAITOK); + + /* adjust logic_sb_block */ + if(fs->s_blocksize > SBSIZE) + /* Godmar thinks: if the blocksize is greater than 1024, then + the superblock is logically part of block zero. 
+ */ + logic_sb_block = 0; + + for (i = 0; i < db_count; i++) { + error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), + fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); + if(error) { + for (j = 0; j < i; j++) + brelse(fs->s_group_desc[j]); + bsd_free(fs->s_group_desc, M_UFSMNT); + printf("EXT2-fs: unable to read group descriptors (%d)\n", error); + return EIO; + } + } + if(!ext2_check_descriptors(fs)) { + for (j = 0; j < db_count; j++) + brelse(fs->s_group_desc[j]); + bsd_free(fs->s_group_desc, M_UFSMNT); + printf("EXT2-fs: (ext2_check_descriptors failure) " + "unable to read group descriptors\n"); + return EIO; + } + + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { + fs->s_inode_bitmap_number[i] = 0; + fs->s_inode_bitmap[i] = NULL; + fs->s_block_bitmap_number[i] = 0; + fs->s_block_bitmap[i] = NULL; + } + fs->s_loaded_inode_bitmaps = 0; + fs->s_loaded_block_bitmaps = 0; + return 0; +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. + * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) invalidate all cached file data. + * 6) re-read inode data for all active vnodes. + */ +int +ext2_reload(mountp, cred, p) + register struct mount *mountp; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp, *nvp, *devvp; + struct inode *ip; + struct buf *bp; + struct ext2_super_block * es; + struct ext2_sb_info *fs; + int i, size, error; + + if ((mountp->mnt_flag & MNT_RDONLY) == 0) + return (EINVAL); + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOUFS(mountp)->um_devvp; + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + panic("ext2_reload: dirty1"); + /* + * Step 2: re-read superblock from disk. 
+ * constants have been adjusted for ext2 + */ + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + return (error); + es = (struct ext2_super_block *)bp->b_data; + if (es->s_magic != EXT2_SUPER_MAGIC) { + if(es->s_magic == EXT2_PRE_02B_MAGIC) + printf("This filesystem bears the magic number of a pre " + "0.2b version of ext2. This is not supported by " + "Lites.\n"); + else + printf("Wrong magic number: %x (expected %x for ext2 fs\n", + es->s_magic, EXT2_SUPER_MAGIC); + brelse(bp); + return (EIO); /* XXX needs translation */ + } + fs = VFSTOUFS(mountp)->um_e2fs; + bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); + + if(error = compute_sb_data(devvp, es, fs)) { + brelse(bp); + return error; + } +#ifdef UNKLAR + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; +#endif + brelse(bp); + +loop: + for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + nvp = vp->v_mntvnodes.le_next; + /* + * Step 4: invalidate all inactive vnodes. + */ + if (vp->v_usecount == 0) { + vgone(vp); + continue; + } + /* + * Step 5: invalidate all cached file data. + */ + if (vget(vp, 1)) + goto loop; + if (vinvalbuf(vp, 0, cred, p, 0, 0)) + panic("ext2_reload: dirty2"); + /* + * Step 6: re-read inode data for all active vnodes. 
+ */ + ip = VTOI(vp); + if (error = + bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->s_blocksize, NOCRED, &bp)) { + vput(vp); + return (error); + } + ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), + &ip->i_din); + brelse(bp); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } + return (0); +} + +/* + * Common code for mount and mountroot + */ +int +ext2_mountfs(devvp, mp, p) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; +{ + register struct ufsmount *ump; + struct buf *bp; + register struct ext2_sb_info *fs; + struct ext2_super_block * es; + dev_t dev = devvp->v_rdev; + struct partinfo dpart; + caddr_t base; + int havepart = 0; + int error, i, size; + int ronly; +#if !defined(__FreeBSD__) + extern struct vnode *rootvp; +#endif + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if (error = vfs_mountedon(devvp)) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); +#ifdef READONLY +/* turn on this to force it to be read-only */ + mp->mnt_flag |= MNT_RDONLY; +#endif + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) + return (error); + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) + size = DEV_BSIZE; + else { + havepart = 1; + size = dpart.disklab->d_secsize; + } + + bp = NULL; + ump = NULL; + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + goto out; + es = (struct ext2_super_block *)bp->b_data; + if (es->s_magic != EXT2_SUPER_MAGIC) { + if(es->s_magic == EXT2_PRE_02B_MAGIC) + printf("This filesystem bears the magic number of a pre " + "0.2b version of ext2. 
This is not supported by " + "Lites.\n"); + else + printf("Wrong magic number: %x (expected %x for EXT2FS)\n", + es->s_magic, EXT2_SUPER_MAGIC); + error = EINVAL; /* XXX needs translation */ + goto out; + } + ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); + bzero((caddr_t)ump, sizeof *ump); + /* I don't know whether this is the right strategy. Note that + we dynamically allocate both a ext2_sb_info and a ext2_super_block + while Linux keeps the super block in a locked buffer + */ + ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), + M_UFSMNT, M_WAITOK); + ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), + M_UFSMNT, M_WAITOK); + bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); + if(error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)) { + brelse(bp); + return error; + } + brelse(bp); + bp = NULL; + fs = ump->um_e2fs; + fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ + if (!(fs->s_es->s_state & EXT2_VALID_FS)) { + printf("WARNING: %s was not properly dismounted\n", + fs->fs_fsmnt); + } + /* if the fs is not mounted read-only, make sure the super block is + always written back on a sync() + */ + if (ronly == 0) { + fs->s_dirt = 1; /* mark it modified */ + fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ + } + mp->mnt_data = (qaddr_t)ump; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = MOUNT_EXT2FS; + mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; + mp->mnt_flag |= MNT_LOCAL; + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + /* setting those two parameters allows us to use + ufs_bmap w/o changse ! 
+ */ + ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); + ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; + ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; + devvp->v_specflags |= SI_MOUNTEDON; + if (ronly == 0) + ext2_sbupdate(ump, MNT_WAIT); + return (0); +out: + if (bp) + brelse(bp); + (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); + if (ump) { + bsd_free(ump->um_fs, M_UFSMNT); + bsd_free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +/* + * unmount system call + */ +int +ext2_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct ufsmount *ump; + register struct ext2_sb_info *fs; + int error, flags, ronly, i; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (mp->mnt_flag & MNT_ROOTFS) + return (EINVAL); + flags |= FORCECLOSE; + } + if (error = ext2_flushfiles(mp, flags, p)) + return (error); + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + ronly = fs->s_rd_only; + if (!ronly) { + fs->s_es->s_state |= EXT2_VALID_FS; /* was fs_clean = 1 */ + ext2_sbupdate(ump, MNT_WAIT); + } + /* release buffers containing group descriptors */ + for(i = 0; i < fs->s_db_per_group; i++) + brelse(fs->s_group_desc[i]); + /* release cached inode/block bitmaps */ + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_inode_bitmap[i]) + brelse (fs->s_inode_bitmap[i]); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_block_bitmap[i]) + brelse (fs->s_block_bitmap[i]); + + ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, + NOCRED, p); + vrele(ump->um_devvp); + bsd_free(fs->s_es, M_UFSMNT); + bsd_free(fs, M_UFSMNT); + bsd_free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Flush out all the files in a filesystem. 
+ */ +int +ext2_flushfiles(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ +#if !defined(__FreeBSD__) + extern int doforce; +#endif + register struct ufsmount *ump; + int i, error; + + if (!doforce) + flags &= ~FORCECLOSE; + ump = VFSTOUFS(mp); +#if QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP) + continue; + quotaoff(p, mp, i); + } + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } +#endif + error = vflush(mp, NULLVP, flags); + return (error); +} + +/* + * Get file system statistics. + * taken from ext2/super.c ext2_statfs + */ +int +ext2_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + unsigned long overhead; + unsigned long overhead_per_group; + + register struct ufsmount *ump; + register struct ext2_sb_info *fs; + register struct ext2_super_block *es; + + ump = VFSTOUFS(mp); + fs = ump->um_e2fs; + es = fs->s_es; + + if (es->s_magic != EXT2_SUPER_MAGIC) + panic("ext2_statfs - magic number spoiled"); + + /* + * Compute the overhead (FS structures) + */ + overhead_per_group = 1 /* super block */ + + fs->s_db_per_group + + 1 /* block bitmap */ + + 1 /* inode bitmap */ + + fs->s_itb_per_group; + overhead = es->s_first_data_block + + fs->s_groups_count * overhead_per_group; + + sbp->f_type = MOUNT_EXT2FS; + sbp->f_bsize = EXT2_FRAG_SIZE(fs); + sbp->f_iosize = EXT2_BLOCK_SIZE(fs); + sbp->f_blocks = es->s_blocks_count - overhead; + sbp->f_bfree = es->s_free_blocks_count; + sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; + sbp->f_files = es->s_inodes_count; + sbp->f_ffree = es->s_free_inodes_count; + if (sbp != &mp->mnt_stat) { + bcopy((caddr_t)mp->mnt_stat.f_mntonname, + (caddr_t)&sbp->f_mntonname[0], MNAMELEN); + bcopy((caddr_t)mp->mnt_stat.f_mntfromname, + (caddr_t)&sbp->f_mntfromname[0], 
MNAMELEN); + } + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. + */ +int +ext2_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + register struct inode *ip; + register struct ufsmount *ump = VFSTOUFS(mp); + register struct ext2_sb_info *fs; + int error, allerror = 0; + + fs = ump->um_e2fs; + /* + * Write back modified superblock. + * Consistency check that the superblock + * is still in the buffer cache. + */ + if (fs->s_dirt) { +#if !defined(__FreeBSD__) + struct timeval time; +#endif + + if (fs->s_rd_only != 0) { /* XXX */ + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); + } + fs->s_dirt = 0; +#if !defined(__FreeBSD__) + get_time(&time); +#endif + fs->s_es->s_wtime = time.tv_sec; + allerror = ext2_sbupdate(ump, waitfor); + } + /* + * Write back each (modified) inode. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp)) + continue; + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) + continue; + if (vget(vp, 1)) + goto loop; + if (error = VOP_FSYNC(vp, cred, waitfor, p)) + allerror = error; + vput(vp); + } + /* + * Force stale file system control information to be flushed. + */ + if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) + allerror = error; +#if QUOTA + qsync(mp); +#endif + return (allerror); +} + +/* + * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it + * in from disk. 
If it is in core, wait for the lock bit to clear, then + * return the inode locked. Detection and handling of mount points must be + * done by the calling routine. + */ +int +ext2_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + register struct ext2_sb_info *fs; + register struct inode *ip; + struct ufsmount *ump; + struct buf *bp; + struct vnode *vp; + dev_t dev; + int i, type, error; + int used_blocks; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + if ((*vpp = ufs_ihashget(dev, ino)) != NULL) + return (0); + + /* Allocate a new vnode/inode. */ + if (error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) { + *vpp = NULL; + return (error); + } + /* I don't really know what this 'type' does. I suppose it's some kind + * of memory accounting. Let's just book this memory on FFS's account + * If I'm not mistaken, this stuff isn't implemented anyway in Lites + */ + type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */ + MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); + insmntque(vp, mp); + bzero((caddr_t)ip, sizeof(struct inode)); + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_e2fs = fs = ump->um_e2fs; + ip->i_dev = dev; + ip->i_number = ino; +#if QUOTA + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; +#endif + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ufs_ihashins(ip); + + /* Read in the disk contents for the inode, copy into the inode. */ +#if 0 +printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); +#endif + if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->s_blocksize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. 
With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + vput(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + /* convert ext2 inode to dinode */ + ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * + ino_to_fsbo(fs, ino)), &ip->i_din); + ip->i_block_group = ino_to_cg(fs, ino); + ip->i_next_alloc_block = 0; + ip->i_next_alloc_goal = 0; + ip->i_prealloc_count = 0; + ip->i_prealloc_block = 0; + /* now we want to make sure that block pointers for unused + blocks are zeroed out - ext2_balloc depends on this + although for regular files and directories only + */ + if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { + used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; + for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + } +/* + ext2_print_inode(ip); +*/ + brelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + if (error = ufs_vinit(mp, ext2_specop_p, EXT2_FIFOOPS, &vp)) { + vput(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + VREF(ip->i_devvp); + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. 
+ */ + if (ip->i_gen == 0) { +#if !defined(__FreeBSD__) + struct timeval time; + get_time(&time); +#endif + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ip->i_flag |= IN_MODIFIED; + } + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call ext2_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. exflagsp and credanonp + */ +int +ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct ufid *ufhp; + struct ext2_sb_info *fs; + + ufhp = (struct ufid *)fhp; + fs = VFSTOUFS(mp)->um_e2fs; + if (ufhp->ufid_ino < ROOTINO || + ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) + return (ESTALE); + return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +int +ext2_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct inode *ip; + register struct ufid *ufhp; + + ip = VTOI(vp); + ufhp = (struct ufid *)fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +/* + * Write a superblock and associated information back to disk. + */ +int +ext2_sbupdate(mp, waitfor) + struct ufsmount *mp; + int waitfor; +{ + register struct ext2_sb_info *fs = mp->um_e2fs; + register struct ext2_super_block *es = fs->s_es; + register struct buf *bp; + int blks; + caddr_t space; + int i, size, error = 0; +/* +printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); +*/ + bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); + bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + + /* write group descriptors back on disk */ + for(i = 0; i < fs->s_db_per_group; i++) + /* Godmar thinks: we must avoid using any of the b*write + * functions here: we want to keep the buffer locked + * so we use my 'housemade' write routine: + */ + error |= ll_w_block(fs->s_group_desc[i], waitfor == MNT_WAIT); + + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_inode_bitmap[i]) + ll_w_block (fs->s_inode_bitmap[i], 1); + for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) + if (fs->s_block_bitmap[i]) + ll_w_block (fs->s_block_bitmap[i], 1); + + return (error); +} diff --git a/sys/gnu/fs/ext2fs/ext2_vnops.c b/sys/gnu/fs/ext2fs/ext2_vnops.c new file mode 100644 index 000000000000..1bf911322b87 --- /dev/null +++ b/sys/gnu/fs/ext2fs/ext2_vnops.c @@ -0,0 +1,338 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ext2_vnops.c 8.7 (Berkeley) 2/3/94 + */ + +#if !defined(__FreeBSD__) +#include "fifo.h" +#include "diagnostic.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#if !defined(__FreeBSD__) +#include <ufs/ufs/lockf.h> +#else +#include <lockf.h> +#include <sys/signalvar.h> +#endif +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufs_extern.h> + +#include <gnu/ext2fs/ext2_fs.h> +#include <gnu/ext2fs/ext2_fs_sb.h> +#include <gnu/ext2fs/fs.h> +#include <gnu/ext2fs/ext2_extern.h> + +/* Global vfs data structures for ufs. */ +int (**ext2_vnodeop_p)(); +struct vnodeopv_entry_desc ext2_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, ext2_lookup }, /* lookup */ + { &vop_create_desc, ufs_create }, /* create */ + { &vop_mknod_desc, ufs_mknod }, /* mknod */ + { &vop_open_desc, ufs_open }, /* open */ + { &vop_close_desc, ufs_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ext2_read }, /* read */ + { &vop_write_desc, ext2_write }, /* write */ + { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ + { &vop_select_desc, ufs_select }, /* select */ + { &vop_mmap_desc, ufs_mmap }, /* mmap */ + { &vop_fsync_desc, ext2_fsync }, /* fsync */ + { &vop_seek_desc, ufs_seek }, /* seek */ + { &vop_remove_desc, ufs_remove }, /* remove */ + { &vop_link_desc, ufs_link }, /* link */ + { &vop_rename_desc, ufs_rename }, /* rename */ + { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ + 
{ &vop_symlink_desc, ufs_symlink }, /* symlink */ + { &vop_readdir_desc, ext2_readdir }, /* readdir */ + { &vop_readlink_desc, ufs_readlink }, /* readlink */ + { &vop_abortop_desc, ufs_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, ufs_bmap }, /* bmap */ + { &vop_strategy_desc, ufs_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ + { &vop_advlock_desc, ufs_advlock }, /* advlock */ + { &vop_blkatoff_desc, ext2_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, ext2_valloc }, /* valloc */ + { &vop_reallocblks_desc, ext2_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, ext2_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_vnodeop_opv_desc = + { &ext2_vnodeop_p, ext2_vnodeop_entries }; + +int (**ext2_specop_p)(); +struct vnodeopv_entry_desc ext2_specop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, ufsspec_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsspec_read }, /* read */ + { &vop_write_desc, ufsspec_write }, /* write */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, 
ext2_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_specop_opv_desc = + { &ext2_specop_p, ext2_specop_entries }; + +#if FIFO +int (**ext2_fifoop_p)(); +struct vnodeopv_entry_desc ext2_fifoop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, ufsfifo_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, 
/* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsfifo_read }, /* read */ + { &vop_write_desc, ufsfifo_write }, /* write */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, ext2_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop */ + { &vop_inactive_desc, ext2_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ext2_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, ext2_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ext2fs_fifoop_opv_desc = + { &ext2_fifoop_p, ext2_fifoop_entries }; +#endif /* FIFO */ + +#if defined(__FreeBSD__) + VNODEOP_SET(ext2fs_vnodeop_opv_desc); + VNODEOP_SET(ext2fs_specop_opv_desc); + 
VNODEOP_SET(ext2fs_fifoop_opv_desc); +#endif + +/* + * Enabling cluster read/write operations. + */ +#ifdef DEBUG +#include <sys/sysctl.h> +int doclusterread = 1; +struct ctldebug debug11 = { "doclusterread", &doclusterread }; +int doclusterwrite = 1; +struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; +#endif + +#if defined(__FreeBSD__) +#define doclusterwrite 1 +#define doclusterread 1 +#else +/* doclusterwrite is being tested + note that reallocblks is called when it's on, but this is not implemented */ +#define doclusterwrite 0 +/* doclusterread should work with new pagemove */ +#define doclusterread 1 +#endif + +#include <gnu/ext2fs/ext2_readwrite.c> + +/* + * Synch an open file. + */ +/* ARGSUSED */ +int +ext2_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct buf *bp; + struct timeval tv; + struct buf *nbp; + int s; + + /* + * Clean memory object. + * XXX add this to all file systems. + * XXX why is all this fs specific? + */ +#if !defined(__FreeBSD__) + vn_pager_sync(vp, ap->a_waitfor); +#endif + + /* + * Flush all dirty buffers associated with a vnode. + */ + ext2_discard_prealloc(VTOI(vp)); + +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ext2_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. 
+ */ + if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) + (void) bawrite(bp); + else + (void) bwrite(bp); + goto loop; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; +#if !defined(__FreeBSD__) + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); +#else + tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "extfsn", 0); +#endif + } +#if DIAGNOSTIC + if (vp->v_dirtyblkhd.lh_first) { + vprint("ext2_fsync: dirty", vp); + goto loop; + } +#endif + } + splx(s); +#if defined(__FreeBSD__) + tv = time; +#else + get_time(&tv); +#endif + return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); +} diff --git a/sys/gnu/fs/ext2fs/fs.h b/sys/gnu/fs/ext2fs/fs.h new file mode 100644 index 000000000000..28071d4754b2 --- /dev/null +++ b/sys/gnu/fs/ext2fs/fs.h @@ -0,0 +1,157 @@ +/* + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.7 (Berkeley) 4/19/94 + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * The first boot and super blocks are given in absolute disk addresses. + * The byte-offset forms are preferred, as they don't imply a sector size. 
+ */ +#define BBSIZE 1024 +#define SBSIZE 1024 +#define BBOFF ((off_t)(0)) +#define SBOFF ((off_t)(BBOFF + BBSIZE)) +#define BBLOCK ((daddr_t)(0)) +#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + */ +#define MAXMNTLEN 512 + +/* + * Macros for access to superblock array structures + */ + +/* + * Convert cylinder group to base address of its global summary info. + */ +#define fs_cs(fs, cgindx) (((struct ext2_group_desc *) \ + (fs->s_group_desc[cgindx / EXT2_DESC_PER_BLOCK(fs)]->b_data)) \ + [cgindx % EXT2_DESC_PER_BLOCK(fs)]) + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << ((fs)->s_fsbtodb)) +#define dbtofsb(fs, b) ((b) >> ((fs)->s_fsbtodb)) + +/* get group containing inode */ +#define ino_to_cg(fs, x) (((x) - 1) / EXT2_INODES_PER_GROUP(fs)) + +/* get block containing inode from its number x */ +#define ino_to_fsba(fs, x) fs_cs(fs, ino_to_cg(fs, x)).bg_inode_table + \ + (((x)-1) % EXT2_INODES_PER_GROUP(fs))/EXT2_INODES_PER_BLOCK(fs) + +/* get offset for inode in block */ +#define ino_to_fsbo(fs, x) ((x-1) % EXT2_INODES_PER_BLOCK(fs)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) (((d) - fs->s_es->s_first_data_block) / \ + EXT2_BLOCKS_PER_GROUP(fs)) +#define dtogd(fs, d) (((d) - fs->s_es->s_first_data_block) % \ + EXT2_BLOCKS_PER_GROUP(fs)) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
/*
 * Shift/mask forms of common block arithmetic.  The comment inside each
 * definition names the division, modulo or multiplication it replaces.
 * The fs argument is parenthesized at every use so that expressions such
 * as `&sb' can be passed.
 */
#define blkoff(fs, loc)		/* calculates (loc % fs->fs_bsize) */ \
	((loc) & (fs)->s_qbmask)

#define lblktosize(fs, blk)	/* calculates (blk * fs->fs_bsize) */ \
	((blk) << ((fs)->s_bshift))

#define lblkno(fs, loc)		/* calculates (loc / fs->fs_bsize) */ \
	((loc) >> ((fs)->s_bshift))

/* no fragments -> logical block number equal # of frags */
#define numfrags(fs, loc)	/* calculates (loc / fs->fs_fsize) */ \
	((loc) >> ((fs)->s_bshift))

#define fragroundup(fs, size)	/* calculates roundup(size, fs->fs_fsize) */ \
	roundup((size), (fs)->s_frag_size)
	/* was (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) */

/*
 * Determining the size of a file block in the file system.
 * easy w/o fragments: ip and lbn are ignored, the answer is always
 * s_frag_size.
 */
#define blksize(fs, ip, lbn) ((fs)->s_frag_size)

/*
 * INOPB is the number of inodes in a secondary storage block.
 */
#define INOPB(fs) EXT2_INODES_PER_BLOCK(fs)

/*
 * NINDIR is the number of indirects in a file system block.
 */
#define NINDIR(fs) (EXT2_ADDR_PER_BLOCK(fs))

extern int inside[], around[];
/* spelled out as unsigned char (what u_char stands for) so that this
 * declaration does not depend on <sys/types.h> having been included */
extern unsigned char *fragtbl[];

/* a few remarks about superblock locking/unlocking
 * Linux provides special routines for doing so
 * I haven't figured out yet what BSD does
 * I think I'll try a VOP_LOCK/VOP_UNLOCK on the device vnode
 */
#define DEVVP(inode)		(VFSTOUFS(ITOV(inode)->v_mount)->um_devvp)
#define lock_super(devvp) 	VOP_LOCK(devvp)
#define unlock_super(devvp) 	VOP_UNLOCK(devvp)

/*
 * ---- file boundary, kept from the patch this text was taken from ----
 * diff --git a/sys/gnu/fs/ext2fs/i386-bitops.h b/sys/gnu/fs/ext2fs/i386-bitops.h
 * new file mode 100644
 * index 000000000000..a66679ea3df8
 * --- /dev/null
 * +++ b/sys/gnu/fs/ext2fs/i386-bitops.h
 * @@ -0,0 +1,159 @@
 */

/*
 * this is a mixture of i386/bitops.h and asm/string.h
 * taken from the Linux source tree
 *
 * The i386 inline assembly these routines were first written in has been
 * reprogrammed in C (the original carried an "XXX replace with Mach
 * routines or reprogram in C" note), so the header no longer depends on
 * the i386 instruction set or on unsigned long being 32 bits wide.
 */
#ifndef _I386_BITOPS_H
#define _I386_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 */

/*
 * set_bit, clear_bit and change_bit update the bitmap with a single
 * atomic read-modify-write (compiler __atomic builtins), so the
 * bit-setting is guaranteed to be atomic.  All bit operations return 0
 * if the bit was cleared before the operation and != 0 (in fact -1, as
 * the assembly versions did) if it was not.
 *
 * Bit nr lives in byte (nr / 8) of the map at position (nr % 8), counted
 * from the least significant bit.  On a little-endian machine this is the
 * numbering the assembly versions used: bit 0 is the LSB of the first
 * 32-bit word, bit 32 is the LSB of the second one.
 *
 * nr is expected to be non-negative.
 */

/*
 * Left over from the inline-assembly versions, where they kept gcc from
 * over-optimizing the memory operand.  Nothing in this header uses them
 * any more; they stay defined in case other code picked them up from here.
 */
struct __dummy { unsigned long a[100]; };
#define ADDR (*(struct __dummy *) addr)

/* Atomically set bit nr of the map at addr; returns the old bit (0 / -1). */
static __inline__ int set_bit(int nr, void * addr)
{
	unsigned char *cell = (unsigned char *)addr + (nr >> 3);
	unsigned char mask = (unsigned char)(1u << (nr & 7));
	unsigned char old = __atomic_fetch_or(cell, mask, __ATOMIC_SEQ_CST);

	return (old & mask) ? -1 : 0;
}

/* Atomically clear bit nr of the map at addr; returns the old bit (0 / -1). */
static __inline__ int clear_bit(int nr, void * addr)
{
	unsigned char *cell = (unsigned char *)addr + (nr >> 3);
	unsigned char mask = (unsigned char)(1u << (nr & 7));
	unsigned char old = __atomic_fetch_and(cell, (unsigned char)~mask,
	    __ATOMIC_SEQ_CST);

	return (old & mask) ? -1 : 0;
}

/* Atomically flip bit nr of the map at addr; returns the old bit (0 / -1). */
static __inline__ int change_bit(int nr, void * addr)
{
	unsigned char *cell = (unsigned char *)addr + (nr >> 3);
	unsigned char mask = (unsigned char)(1u << (nr & 7));
	unsigned char old = __atomic_fetch_xor(cell, mask, __ATOMIC_SEQ_CST);

	return (old & mask) ? -1 : 0;
}

/*
 * This routine doesn't need to be atomic: it only reads the map.
 * Returns 0 if bit nr is clear and -1 if it is set.
 */
static __inline__ int test_bit(int nr, void * addr)
{
	const unsigned char *cell = (const unsigned char *)addr + (nr >> 3);

	return (*cell & (1u << (nr & 7))) ? -1 : 0;
}

/*
 * Find-bit routines..
 */

/*
 * Return the number of the first zero bit in the map at addr.
 *
 * size is a bit count, but the search works on whole 32-bit words: it
 * covers (size + 31) / 32 words.  The result may therefore be >= size
 * (a zero bit past size in the last word), and when every examined bit
 * is set the result is the number of bits examined, i.e. size rounded up
 * to a multiple of 32.  Callers must treat any result >= size as "none".
 * A size of 0 returns 0 without touching the map.
 */
static __inline__ int find_first_zero_bit(void * addr, unsigned size)
{
	const unsigned char *map = addr;
	unsigned nbytes = ((size + 31) >> 5) << 2;	/* whole words, in bytes */
	unsigned i;

	if (!size)
		return 0;
	for (i = 0; i < nbytes; i++) {
		unsigned char zeros = (unsigned char)~map[i];

		if (zeros) {
			int bit = 0;

			/* lowest set bit of the inverted byte = first zero */
			while (!(zeros & 1)) {
				zeros >>= 1;
				bit++;
			}
			return (int)(i * 8) + bit;
		}
	}
	return (int)(nbytes * 8);
}

/*
 * Return the number of the first zero bit at or after bit offset.
 *
 * If offset is not on a 32-bit word boundary the rest of that word is
 * checked bit by bit first; everything after it is handed to
 * find_first_zero_bit(), so the same "result >= size means none" rule
 * applies.  offset is expected to be non-negative.
 */
static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
{
	unsigned char *map = addr;
	int base = offset & ~31;	/* first bit of the word holding offset */
	int remaining;

	if (offset & 31) {
		int pos;

		/*
		 * Look for zero in the rest of the first word
		 */
		for (pos = offset; pos < base + 32; pos++)
			if (!(map[pos >> 3] & (1u << (pos & 7))))
				return pos;
		base += 32;
	}
	/*
	 * No zero yet, search remaining full words for a zero.  A map that
	 * ends before the next word boundary leaves nothing to search; the
	 * count is clamped so it cannot turn into a huge unsigned size.
	 */
	remaining = size - base;
	if (remaining < 0)
		remaining = 0;
	return base + find_first_zero_bit(map + (base >> 3), (unsigned)remaining);
}

/*
 * ffz = Find First Zero in word.  When no zero exists the result is the
 * width of unsigned long in bits, which is not a valid bit number,
 * so code should check against ~0UL first..
 */
static __inline__ unsigned long ffz(unsigned long word)
{
	unsigned long bit = 0;

	while (word & 1) {
		word >>= 1;
		bit++;
	}
	return bit;
}

/*
 * memscan() taken from linux asm/string.h
 */
/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
static __inline__ char * memscan(void * addr, unsigned char c, int size)
{
	unsigned char *p = addr;

	while (size > 0 && *p != c) {
		p++;
		size--;
	}
	return (char *)p;
}

#endif /* _I386_BITOPS_H */