===== fs/ext3/ialloc.c 1.26 vs edited =====
--- 1.26/fs/ext3/ialloc.c Fri Feb 14 19:24:09 2003
+++ edited/fs/ext3/ialloc.c Sat Mar 8 01:20:55 2003
@@ -195,6 +195,44 @@
 }
 
 /*
+ * @sb: superblock of the filesystem being checked
+ * @block_group: block group of inode
+ * @offset: relative offset of inode within @block_group
+ *
+ * Check whether any of the inodes in this disk block are in use.
+ * Returns 1 if another inode sharing the block is in use, or if the inode
+ * bitmap could not be read; returns 0 otherwise.
+ *
+ * Caller must be holding superblock lock (group/bitmap read lock in
+ * future).
+ */
+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group,
+			   int offset)
+{
+	struct buffer_head *ibitmap = read_inode_bitmap(sb, block_group);
+	int inodes_per_block;
+	unsigned long inum, iend;
+	int used = 0;
+
+	if (!ibitmap)
+		return 1;
+
+	inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size;
+	inum = offset & ~(inodes_per_block - 1);
+	iend = inum + inodes_per_block;
+	for (; inum < iend; inum++) {
+		if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) {
+			used = 1;
+			break;
+		}
+	}
+
+	/* Drop the bitmap buffer reference on every exit path. */
+	brelse(ibitmap);
+	return used;
+}
+
+/*
  * There are two policies for allocating an inode.  If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
@@ -422,8 +460,9 @@
 	struct ext3_group_desc * gdp;
 	struct ext3_super_block * es;
 	struct ext3_inode_info *ei;
-	int err = 0;
+	struct ext3_iloc iloc;
 	struct inode *ret;
+	int err = 0;
 
 	/* Cannot create files in a deleted directory */
 	if (!dir || !dir->i_nlink)
@@ -587,16 +626,23 @@
 		goto fail2;
 	}
 	err = ext3_init_acl(handle, inode, dir);
+	if (err)
+		goto fail3;
+
+	err = ext3_get_inode_loc_new(inode, &iloc, 1);
+	if (err)
+		goto fail3;
+
+	BUFFER_TRACE(iloc.bh, "get_write_access");
+	err = ext3_journal_get_write_access(handle, iloc.bh);
 	if (err) {
-		DQUOT_FREE_INODE(inode);
-		goto fail2;
-	}
-	err = ext3_mark_inode_dirty(handle, inode);
-	if (err) {
-		ext3_std_error(sb, err);
-		DQUOT_FREE_INODE(inode);
-		goto fail2;
-	}
+		brelse(iloc.bh);
+		iloc.bh = NULL;
+		goto fail3;
+	}
+	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+	if (err)
+		goto fail3;
 
 	ext3_debug("allocating inode %lu\n", inode->i_ino);
 	goto really_out;
@@ -610,6 +656,9 @@
 	brelse(bitmap_bh);
 	return ret;
 
+fail3:
+	ext3_std_error(sb, err);
+	DQUOT_FREE_INODE(inode);
 fail2:
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
===== fs/ext3/inode.c 1.62 vs edited =====
--- 1.62/fs/ext3/inode.c Fri Feb 14 19:24:09 2003
+++ edited/fs/ext3/inode.c Sat Mar 8 02:10:39 2003
@@ -2144,69 +2144,123 @@
 	unlock_kernel();
 }
 
-/*
- * ext3_get_inode_loc returns with an extra refcount against the
- * inode's underlying buffer_head on success.
- */
+#define NUM_INODE_PREREAD	16
 
-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+/*
+ * ext3_get_inode_loc returns with an extra refcount against the inode's
+ * underlying buffer_head on success.  If this is for a new inode allocation
+ * (new is non-zero) then we may be able to optimize away the read if there
+ * are no other in-use inodes in this inode table block.  If we need to do
+ * a read, then read in a whole chunk of blocks to avoid blocking again soon
+ * if we are doing lots of creates/updates.
+ *
+ * Returns 0 with @iloc filled in, or -EIO (holding no buffer reference) if
+ * the inode cannot be located or its inode table block cannot be read.
+ */
+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new)
 {
-	struct buffer_head *bh = 0;
+	struct buffer_head *bh[NUM_INODE_PREREAD];
+	struct super_block *sb = inode->i_sb;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	unsigned long ino = inode->i_ino;
 	unsigned long block;
 	unsigned long block_group;
 	unsigned long group_desc;
 	unsigned long desc;
 	unsigned long offset;
 	struct ext3_group_desc * gdp;
-		
-	if ((inode->i_ino != EXT3_ROOT_INO &&
-		inode->i_ino != EXT3_JOURNAL_INO &&
-		inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
-		inode->i_ino > le32_to_cpu(
-			EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) {
-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
-			    "bad inode number: %lu", inode->i_ino);
+
+	if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO &&
+	     ino < EXT3_FIRST_INO(sb)) ||
+	    ino > le32_to_cpu(sbi->s_es->s_inodes_count)) {
+		ext3_error(sb, "ext3_get_inode_loc", "bad inode number: %lu",
+			   ino);
 		goto bad_inode;
 	}
-	block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
-	if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) {
-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
-			    "group >= groups count");
+	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
+	if (block_group >= EXT3_SB(sb)->s_groups_count) {
+		ext3_error(sb, "ext3_get_inode_loc", "group >= groups count");
 		goto bad_inode;
 	}
-	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
-	desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
-	bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc];
-	if (!bh) {
-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
-			    "Descriptor not loaded");
+	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
+	desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+	if (!sbi->s_group_desc[group_desc]) {
+		ext3_error(sb, "ext3_get_inode_loc", "Descriptor not loaded");
 		goto bad_inode;
 	}
 
-	gdp = (struct ext3_group_desc *) bh->b_data;
+	gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data);
 	/*
 	 * Figure out the offset within the block group inode table
 	 */
-	offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
-		EXT3_INODE_SIZE(inode->i_sb);
+	offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb));
 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
-		(offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
-	if (!(bh = sb_bread(inode->i_sb, block))) {
-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
-			    "unable to read inode block - "
-			    "inode=%lu, block=%lu", inode->i_ino, block);
-		goto bad_inode;
+		(offset * sbi->s_inode_size >> EXT3_BLOCK_SIZE_BITS(sb));
+	bh[0] = sb_getblk(sb, block);
+	if (buffer_uptodate(bh[0]))
+		goto done;
+
+	/* If we don't really need to read this block, and it isn't already
+	 * in memory, then we just zero it out.  Otherwise, we keep the
+	 * current block contents (deleted inode data) for posterity.
+	 */
+	if (new && !ext3_itable_block_used(sb, block_group, offset)) {
+		lock_buffer(bh[0]);
+		memset(bh[0]->b_data, 0, bh[0]->b_size);
+		set_buffer_uptodate(bh[0]);
+		unlock_buffer(bh[0]);
+	} else {
+		unsigned long block_end, itable_end;
+		int count = 1;
+
+		itable_end = le32_to_cpu(gdp[desc].bg_inode_table) +
+				sbi->s_itb_per_group;
+		block_end = block + NUM_INODE_PREREAD;
+		if (block_end > itable_end)
+			block_end = itable_end;
+
+		for (++block; block < block_end; block++) {
+			bh[count] = sb_getblk(sb, block);
+			if (count && (buffer_uptodate(bh[count]) ||
+				      buffer_locked(bh[count]))) {
+				__brelse(bh[count]);
+			} else
+				count++;
+		}
+
+		ll_rw_block(READ, count, bh);
+
+		/* Release all but the block we actually need (bh[0]) */
+		while (--count > 0)
+			__brelse(bh[count]);
+
+		wait_on_buffer(bh[0]);
+		if (!buffer_uptodate(bh[0])) {
+			ext3_error(sb, __FUNCTION__,
+				   "unable to read inode block - "
+				   "inode=%lu, block=%llu", ino,
+				   (unsigned long long)bh[0]->b_blocknr);
+			/* Don't leak the sb_getblk() reference on failure */
+			brelse(bh[0]);
+			goto bad_inode;
+		}
 	}
-	offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
+done:
+	offset = (offset * sbi->s_inode_size) & (EXT3_BLOCK_SIZE(sb) - 1);
 
-	iloc->bh = bh;
-	iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
+	iloc->bh = bh[0];
+	iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset);
 	iloc->block_group = block_group;
-	
+
 	return 0;
-	
+
  bad_inode:
 	return -EIO;
+}
+
+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
+{
+	return ext3_get_inode_loc_new(inode, iloc, 0);
 }
 
 void ext3_read_inode(struct inode * inode)
===== include/linux/ext3_fs.h 1.22 vs edited =====
--- 1.22/include/linux/ext3_fs.h Tue Jan 14 00:56:29 2003
+++ edited/include/linux/ext3_fs.h Sat Mar 8 01:56:28 2003
@@ -719,6 +719,8 @@
 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 
+extern int ext3_itable_block_used(struct super_block *, unsigned int, int);
+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int);
 extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
 extern void ext3_read_inode (struct inode *);
 extern void ext3_write_inode (struct inode *, int);