From 08a48da2102bde47d2190142346af37d39cc7a1e Mon Sep 17 00:00:00 2001
From: adilger
Date: Sat, 8 Mar 2003 15:59:30 +0000
Subject: [PATCH] Completely untested (but compiled) port of noread-creates patch to 2.5.current. I suspect it is OK, since the ext3 code hasn't changed much, but needs testing.

---
 lustre/extN/ext3-2.5-noread.diff | 283 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 lustre/extN/ext3-2.5-noread.diff

diff --git a/lustre/extN/ext3-2.5-noread.diff b/lustre/extN/ext3-2.5-noread.diff
new file mode 100644
index 0000000..f1c611f
--- /dev/null
+++ b/lustre/extN/ext3-2.5-noread.diff
@@ -0,0 +1,283 @@
+===== fs/ext3/ialloc.c 1.26 vs edited =====
+--- 1.26/fs/ext3/ialloc.c	Fri Feb 14 19:24:09 2003
++++ edited/fs/ext3/ialloc.c	Sat Mar  8 01:20:55 2003
+@@ -195,6 +195,44 @@
+ }
+ 
+ /*
++ * @block_group: block group of inode
++ * @offset: relative offset of inode within @block_group
++ *
++ * Check whether any of the inodes in this disk block are in use.
++ *
++ * Returns 1 if another inode in this block is in use, or if the inode bitmap
++ * could not be obtained (treated as "in use" to be safe); 0 otherwise.
++ *
++ * Caller must be holding superblock lock (group/bitmap read lock in
++ * future).
++ */
++int ext3_itable_block_used(struct super_block *sb, unsigned int block_group,
++			   int offset)
++{
++	struct buffer_head *ibitmap = read_inode_bitmap(sb, block_group);
++	int inodes_per_block;
++	unsigned long inum, iend;
++	int used = 0;
++
++	if (!ibitmap)
++		return 1;
++
++	inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size;
++	inum = offset & ~(inodes_per_block - 1);
++	iend = inum + inodes_per_block;
++	for (; inum < iend; inum++) {
++		if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) {
++			used = 1;
++			break;
++		}
++	}
++
++	/* read_inode_bitmap() hands back a referenced buffer; release it. */
++	brelse(ibitmap);
++	return used;
++}
++
++/*
+  * There are two policies for allocating an inode.  If the new inode is
+  * a directory, then a forward search is made for a block group with both
+  * free space and a low directory-to-inode ratio; if that fails, then of
+@@ -422,8 +460,9 @@
+ 	struct ext3_group_desc * gdp;
+ 	struct ext3_super_block * es;
+ 	struct ext3_inode_info *ei;
+-	int err = 0;
++	struct ext3_iloc iloc;
+ 	struct inode *ret;
++	int err = 0;
+ 
+ 	/* Cannot create files in a deleted directory */
+ 	if (!dir || !dir->i_nlink)
+@@ -587,16 +626,23 @@
+ 		goto fail2;
+ 	}
+ 	err = ext3_init_acl(handle, inode, dir);
++	if (err)
++		goto fail3;
++
++	err = ext3_get_inode_loc_new(inode, &iloc, 1);
++	if (err)
++		goto fail3;
++
++	BUFFER_TRACE(iloc.bh, "get_write_access");
++	err = ext3_journal_get_write_access(handle, iloc.bh);
+ 	if (err) {
+-		DQUOT_FREE_INODE(inode);
+-		goto fail2;
+-	}
+-	err = ext3_mark_inode_dirty(handle, inode);
+-	if (err) {
+-		ext3_std_error(sb, err);
+-		DQUOT_FREE_INODE(inode);
+-		goto fail2;
+-	}
++		brelse(iloc.bh);
++		iloc.bh = NULL;
++		goto fail3;
++	}
++	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
++	if (err)
++		goto fail3;
+ 
+ 	ext3_debug("allocating inode %lu\n", inode->i_ino);
+ 	goto really_out;
+@@ -610,6 +656,9 @@
+ 	brelse(bitmap_bh);
+ 	return ret;
+ 
++fail3:
++	ext3_std_error(sb, err);
++	DQUOT_FREE_INODE(inode);
+ fail2:
+ 	inode->i_flags |= S_NOQUOTA;
+ 	inode->i_nlink = 0;
+===== fs/ext3/inode.c 1.62 vs edited =====
+--- 1.62/fs/ext3/inode.c	Fri Feb 14 19:24:09 2003
++++ edited/fs/ext3/inode.c	Sat Mar  8 02:10:39 2003
+@@ -2144,69 +2144,127 @@
+ 	unlock_kernel();
+ }
+ 
+-/*
+- * ext3_get_inode_loc returns with an extra refcount against the
+- * inode's underlying buffer_head on success.
+- */
++#define NUM_INODE_PREREAD	16
+ 
+-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
++/*
++ * ext3_get_inode_loc returns with an extra refcount against the inode's
++ * underlying buffer_head on success.  If this is for a new inode allocation
++ * (new is non-zero) then we may be able to optimize away the read if there
++ * are no other in-use inodes in this inode table block.  If we need to do
++ * a read, then read in a whole chunk of blocks to avoid blocking again soon
++ * if we are doing lots of creates/updates.
++ *
++ * Returns 0 with @iloc filled in, or -EIO if the inode number is invalid or
++ * the inode table block cannot be read (no buffer reference is kept then).
++ */
++int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new)
+ {
+-	struct buffer_head *bh = 0;
++	struct buffer_head *bh[NUM_INODE_PREREAD];
++	struct super_block *sb = inode->i_sb;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	unsigned long ino = inode->i_ino;
+ 	unsigned long block;
+ 	unsigned long block_group;
+ 	unsigned long group_desc;
+ 	unsigned long desc;
+ 	unsigned long offset;
+ 	struct ext3_group_desc * gdp;
+-	
+-	if ((inode->i_ino != EXT3_ROOT_INO &&
+-		inode->i_ino != EXT3_JOURNAL_INO &&
+-		inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
+-		inode->i_ino > le32_to_cpu(
+-			EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "bad inode number: %lu", inode->i_ino);
++
++	if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO &&
++	     ino < EXT3_FIRST_INO(sb)) ||
++	    ino > le32_to_cpu(sbi->s_es->s_inodes_count)) {
++		ext3_error(sb, "ext3_get_inode_loc", "bad inode number: %lu",
++			   ino);
+ 		goto bad_inode;
+ 	}
+-	block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
+-	if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "group >= groups count");
++	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
++	if (block_group >= EXT3_SB(sb)->s_groups_count) {
++		ext3_error(sb, "ext3_get_inode_loc", "group >= groups count");
+ 		goto bad_inode;
+ 	}
+-	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
+-	desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
+-	bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc];
+-	if (!bh) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "Descriptor not loaded");
++	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
++	desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
++	if (!sbi->s_group_desc[group_desc]) {
++		ext3_error(sb, "ext3_get_inode_loc", "Descriptor not loaded");
+ 		goto bad_inode;
+ 	}
+ 
+-	gdp = (struct ext3_group_desc *) bh->b_data;
++	gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data);
+ 	/*
+ 	 * Figure out the offset within the block group inode table
+ 	 */
+-	offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
+-		EXT3_INODE_SIZE(inode->i_sb);
++	offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb));
+ 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
+-		(offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
+-	if (!(bh = sb_bread(inode->i_sb, block))) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "unable to read inode block - "
+-			    "inode=%lu, block=%lu", inode->i_ino, block);
+-		goto bad_inode;
++		(offset * sbi->s_inode_size >> EXT3_BLOCK_SIZE_BITS(sb));
++	bh[0] = sb_getblk(sb, block);
++	if (buffer_uptodate(bh[0]))
++		goto done;
++
++	/* If we don't really need to read this block, and it isn't already
++	 * in memory, then we just zero it out.  Otherwise, we keep the
++	 * current block contents (deleted inode data) for posterity.
++	 */
++	if (new && !ext3_itable_block_used(sb, block_group, offset)) {
++		lock_buffer(bh[0]);
++		memset(bh[0]->b_data, 0, bh[0]->b_size);
++		set_buffer_uptodate(bh[0]);
++		unlock_buffer(bh[0]);
++	} else {
++		unsigned long block_end, itable_end;
++		int count = 1;
++
++		itable_end = le32_to_cpu(gdp[desc].bg_inode_table) +
++				sbi->s_itb_per_group;
++		block_end = block + NUM_INODE_PREREAD;
++		if (block_end > itable_end)
++			block_end = itable_end;
++
++		/* bh[0] already references @block, so only the blocks that
++		 * follow it are readahead candidates.  Starting at block + 1
++		 * keeps count within NUM_INODE_PREREAD and avoids submitting
++		 * the same buffer twice.
++		 */
++		for (block++; block < block_end; block++) {
++			bh[count] = sb_getblk(sb, block);
++			if (buffer_uptodate(bh[count]) ||
++			    buffer_locked(bh[count])) {
++				__brelse(bh[count]);
++			} else
++				count++;
++		}
++
++		ll_rw_block(READ, count, bh);
++
++		/* Release all but the block we actually need (bh[0]) */
++		while (--count > 0)
++			__brelse(bh[count]);
++
++		wait_on_buffer(bh[0]);
++		if (!buffer_uptodate(bh[0])) {
++			ext3_error(sb, __FUNCTION__,
++				   "unable to read inode block - "
++				   "inode=%lu, block=%llu", ino,
++				   (unsigned long long)bh[0]->b_blocknr);
++			brelse(bh[0]);
++			goto bad_inode;
++		}
+ 	}
+-	offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
++done:
++	offset = (offset * sbi->s_inode_size) & (EXT3_BLOCK_SIZE(sb) - 1);
+ 
+-	iloc->bh = bh;
+-	iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
++	iloc->bh = bh[0];
++	iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset);
+ 	iloc->block_group = block_group;
+-	
++
+ 	return 0;
+-	
++
+  bad_inode:
+ 	return -EIO;
++}
++
++int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
++{
++	return ext3_get_inode_loc_new(inode, iloc, 0);
+ }
+ 
+ void ext3_read_inode(struct inode * inode)
+===== include/linux/ext3_fs.h 1.22 vs edited =====
+--- 1.22/include/linux/ext3_fs.h	Tue Jan 14 00:56:29 2003
++++ edited/include/linux/ext3_fs.h	Sat Mar  8 01:56:28 2003
+@@ -719,6 +719,8 @@
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+ 
++extern int ext3_itable_block_used(struct super_block *, unsigned int, int);
++extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int);
+ extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
+ extern void ext3_read_inode (struct inode *);
+ extern void ext3_write_inode (struct inode *, int);
-- 
1.8.3.1