From fb4edafd1453a7c10780e3645df826cedac30268 Mon Sep 17 00:00:00 2001 From: pschwan Date: Thu, 12 Jun 2003 07:28:56 +0000 Subject: [PATCH] more extN cleanup --- lustre/extN/ext3-2.5-noread.diff | 266 ----------------------------------- lustre/extN/ext3-unmount_sync.diff | 59 -------- lustre/extN/ext3-use-after-free.diff | 65 --------- lustre/extN/extN-iget-debug.diff | 48 ------- lustre/extN/extN-misc-fixup.diff | 23 --- 5 files changed, 461 deletions(-) delete mode 100644 lustre/extN/ext3-2.5-noread.diff delete mode 100644 lustre/extN/ext3-unmount_sync.diff delete mode 100644 lustre/extN/ext3-use-after-free.diff delete mode 100644 lustre/extN/extN-iget-debug.diff delete mode 100644 lustre/extN/extN-misc-fixup.diff diff --git a/lustre/extN/ext3-2.5-noread.diff b/lustre/extN/ext3-2.5-noread.diff deleted file mode 100644 index f1c611f..0000000 --- a/lustre/extN/ext3-2.5-noread.diff +++ /dev/null @@ -1,266 +0,0 @@ -===== fs/ext3/ialloc.c 1.26 vs edited ===== ---- 1.26/fs/ext3/ialloc.c Fri Feb 14 19:24:09 2003 -+++ edited/fs/ext3/ialloc.c Sat Mar 8 01:20:55 2003 -@@ -195,6 +195,36 @@ - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in -+ * future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ struct buffer_head *ibitmap = read_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ -+ if (!ibitmap) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -422,8 +452,9 @@ - struct ext3_group_desc * gdp; - struct ext3_super_block * es; - struct ext3_inode_info *ei; -- int err = 0; -+ struct ext3_iloc iloc; - struct inode *ret; -+ int err = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -587,16 +618,23 @@ - goto fail2; - } - err = ext3_init_acl(handle, inode, dir); -+ if (err) -+ goto fail3; -+ -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) -+ goto fail3; -+ -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); - if (err) { -- DQUOT_FREE_INODE(inode); -- goto fail2; -- } -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) { -- ext3_std_error(sb, err); -- DQUOT_FREE_INODE(inode); -- goto fail2; -- } -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail3; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) -+ goto fail3; - - ext3_debug("allocating inode %lu\n", inode->i_ino); - goto really_out; -@@ -610,6 +648,9 @@ - brelse(bitmap_bh); - return ret; - -+fail3: -+ ext3_std_error(sb, err); -+ DQUOT_FREE_INODE(inode); - fail2: - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; -===== fs/ext3/inode.c 1.62 vs edited ===== ---- 1.62/fs/ext3/inode.c Fri Feb 14 19:24:09 2003 -+++ edited/fs/ext3/inode.c Sat Mar 8 02:10:39 2003 -@@ -2144,69 +2144,118 @@ - unlock_kernel(); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -+#define NUM_INODE_PREREAD 16 - --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned long ino = inode->i_ino; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; - struct ext3_group_desc * gdp; -- -- if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ -+ if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && -+ ino < EXT3_FIRST_INO(sb)) || -+ ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error(sb, "ext3_get_inode_loc", "bad inode number: %lu", -+ ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { -+ ext3_error(sb, "ext3_get_inode_loc", "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; -- if (!bh) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "Descriptor not loaded"); -+ group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); -+ desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ if (!sbi->s_group_desc[group_desc]) { -+ ext3_error(sb, "ext3_get_inode_loc", "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)); - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -+ (offset * sbi->s_inode_size >> EXT3_BLOCK_SIZE_BITS(sb)); -+ bh[0] = sb_getblk(sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. -+ */ -+ if (new && !ext3_itable_block_used(sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ set_buffer_uptodate(bh[0]); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ sbi->s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (; block < block_end; block++) { -+ bh[count] = sb_getblk(sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%llu", ino, -+ (unsigned long long)bh[0]->b_blocknr); -+ goto bad_inode; -+ } - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+done: -+ offset = (offset * sbi->s_inode_size) & (EXT3_BLOCK_SIZE(sb) - 1); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; -- -+ - return 0; -- -+ - bad_inode: - return -EIO; -+} -+ -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); - } - - void ext3_read_inode(struct inode * inode) -===== include/linux/ext3_fs.h 1.22 vs edited ===== ---- 1.22/include/linux/ext3_fs.h Tue Jan 14 00:56:29 2003 -+++ edited/include/linux/ext3_fs.h Sat Mar 8 01:56:28 2003 -@@ -719,6 +719,8 @@ - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); diff --git a/lustre/extN/ext3-unmount_sync.diff b/lustre/extN/ext3-unmount_sync.diff deleted file mode 100644 index 1f9b796..0000000 --- a/lustre/extN/ext3-unmount_sync.diff +++ /dev/null @@ -1,59 +0,0 @@ -From adilger@clusterfs.com Mon Dec 2 10:26:44 2002 -Date: Mon, 2 Dec 2002 10:26:44 -0700 -From: Andreas Dilger -To: Lustre LLNL Mailing list , - Lustre Development Mailing List -Subject: Re: data corrupting bug in 2.4.20 ext3, data=journal -Message-ID: <20021202102644.H1422@schatzie.adilger.int> -Mail-Followup-To: Lustre LLNL Mailing list , - Lustre Development Mailing List -Mime-Version: 1.0 -Content-Type: text/plain; charset=us-ascii -Content-Disposition: inline -User-Agent: Mutt/1.2.5.1i -X-GPG-Key: 1024D/0D35BED6 -X-GPG-Fingerprint: 7A37 5D79 BF1B CECA D44F 8A29 A488 39F5 0D35 BED6 -Status: RO -Content-Length: 1160 -Lines: 39 - -Here is the new-improved fix for the ext3 discarding data at umount bug -discovered late last week. To be used instead of the previous ext3 fix. - -Sadly, this is completely unrelated to the problems Mike is having with -ext3 under UML, since it is an unmount-time problem. - ------ Forwarded message from "Stephen C. Tweedie" ----- -The attached patch seems to fix things for me. - -Cheers, - Stephen - - ---- linux-2.4-ext3merge/fs/ext3/super.c.=K0027=.orig 2002-12-02 15:35:13.000000000 +0000 -+++ linux-2.4-ext3merge/fs/ext3/super.c 2002-12-02 15:35:14.000000000 +0000 -@@ -1640,7 +1640,12 @@ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - -- if (do_sync_supers) { -+ /* -+ * Tricky --- if we are unmounting, the write really does need -+ * to be synchronous. We can detect that by looking for NULL in -+ * sb->s_root. -+ */ -+ if (do_sync_supers || !sb->s_root) { - unlock_super(sb); - log_wait_commit(EXT3_SB(sb)->s_journal, target); - lock_super(sb); - - ------ End forwarded message ----- - -Cheers, Andreas --- -Andreas Dilger -http://sourceforge.net/projects/ext2resize/ -http://www-mddsp.enel.ucalgary.ca/People/adilger/ - - diff --git a/lustre/extN/ext3-use-after-free.diff b/lustre/extN/ext3-use-after-free.diff deleted file mode 100644 index 8cd673f..0000000 --- a/lustre/extN/ext3-use-after-free.diff +++ /dev/null @@ -1,65 +0,0 @@ - - -If ext3_add_nondir() fails it will do an iput() of the inode. But we -continue to run ext3_mark_inode_dirty() against the potentially-freed -inode. This oopses when slab poisoning is enabled. - -Fix it so that we only run ext3_mark_inode_dirty() if the inode was -successfully instantiated. - -This bug was added in 2.4.20-pre9. - - - fs/ext3/namei.c | 11 +++++------ - 1 files changed, 5 insertions(+), 6 deletions(-) - ---- 24/fs/ext3/namei.c~ext3-use-after-free Sun Dec 15 11:27:50 2002 -+++ 24-akpm/fs/ext3/namei.c Sun Dec 15 11:27:50 2002 -@@ -429,8 +429,11 @@ static int ext3_add_nondir(handle_t *han - { - int err = ext3_add_entry(handle, dentry, inode); - if (!err) { -- d_instantiate(dentry, inode); -- return 0; -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ d_instantiate(dentry, inode); -+ return 0; -+ } - } - ext3_dec_count(handle, inode); - iput(inode); -@@ -465,7 +468,6 @@ static int ext3_create (struct inode * d - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -490,7 +492,6 @@ static int ext3_mknod (struct inode * di - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -934,7 +935,6 @@ static int ext3_symlink (struct inode * - } - inode->u.ext3_i.i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -@@ -971,7 +971,6 @@ static int ext3_link (struct dentry * ol - atomic_inc(&inode->i_count); - - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } - -_ diff --git a/lustre/extN/extN-iget-debug.diff b/lustre/extN/extN-iget-debug.diff deleted file mode 100644 index 9714e35..0000000 --- a/lustre/extN/extN-iget-debug.diff +++ /dev/null @@ -1,48 +0,0 @@ ---- linux/fs/ext3/namei.c.orig Thu Jan 30 01:15:13 2003 -+++ linux/fs/ext3/namei.c Sat Feb 1 00:33:46 2003 -@@ -710,6 +710,24 @@ - return ret; - } - -+static int extN_find_inode(struct inode *inode, unsigned long ino, -+ void *opaque) -+{ -+ const char *name = NULL; -+ int len = 0; -+ -+ if (opaque) { -+ struct dentry *dentry = opaque; -+ name = dentry->d_name.name; -+ len = dentry->d_name.len; -+ } -+ printk(KERN_INFO "finding inode %s:%lu (%p) count %d (%p = %*s)\n", -+ kdevname(inode->i_dev), ino, inode, atomic_read(&inode->i_count), -+ opaque, len, name ? name : ""); -+ -+ return 1; -+} -+ - static struct dentry *extN_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -724,7 +742,7 @@ - if (bh) { - unsigned long ino = le32_to_cpu(de->inode); - brelse (bh); -- inode = iget(dir->i_sb, ino); -+ inode = iget4(dir->i_sb, ino, extN_find_inode, dentry); - - if (!inode) - return ERR_PTR(-EACCES); ---- linux/fs/ext3/inode.c.orig Thu Jan 30 01:15:13 2003 -+++ linux/fs/ext3/inode.c Sat Feb 1 00:34:45 2003 -@@ -166,6 +166,9 @@ - */ - void extN_put_inode (struct inode * inode) - { -+ printk(KERN_INFO "putting inode %s:%lu (%p) count %d\n", -+ kdevname(inode->i_dev), inode->i_ino, inode, -+ atomic_read(&inode->i_count)); - extN_discard_prealloc (inode); - } - diff --git a/lustre/extN/extN-misc-fixup.diff b/lustre/extN/extN-misc-fixup.diff deleted file mode 100644 index db0bc0f..0000000 --- a/lustre/extN/extN-misc-fixup.diff +++ /dev/null @@ -1,23 +0,0 @@ ---- linux-2.4.17/fs/extN/super.c.orig Fri Dec 21 10:41:55 2001 -+++ linux-2.4.17/fs/extN/super.c Fri Mar 22 11:00:41 2002 -@@ -1344,10 +1342,10 @@ - printk(KERN_ERR "EXTN-fs: I/O error on journal device\n"); - goto out_journal; - } -- if (ntohl(journal->j_superblock->s_nr_users) != 1) { -+ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXTN-fs: External journal has more than one " - "user (unsupported) - %d\n", -- ntohl(journal->j_superblock->s_nr_users)); -+ be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXTN_SB(sb)->journal_bdev = bdev; -@@ -1560,6 +1560,7 @@ - unlock_kernel(); - return ret; - } -+EXPORT_SYMBOL(extN_force_commit); /* here to avoid potential patch collisions */ - - /* - * Ext3 always journals updates to the superblock itself, so we don't -- 1.8.3.1