From: johann Date: Fri, 3 Aug 2007 10:25:25 +0000 (+0000) Subject: Branch b1_6 X-Git-Tag: v1_7_91~95 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=a56b6687b68421b032b333c1e95a45950f7b625c;hp=d4fa283efe4762c901cab75739bb83ed57e275b3 Branch b1_6 b=11802 i=adilger i=kalpak Severity : normal Bugzilla : 11802 Description: lustre support for RHEL5 Details : Add support for RHEL5. --- diff --git a/ldiskfs/configure.ac b/ldiskfs/configure.ac index d5c1f7c..7adc1446 100644 --- a/ldiskfs/configure.ac +++ b/ldiskfs/configure.ac @@ -72,6 +72,7 @@ case $LINUXRELEASE in 2.6.12*) LDISKFS_SERIES="2.6.12-vanilla.series" ;; 2.6.15*) LDISKFS_SERIES="2.6-fc5.series";; 2.6.16*) LDISKFS_SERIES="2.6-sles10.series";; +2.6.18-*el5*) LDISKFS_SERIES="2.6-rhel5.series";; 2.6.18*) LDISKFS_SERIES="2.6.18-vanilla.series";; *) AC_MSG_WARN([Unknown kernel version $LINUXRELEASE, fix ldiskfs/configure.ac]) esac diff --git a/ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch b/ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch new file mode 100644 index 0000000..935f97b --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch @@ -0,0 +1,484 @@ +Date: Tue, 19 Sep 2006 15:33:04 -0500 +From: Eric Sandeen +Subject: [RHEL5 Patch 3/3] (resend) Fix ext3 32-bit inodes + +This one is in the -mm tree as ext3-inode-numbers-are-unsigned-long.patch, +resent to accomodate sct's request for uints instead; just pinged akpm +to pick up the ulonglong->uint change. + +This is primarily format string fixes, with changes to ialloc.c where large +inode counts could overflow, and also pass around journal_inum as an +unsigned long, just to be pedantic about it.... 
+ +Signed-off-by: Eric Sandeen +Cc: Mingming Cao +Signed-off-by: Andrew Morton + +Index: linux-2.6.17-1.2654.el5/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/ialloc.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/ialloc.c +@@ -202,7 +202,7 @@ error_return: + static int find_group_dir(struct super_block *sb, struct inode *parent) + { + int ngroups = EXT3_SB(sb)->s_groups_count; +- int freei, avefreei; ++ unsigned int freei, avefreei; + struct ext3_group_desc *desc, *best_desc = NULL; + struct buffer_head *bh; + int group, best_group = -1; +@@ -261,10 +261,10 @@ static int find_group_orlov(struct super + struct ext3_super_block *es = sbi->s_es; + int ngroups = sbi->s_groups_count; + int inodes_per_group = EXT3_INODES_PER_GROUP(sb); +- int freei, avefreei; ++ unsigned int freei, avefreei; + ext3_fsblk_t freeb, avefreeb; + ext3_fsblk_t blocks_per_dir; +- int ndirs; ++ unsigned int ndirs; + int max_debt, max_dirs, min_inodes; + ext3_grpblk_t min_blocks; + int group = -1, i; +Index: linux-2.6.17-1.2654.el5/fs/ext3/inode.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/inode.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/inode.c +@@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t + */ + if (!bh) { + ext3_error(inode->i_sb, "ext3_free_branches", +- "Read failure, inode=%ld, block="E3FSBLK, ++ "Read failure, inode=%lu, block="E3FSBLK, + inode->i_ino, nr); + continue; + } +Index: linux-2.6.17-1.2654.el5/fs/ext3/namei.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/namei.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/namei.c +@@ -1919,8 +1919,8 @@ int ext3_orphan_add(handle_t *handle, st + if (!err) + list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + +- jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); +- jbd_debug(4, "orphan inode %ld will point to %d\n", 
++ jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); ++ jbd_debug(4, "orphan inode %lu will point to %d\n", + inode->i_ino, NEXT_ORPHAN(inode)); + out_unlock: + unlock_super(sb); +Index: linux-2.6.17-1.2654.el5/fs/ext3/super.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/super.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/super.c +@@ -45,7 +45,7 @@ + static int ext3_load_journal(struct super_block *, struct ext3_super_block *, + unsigned long journal_devnum); + static int ext3_create_journal(struct super_block *, struct ext3_super_block *, +- int); ++ unsigned int); + static void ext3_commit_super (struct super_block * sb, + struct ext3_super_block * es, + int sync); +@@ -376,7 +376,7 @@ static void dump_orphan_list(struct supe + list_for_each(l, &sbi->s_orphan) { + struct inode *inode = orphan_list_entry(l); + printk(KERN_ERR " " +- "inode %s:%ld at %p: mode %o, nlink %d, next %d\n", ++ "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", + inode->i_sb->s_id, inode->i_ino, inode, + inode->i_mode, inode->i_nlink, + NEXT_ORPHAN(inode)); +@@ -711,7 +711,7 @@ static ext3_fsblk_t get_sb_block(void ** + } + + static int parse_options (char *options, struct super_block *sb, +- unsigned long *inum, unsigned long *journal_devnum, ++ unsigned int *inum, unsigned long *journal_devnum, + ext3_fsblk_t *n_blocks_count, int is_remount) + { + struct ext3_sb_info *sbi = EXT3_SB(sb); +@@ -1264,17 +1264,17 @@ static void ext3_orphan_cleanup (struct + DQUOT_INIT(inode); + if (inode->i_nlink) { + printk(KERN_DEBUG +- "%s: truncating inode %ld to %Ld bytes\n", ++ "%s: truncating inode %lu to %Ld bytes\n", + __FUNCTION__, inode->i_ino, inode->i_size); +- jbd_debug(2, "truncating inode %ld to %Ld bytes\n", ++ jbd_debug(2, "truncating inode %lu to %Ld bytes\n", + inode->i_ino, inode->i_size); + ext3_truncate(inode); + nr_truncates++; + } else { + printk(KERN_DEBUG +- "%s: deleting unreferenced inode %ld\n", ++ "%s: 
deleting unreferenced inode %lu\n", + __FUNCTION__, inode->i_ino); +- jbd_debug(2, "deleting unreferenced inode %ld\n", ++ jbd_debug(2, "deleting unreferenced inode %lu\n", + inode->i_ino); + nr_orphans++; + } +@@ -1353,7 +1353,7 @@ static int ext3_fill_super (struct super + ext3_fsblk_t sb_block = get_sb_block(&data); + ext3_fsblk_t logic_sb_block; + unsigned long offset = 0; +- unsigned long journal_inum = 0; ++ unsigned int journal_inum = 0; + unsigned long journal_devnum = 0; + unsigned long def_mount_opts; + struct inode *root; +@@ -1802,7 +1802,8 @@ static void ext3_init_journal_params(str + spin_unlock(&journal->j_state_lock); + } + +-static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum) ++static journal_t *ext3_get_journal(struct super_block *sb, ++ unsigned int journal_inum) + { + struct inode *journal_inode; + journal_t *journal; +@@ -1937,7 +1938,7 @@ static int ext3_load_journal(struct supe + unsigned long journal_devnum) + { + journal_t *journal; +- int journal_inum = le32_to_cpu(es->s_journal_inum); ++ unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); + dev_t journal_dev; + int err = 0; + int really_read_only; +@@ -2023,7 +2024,7 @@ static int ext3_load_journal(struct supe + + static int ext3_create_journal(struct super_block * sb, + struct ext3_super_block * es, +- int journal_inum) ++ unsigned int journal_inum) + { + journal_t *journal; + +@@ -2036,7 +2037,7 @@ static int ext3_create_journal(struct su + if (!(journal = ext3_get_journal(sb, journal_inum))) + return -EINVAL; + +- printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n", ++ printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", + journal_inum); + + if (journal_create(journal)) { +Index: linux-2.6.17-1.2654.el5/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/xattr.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/xattr.c +@@ -75,7 +75,7 @@ + + #ifdef EXT3_XATTR_DEBUG + # 
define ea_idebug(inode, f...) do { \ +- printk(KERN_DEBUG "inode %s:%ld: ", \ ++ printk(KERN_DEBUG "inode %s:%lu: ", \ + inode->i_sb->s_id, inode->i_ino); \ + printk(f); \ + printk("\n"); \ +@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext3_xattr_check_block(bh)) { + bad_block: ext3_error(inode->i_sb, __FUNCTION__, +- "inode %ld: bad block "E3FSBLK, inode->i_ino, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; +@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inod + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext3_xattr_check_block(bh)) { + ext3_error(inode->i_sb, __FUNCTION__, +- "inode %ld: bad block "E3FSBLK, inode->i_ino, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; +@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inod + le32_to_cpu(BHDR(bs->bh)->h_refcount)); + if (ext3_xattr_check_block(bs->bh)) { + ext3_error(sb, __FUNCTION__, +- "inode %ld: bad block "E3FSBLK, inode->i_ino, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; +@@ -848,7 +848,7 @@ cleanup_dquot: + + bad_block: + ext3_error(inode->i_sb, __FUNCTION__, +- "inode %ld: bad block "E3FSBLK, inode->i_ino, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + +@@ -1077,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle + bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + if (!bh) { + ext3_error(inode->i_sb, __FUNCTION__, +- "inode %ld: block "E3FSBLK" read error", inode->i_ino, ++ "inode %lu: block "E3FSBLK" read error", inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + } + if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) { + ext3_error(inode->i_sb, __FUNCTION__, +- "inode 
%ld: bad block "E3FSBLK, inode->i_ino, ++ "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + } +@@ -1211,7 +1211,7 @@ again: + bh = sb_bread(inode->i_sb, ce->e_block); + if (!bh) { + ext3_error(inode->i_sb, __FUNCTION__, +- "inode %ld: block %lu read error", ++ "inode %lu: block %lu read error", + inode->i_ino, (unsigned long) ce->e_block); + } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= + EXT3_XATTR_REFCOUNT_MAX) { + +Date: Tue, 19 Sep 2006 15:32:02 -0500 +From: Eric Sandeen +Subject: [RHEL5 Patch 1/3] (resend) Fix ext3 overflows at 16T + +This one is in -mm as fix-ext3-mounts-at-16t.patch and +fix-ext3-mounts-at-16t-fix.patch + +this gets things mounting for a 16T ext3 filesystem. (patched up +e2fsprogs will be needed too, working on that) + +jarod wilson has been helping with testing. + +This patch fixes these issues in the kernel: + +o sbi->s_groups_count overflows in ext3_fill_super() + + sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) + + EXT3_BLOCKS_PER_GROUP(sb) - 1) / + EXT3_BLOCKS_PER_GROUP(sb); + + at 16T, s_blocks_count is already maxed out; adding + EXT3_BLOCKS_PER_GROUP(sb) overflows it and groups_count comes out to 0. + Not really what we want, and causes a failed mount. + + Changing it this way works & avoids the overflow: + + (A + B - 1)/B changed to: ((A - 1)/B) + 1 + +o ext3_check_descriptors() overflows range checks + + ext3_check_descriptors() iterates over all block groups making sure + that various bits are within the right block ranges... on the last pass + through, it is checking the error case + + [item] >= block + EXT3_BLOCKS_PER_GROUP(sb) + + where "block" is the first block in the last block group. The last + block in this group (and the last one that will fit in 32 bits) is block + + EXT3_BLOCKS_PER_GROUP(sb)- 1. block + EXT3_BLOCKS_PER_GROUP(sb) wraps + back around to 0. 
+ + so, make things clearer with "first_block" and "last_block" where those + are first and last, inclusive, and use <, > rather than <, >=. + + Finally, the last block group may be smaller than the rest, so account + for this on the last pass through: last_block = sb->s_blocks_count - 1; + +Signed-off-by: Eric Sandeen +Cc: Mingming Cao +Signed-off-by: Andrew Morton + +Index: linux-2.6.17-1.2654.el5/fs/ext3/super.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/super.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/super.c +@@ -1132,7 +1132,8 @@ static int ext3_setup_super(struct super + static int ext3_check_descriptors (struct super_block * sb) + { + struct ext3_sb_info *sbi = EXT3_SB(sb); +- ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block); ++ ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ++ ext3_fsblk_t last_block; + struct ext3_group_desc * gdp = NULL; + int desc_block = 0; + int i; +@@ -1141,12 +1142,17 @@ static int ext3_check_descriptors (struc + + for (i = 0; i < sbi->s_groups_count; i++) + { ++ if (i == sbi->s_groups_count - 1) ++ last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; ++ else ++ last_block = first_block + ++ (EXT3_BLOCKS_PER_GROUP(sb) - 1); ++ + if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext3_group_desc *) + sbi->s_group_desc[desc_block++]->b_data; +- if (le32_to_cpu(gdp->bg_block_bitmap) < block || +- le32_to_cpu(gdp->bg_block_bitmap) >= +- block + EXT3_BLOCKS_PER_GROUP(sb)) ++ if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || ++ le32_to_cpu(gdp->bg_block_bitmap) > last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Block bitmap for group %d" +@@ -1155,9 +1161,8 @@ static int ext3_check_descriptors (struc + le32_to_cpu(gdp->bg_block_bitmap)); + return 0; + } +- if (le32_to_cpu(gdp->bg_inode_bitmap) < block || +- le32_to_cpu(gdp->bg_inode_bitmap) >= +- block + EXT3_BLOCKS_PER_GROUP(sb)) ++ if 
(le32_to_cpu(gdp->bg_inode_bitmap) < first_block || ++ le32_to_cpu(gdp->bg_inode_bitmap) > last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Inode bitmap for group %d" +@@ -1166,9 +1171,9 @@ static int ext3_check_descriptors (struc + le32_to_cpu(gdp->bg_inode_bitmap)); + return 0; + } +- if (le32_to_cpu(gdp->bg_inode_table) < block || +- le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >= +- block + EXT3_BLOCKS_PER_GROUP(sb)) ++ if (le32_to_cpu(gdp->bg_inode_table) < first_block || ++ le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > ++ last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Inode table for group %d" +@@ -1177,7 +1182,7 @@ static int ext3_check_descriptors (struc + le32_to_cpu(gdp->bg_inode_table)); + return 0; + } +- block += EXT3_BLOCKS_PER_GROUP(sb); ++ first_block += EXT3_BLOCKS_PER_GROUP(sb); + gdp++; + } + +@@ -1580,10 +1585,9 @@ static int ext3_fill_super (struct super + + if (EXT3_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext3; +- sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) - +- le32_to_cpu(es->s_first_data_block) + +- EXT3_BLOCKS_PER_GROUP(sb) - 1) / +- EXT3_BLOCKS_PER_GROUP(sb); ++ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - ++ le32_to_cpu(es->s_first_data_block) - 1) ++ / EXT3_BLOCKS_PER_GROUP(sb)) + 1; + db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / + EXT3_DESC_PER_BLOCK(sb); + sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), + +Date: Tue, 19 Sep 2006 15:32:42 -0500 +From: Eric Sandeen +Subject: [RHEL5 Patch 2/3] (resend) Fix more ext3 overflows at 16T + +This is in akpm's tree as +more-ext3-16t-overflow-fixes.patch and more-ext3-16t-overflow-fixes-fix.patch + +Some of the changes in balloc.c are just cosmetic - +if they overflow they'll then underflow and things are fine. + +5th hunk actually fixes an overflow problem. + +Also check for potential overflows in inode & block counts when resizing. 
+ +Signed-off-by: Eric Sandeen +Cc: Mingming Cao +Signed-off-by: Andrew Morton + +Index: linux-2.6.17-1.2654.el5/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/balloc.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/balloc.c +@@ -168,7 +168,7 @@ goal_in_my_reservation(struct ext3_reser + ext3_fsblk_t group_first_block, group_last_block; + + group_first_block = ext3_group_first_block_no(sb, group); +- group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; ++ group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) +@@ -897,7 +897,7 @@ static int alloc_new_reservation(struct + spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + + group_first_block = ext3_group_first_block_no(sb, group); +- group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; ++ group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if (grp_goal < 0) + start_block = group_first_block; +@@ -1063,7 +1063,7 @@ ext3_try_to_allocate_with_rsv(struct sup + struct ext3_reserve_window_node * my_rsv, + unsigned long *count, int *errp) + { +- ext3_fsblk_t group_first_block; ++ ext3_fsblk_t group_first_block, group_last_block; + ext3_grpblk_t ret = 0; + int fatal; + unsigned long num = *count; +@@ -1100,6 +1100,7 @@ ext3_try_to_allocate_with_rsv(struct sup + * first block is the block number of the first block in this group + */ + group_first_block = ext3_group_first_block_no(sb, group); ++ group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + /* + * Basically we will allocate a new block from inode's reservation +@@ -1132,8 +1133,8 @@ ext3_try_to_allocate_with_rsv(struct sup + try_to_extend_reservation(my_rsv, sb, + *count-my_rsv->rsv_end + grp_goal - 1); + +- if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) +- || (my_rsv->rsv_end < 
group_first_block)) ++ if ((my_rsv->rsv_start > group_last_block) || ++ (my_rsv->rsv_end < group_first_block)) + BUG(); + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal, + &num, &my_rsv->rsv_window); +Index: linux-2.6.17-1.2654.el5/fs/ext3/resize.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/ext3/resize.c ++++ linux-2.6.17-1.2654.el5/fs/ext3/resize.c +@@ -730,6 +730,18 @@ int ext3_group_add(struct super_block *s + return -EPERM; + } + ++ if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < ++ le32_to_cpu(es->s_blocks_count)) { ++ ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n"); ++ return -EINVAL; ++ } ++ ++ if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < ++ le32_to_cpu(es->s_inodes_count)) { ++ ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n"); ++ return -EINVAL; ++ } ++ + if (reserved_gdb || gdb_off == 0) { + if (!EXT3_HAS_COMPAT_FEATURE(sb, + EXT3_FEATURE_COMPAT_RESIZE_INODE)){ +@@ -958,6 +970,11 @@ int ext3_group_extend(struct super_block + + add = EXT3_BLOCKS_PER_GROUP(sb) - last; + ++ if (o_blocks_count + add < o_blocks_count) { ++ ext3_warning(sb, __FUNCTION__, "blocks_count overflow"); ++ return -EINVAL; ++ } ++ + if (o_blocks_count + add > n_blocks_count) + add = n_blocks_count - o_blocks_count; + diff --git a/ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch b/ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch new file mode 100644 index 0000000..484e828 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch @@ -0,0 +1,86 @@ +Date: Mon, 23 Oct 2006 15:45:05 -0500 +From: Eric Sandeen +Subject: [PATCH RHEL5] - handle ext3 directory corruption better + +This is for BZ 209907 : kernel Soft lockup detected on corrupted ext3 filesystem. + +This patch is now in -mm. 
+ +I've been using Steve Grubb's purely evil "fsfuzzer" tool, at +http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz + +Basically it makes a filesystem, splats some random bits over it, then +tries to mount it and do some simple filesystem actions. + +At best, the filesystem catches the corruption gracefully. At worst, +things spin out of control. + +As you might guess, we found a couple places in ext3 where things spin out +of control :) + +First, we had a corrupted directory that was never checked for +consistency... it was corrupt, and pointed to another bad "entry" of +length 0. The for() loop looped forever, since the length of +ext3_next_entry(de) was 0, and we kept looking at the same pointer over and +over and over and over... I modeled this check and subsequent action on +what is done for other directory types in ext3_readdir... + +(adding this check adds some computational expense; I am testing a followup +patch to reduce the number of times we check and re-check these directory +entries, in all cases. Thanks for the idea, Andreas). + +Next we had a root directory inode which had a corrupted size, claimed to +be > 200M on a 4M filesystem. There was only really 1 block in the +directory, but because the size was so large, readdir kept coming back for +more, spewing thousands of printk's along the way. + +Per Andreas' suggestion, if we're in this read error condition and we're +trying to read an offset which is greater than i_blocks worth of bytes, +stop trying, and break out of the loop. + +With these two changes fsfuzz test survives quite well on ext3. 
+ +Signed-off-by: Eric Sandeen +Cc: +Signed-off-by: Andrew Morton +--- + + fs/ext3/dir.c | 3 +++ + fs/ext3/namei.c | 9 +++++++++ + 2 files changed, 12 insertions(+) + +Index: linux-2.6.18-1.2732.el5/fs/ext3/dir.c +=================================================================== +--- linux-2.6.18-1.2732.el5.orig/fs/ext3/dir.c ++++ linux-2.6.18-1.2732.el5/fs/ext3/dir.c +@@ -151,6 +151,9 @@ static int ext3_readdir(struct file * fi + ext3_error (sb, "ext3_readdir", + "directory #%lu contains a hole at offset %lu", + inode->i_ino, (unsigned long)filp->f_pos); ++ /* corrupt size? Maybe no more blocks to read */ ++ if (filp->f_pos > inode->i_blocks << 9) ++ break; + filp->f_pos += sb->s_blocksize - offset; + continue; + } +Index: linux-2.6.18-1.2732.el5/fs/ext3/namei.c +=================================================================== +--- linux-2.6.18-1.2732.el5.orig/fs/ext3/namei.c ++++ linux-2.6.18-1.2732.el5/fs/ext3/namei.c +@@ -551,6 +551,15 @@ static int htree_dirblock_to_tree(struct + dir->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(0)); + for (; de < top; de = ext3_next_entry(de)) { ++ if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, ++ (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) ++ +((char *)de - bh->b_data))) { ++ /* On error, skip the f_pos to the next block. */ ++ dir_file->f_pos = (dir_file->f_pos | ++ (dir->i_sb->s_blocksize - 1)) + 1; ++ brelse (bh); ++ return count; ++ } + ext3fs_dirhash(de->name, de->name_len, hinfo); + if ((hinfo->hash < start_hash) || + ((hinfo->hash == start_hash) && + diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch index d79674b..906caf2 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch @@ -6,10 +6,10 @@ group descriptor to avoid reading or scanning them at e2fsck time.
A checksum of each group descriptor is used to ensure that corruption in the group descriptor's bit flags does not cause incorrect operation. -Index: linux-2.6.18.8.orig/include/linux/ext3_fs.h +Index: linux-rhel5/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.18.8.orig.orig/include/linux/ext3_fs.h 2007-07-02 11:09:25.000000000 +0200 -+++ linux-2.6.18.8.orig/include/linux/ext3_fs.h 2007-07-02 11:09:31.000000000 +0200 +--- linux-rhel5.orig/include/linux/ext3_fs.h 2007-07-18 17:32:04.000000000 +0200 ++++ linux-rhel5/include/linux/ext3_fs.h 2007-07-18 17:32:15.000000000 +0200 @@ -150,16 +150,22 @@ struct ext3_allocation_request { */ struct ext3_group_desc @@ -53,10 +53,10 @@ Index: linux-2.6.18.8.orig/include/linux/ext3_fs.h EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -Index: linux-2.6.18.8.orig/fs/ext3/resize.c +Index: linux-rhel5/fs/ext3/resize.c =================================================================== ---- linux-2.6.18.8.orig.orig/fs/ext3/resize.c 2007-06-21 14:53:15.000000000 +0200 -+++ linux-2.6.18.8.orig/fs/ext3/resize.c 2007-07-02 11:09:26.000000000 +0200 +--- linux-rhel5.orig/fs/ext3/resize.c 2007-07-15 09:36:00.000000000 +0200 ++++ linux-rhel5/fs/ext3/resize.c 2007-07-18 17:32:15.000000000 +0200 @@ -18,6 +18,7 @@ #include #include @@ -65,7 +65,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/resize.c #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) -@@ -822,6 +823,7 @@ int ext3_group_add(struct super_block *s +@@ -834,6 +835,7 @@ int ext3_group_add(struct super_block *s gdp->bg_inode_table = cpu_to_le32(input->inode_table); gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); @@ -73,10 +73,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/resize.c /* * Make the new blocks and inodes valid next. 
We do this before -Index: linux-2.6.18.8.orig/fs/ext3/super.c +Index: linux-rhel5/fs/ext3/super.c =================================================================== ---- linux-2.6.18.8.orig.orig/fs/ext3/super.c 2007-07-02 11:09:26.000000000 +0200 -+++ linux-2.6.18.8.orig/fs/ext3/super.c 2007-07-02 11:18:04.000000000 +0200 +--- linux-rhel5.orig/fs/ext3/super.c 2007-07-18 17:32:06.000000000 +0200 ++++ linux-rhel5/fs/ext3/super.c 2007-07-18 17:35:03.000000000 +0200 @@ -41,6 +41,7 @@ #include "xattr.h" #include "acl.h" @@ -177,7 +177,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/super.c /* Called at mount-time, super-block is locked */ static int ext3_check_descriptors (struct super_block * sb) { -@@ -1274,6 +1360,13 @@ static int ext3_check_descriptors (struc +@@ -1279,6 +1365,13 @@ static int ext3_check_descriptors (struc le32_to_cpu(gdp->bg_inode_table)); return 0; } @@ -188,13 +188,13 @@ Index: linux-2.6.18.8.orig/fs/ext3/super.c + le16_to_cpu(gdp->bg_checksum)); + return 0; + } - block += EXT3_BLOCKS_PER_GROUP(sb); + first_block += EXT3_BLOCKS_PER_GROUP(sb); gdp++; } -Index: linux-2.6.18.8.orig/fs/ext3/group.h +Index: linux-rhel5/fs/ext3/group.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ linux-2.6.18.8.orig/fs/ext3/group.h 2007-07-02 11:09:26.000000000 +0200 ++++ linux-rhel5/fs/ext3/group.h 2007-07-18 17:32:15.000000000 +0200 @@ -0,0 +1,29 @@ +/* + * linux/fs/ext3/group.h @@ -225,10 +225,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/group.h + struct buffer_head *bh, int group, + struct ext3_group_desc *desc); +#endif /* _LINUX_EXT3_GROUP_H */ -Index: linux-2.6.18.8.orig/fs/ext3/ialloc.c +Index: linux-rhel5/fs/ext3/ialloc.c =================================================================== ---- linux-2.6.18.8.orig.orig/fs/ext3/ialloc.c 2007-07-02 11:09:26.000000000 +0200 -+++ linux-2.6.18.8.orig/fs/ext3/ialloc.c 2007-07-02 11:19:43.000000000 +0200 +--- linux-rhel5.orig/fs/ext3/ialloc.c 
2007-07-18 17:32:05.000000000 +0200 ++++ linux-rhel5/fs/ext3/ialloc.c 2007-07-18 17:32:15.000000000 +0200 @@ -28,6 +28,7 @@ #include "xattr.h" @@ -421,12 +421,12 @@ Index: linux-2.6.18.8.orig/fs/ext3/ialloc.c - inode->i_ino = ino; + inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; inode->i_blocks = 0; -Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; +Index: linux-rhel5/fs/ext3/mballoc.c =================================================================== ---- linux-2.6.18.8.orig.orig/fs/ext3/mballoc.c 2007-07-02 11:09:25.000000000 +0200 -+++ linux-2.6.18.8.orig/fs/ext3/mballoc.c 2007-07-02 11:09:26.000000000 +0200 +--- linux-rhel5.orig/fs/ext3/mballoc.c 2007-07-18 17:32:04.000000000 +0200 ++++ linux-rhel5/fs/ext3/mballoc.c 2007-07-18 17:32:15.000000000 +0200 @@ -36,6 +36,8 @@ #include #include @@ -507,7 +507,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &meta_group_info[j]->bb_state); -@@ -2972,9 +2984,17 @@ int ext3_mb_mark_diskspace_used(struct e +@@ -2958,9 +2970,17 @@ int ext3_mb_mark_diskspace_used(struct e mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); @@ -525,7 +525,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); -@@ -4343,6 +4363,7 @@ do_more: +@@ -4346,6 +4366,7 @@ do_more: spin_lock(sb_bgl_lock(sbi, block_group)); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); @@ -533,10 +533,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); -Index: linux-2.6.18.8.orig/fs/ext3/balloc.c +Index: 
linux-rhel5/fs/ext3/balloc.c =================================================================== ---- linux-2.6.18.8.orig.orig/fs/ext3/balloc.c 2007-07-02 11:09:25.000000000 +0200 -+++ linux-2.6.18.8.orig/fs/ext3/balloc.c 2007-07-02 11:09:26.000000000 +0200 +--- linux-rhel5.orig/fs/ext3/balloc.c 2007-07-18 17:32:04.000000000 +0200 ++++ linux-rhel5/fs/ext3/balloc.c 2007-07-18 17:32:15.000000000 +0200 @@ -20,6 +20,7 @@ #include #include @@ -650,7 +650,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/balloc.c spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); -@@ -1433,8 +1516,11 @@ allocated: +@@ -1434,8 +1517,11 @@ allocated: ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); diff --git a/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch index f71e470005..db4a12c 100644 --- a/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch +++ b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch @@ -1,6 +1,7 @@ -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c RH_2_6_9_42_0_3/fs/ext3/ialloc.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/ialloc.c 2007-02-16 07:22:28.000000000 +0200 +Index: linux-2.6.9/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.9.orig/fs/ext3/ialloc.c 2007-03-13 00:47:22.000000000 +0100 ++++ linux-2.6.9/fs/ext3/ialloc.c 2007-07-26 09:23:37.000000000 +0200 @@ -419,7 +419,8 @@ static int find_group_other(struct super * For other inodes, search forward from the parent directory's block * group to find a free inode. 
@@ -53,10 +54,19 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c RH_2_6_9_42_0_3/fs/ext3/ialloc.c if (S_ISDIR(mode)) { if (test_opt (sb, OLDALLOC)) group = find_group_dir(sb, dir); -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c RH_2_6_9_42_0_3/fs/ext3/ioctl.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/ioctl.c 2007-02-16 07:22:28.000000000 +0200 -@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st +Index: linux-2.6.9/fs/ext3/ioctl.c +=================================================================== +--- linux-2.6.9.orig/fs/ext3/ioctl.c 2007-03-13 00:47:22.000000000 +0100 ++++ linux-2.6.9/fs/ext3/ioctl.c 2007-07-26 09:25:24.000000000 +0200 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + + int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, +@@ -25,6 +26,31 @@ int ext3_ioctl (struct inode * inode, st ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { @@ -88,9 +98,10 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c RH_2_6_9_42_0_3/fs/ext3/ioctl.c case EXT3_IOC_GETFLAGS: flags = ei->i_flags & EXT3_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); -diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c ---- RH_2_6_9_42_0_3.orig/fs/ext3/namei.c 2006-10-23 13:32:59.000000000 +0300 -+++ RH_2_6_9_42_0_3/fs/ext3/namei.c 2007-02-22 18:58:13.000000000 +0200 +Index: linux-2.6.9/fs/ext3/namei.c +=================================================================== +--- linux-2.6.9.orig/fs/ext3/namei.c 2007-03-13 00:47:27.000000000 +0100 ++++ linux-2.6.9/fs/ext3/namei.c 2007-07-26 09:23:37.000000000 +0200 @@ -97,6 +97,7 @@ struct dx_entry __le32 block; }; @@ -114,7 +125,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c #ifdef CONFIG_EXT3_INDEX static inline unsigned dx_get_block (struct dx_entry *entry); static void dx_set_block (struct dx_entry *entry, unsigned value); -@@ -1624,6 
+1633,20 @@ static int ext3_add_nondir(handle_t *han +@@ -1633,6 +1642,20 @@ static int ext3_add_nondir(handle_t *han return err; } @@ -135,7 +146,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c /* * By the time this is called, we already have created * the directory cache entry for the new file, but it -@@ -1649,7 +1672,7 @@ retry: +@@ -1658,7 +1681,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; @@ -144,7 +155,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext3_file_inode_operations; -@@ -1683,7 +1706,7 @@ retry: +@@ -1692,7 +1715,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; @@ -153,7 +164,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -@@ -1719,7 +1742,7 @@ retry: +@@ -1728,7 +1751,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; @@ -162,7 +173,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; -@@ -2124,7 +2147,7 @@ retry: +@@ -2133,7 +2156,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; @@ -171,9 +182,10 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; -diff -urp RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h RH_2_6_9_42_0_3/include/linux/ext3_fs.h ---- RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h 2006-10-23 13:32:46.000000000 +0300 -+++ RH_2_6_9_42_0_3/include/linux/ext3_fs.h 2007-02-16 07:22:28.000000000 +0200 +Index: linux-2.6.9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9.orig/include/linux/ext3_fs.h 2007-03-13 00:47:22.000000000 +0100 ++++ linux-2.6.9/include/linux/ext3_fs.h 2007-07-26 09:23:37.000000000 +0200 @@ -741,7 +741,8 @@ extern int 
ext3fs_dirhash(const char *na dx_hash_info *hinfo); diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch b/ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch new file mode 100644 index 0000000..1d075ff --- /dev/null +++ b/ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch @@ -0,0 +1,443 @@ +Index: linux-2.6.18.8/fs/ext3/iopen.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-2.6.18.8/fs/ext3/iopen.c 2007-07-24 14:00:57.000000000 +0200 +@@ -0,0 +1,254 @@ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ * ++ * ++ * Invariants: ++ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias ++ * for an inode at one time. ++ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry ++ * aliases on an inode at the same time. ++ * ++ * If we have any connected dentry aliases for an inode, use one of those ++ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED ++ * dentry for this inode, which thereafter will be found by the dcache ++ * when looking up this inode number in __iopen__, so we don't return here ++ * until it is gone. ++ * ++ * If we get an inode via a regular name lookup, then we "rename" the ++ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures ++ * existing users of the disconnected dentry will continue to use the same ++ * dentry as the connected users, and there will never be both kinds of ++ * dentry aliases at one time. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. ++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ spin_lock(&alternate->d_lock); ++ alternate->d_flags |= DCACHE_REFERENCED; ++ spin_unlock(&alternate->d_lock); ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++ 
list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++ ++ d_rehash_cond(dentry, 0); ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++/* This function is spliced into ext3_lookup and does the move of a ++ * disconnected dentry (if it exists) to a connected dentry. ++ */ ++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, ++ int rehash) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* verify this dentry is really new */ ++ assert(dentry->d_inode == NULL); ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ if (rehash) ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ assert(list_empty(&dentry->d_subdirs)); ++ ++ spin_lock(&dcache_lock); ++ if (!inode) ++ goto do_rehash; ++ ++ if (!test_opt(inode->i_sb, IOPEN)) ++ goto do_instantiate; ++ ++ /* preferrably return a connected dentry */ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) ++ goto do_instantiate; ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~DCACHE_DISCONNECTED; ++ security_d_instantiate(goal, inode); ++ __d_drop(dentry); ++ d_rehash_cond(dentry, 0); ++ __d_move(goal, dentry); ++ spin_unlock(&dcache_lock); ++ iput(inode); ++ ++ return goal; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++do_instantiate: ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++do_rehash: ++ if (rehash) ++ d_rehash_cond(dentry, 0); ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. 
++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0.
++ */ ++int ext3_iopen_get_inode(struct inode *inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +Index: linux-2.6.18.8/fs/ext3/iopen.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ linux-2.6.18.8/fs/ext3/iopen.h 2007-07-24 13:59:56.000000000 +0200 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode *inode); ++extern struct dentry *iopen_connect_dentry(struct dentry *dentry, ++ struct inode *inode, int rehash); +Index: linux-2.6.18.8/fs/ext3/inode.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/inode.c 2007-07-24 12:25:00.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/inode.c 2007-07-24 13:59:56.000000000 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + static int ext3_writepage_trans_blocks(struct inode *inode); +@@ -2593,6 +2594,8 @@ void ext3_read_inode(struct inode * inod + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif + ei->i_block_alloc_info = NULL; ++ if (ext3_iopen_get_inode(inode)) ++ return; + + if (__ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; +Index: linux-2.6.18.8/fs/ext3/super.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/super.c 2007-07-24 12:25:00.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/super.c 2007-07-24 13:59:56.000000000 +0200 +@@ -677,6 +677,7 @@ enum { + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, ++ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_grpquota + }; + +@@ -726,6 +727,9 @@ static match_table_t tokens = { + {Opt_noquota, "noquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, ++ {Opt_iopen, "iopen"}, ++ {Opt_noiopen, "noiopen"}, ++ {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_barrier, "barrier=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, +@@ -1041,6 +1045,18 @@ clear_qf_name: + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; ++ case Opt_iopen: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case 
Opt_noiopen: ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_iopen_nopriv: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; + case Opt_ignore: + break; + case Opt_resize: +Index: linux-2.6.18.8/fs/ext3/namei.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/namei.c 2007-07-24 13:59:54.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/namei.c 2007-07-24 13:59:56.000000000 +0200 +@@ -39,6 +39,7 @@ + + #include "namei.h" + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -1013,6 +1014,9 @@ static struct dentry *ext3_lookup(struct + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -1028,7 +1032,7 @@ static struct dentry *ext3_lookup(struct + if (!inode) + return ERR_PTR(-EACCES); + } +- return d_splice_alias(inode, dentry); ++ return iopen_connect_dentry(dentry, inode, 1); + } + + +@@ -2077,10 +2081,6 @@ static int ext3_rmdir (struct inode * di + inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; +- /* There's no need to set i_disksize: the fact that i_nlink is +- * zero will ensure that the right thing happens during any +- * recovery. 
*/ +- inode->i_size = 0; + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); +@@ -2204,6 +2204,23 @@ out_stop: + return err; + } + ++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ ++static int ext3_add_link(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int err = ext3_add_entry(handle, dentry, inode); ++ if (!err) { ++ err = ext3_mark_inode_dirty(handle, inode); ++ if (err == 0) { ++ dput(iopen_connect_dentry(dentry, inode, 0)); ++ return 0; ++ } ++ } ++ ext3_dec_count(handle, inode); ++ iput(inode); ++ return err; ++} ++ + static int ext3_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) + { +@@ -2233,7 +2250,8 @@ retry: + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + +- err = ext3_add_nondir(handle, dentry, inode); ++ err = ext3_add_link(handle, dentry, inode); ++ ext3_orphan_del(handle, inode); + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; +Index: linux-2.6.18.8/fs/ext3/Makefile +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/Makefile 2007-07-24 12:25:00.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/Makefile 2007-07-24 13:59:56.000000000 +0200 +@@ -4,7 +4,7 @@ + + obj-$(CONFIG_EXT3_FS) += ext3.o + +-ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +Index: linux-2.6.18.8/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.18.8.orig/include/linux/ext3_fs.h 2007-07-24 13:59:54.000000000 +0200 ++++ linux-2.6.18.8/include/linux/ext3_fs.h 2007-07-24 13:59:56.000000000 +0200 +@@ -371,6 +371,8 @@ 
struct ext3_inode { + #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ + #define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ + #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ ++#define EXT3_MOUNT_IOPEN 0x400000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series new file mode 100644 index 0000000..24698be --- /dev/null +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series @@ -0,0 +1,17 @@ +ext3-wantedi-2.6-rhel4.patch +iopen-2.6.18-rhel5.patch +ext3-map_inode_page-2.6.18.patch +export-ext3-2.6-rhel4.patch +ext3-include-fixes-2.6-rhel4.patch +ext3-extents-2.6.18-vanilla.patch +ext3-mballoc3-core.patch +ext3-mballoc3-2.6.18.patch +ext3-nlinks-2.6.9.patch +ext3-ialloc-2.6.patch +ext3-remove-cond_resched-calls-2.6.12.patch +ext3-filterdata-sles10.patch +ext3-uninit-2.6.18.patch +ext3-nanosecond-2.6.18-vanilla.patch +ext3-inode-version-2.6.18-vanilla.patch +ext3-mmp-2.6.18-vanilla.patch +ext3-unlink-race.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series index b7b2ff1..94bc6ad 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series @@ -10,7 +10,9 @@ ext3-nlinks-2.6.9.patch ext3-ialloc-2.6.patch ext3-remove-cond_resched-calls-2.6.12.patch ext3-filterdata-sles10.patch +ext3-16tb-overflow-fixes.patch ext3-uninit-2.6.18.patch ext3-nanosecond-2.6.18-vanilla.patch ext3-inode-version-2.6.18-vanilla.patch ext3-mmp-2.6.18-vanilla.patch +ext3-handle-directory-corruption-better.patch