Whamcloud - gitweb
Branch b1_6
authorjohann <johann>
Fri, 3 Aug 2007 10:25:25 +0000 (10:25 +0000)
committerjohann <johann>
Fri, 3 Aug 2007 10:25:25 +0000 (10:25 +0000)
b=11802
i=adilger
i=kalpak

Severity   : normal
Bugzilla   : 11802
Description: lustre support for RHEL5
Details    : Add support for RHEL5.

ldiskfs/configure.ac
ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch
ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch
ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6.18-vanilla.series

index d5c1f7c..7adc144 100644 (file)
@@ -72,6 +72,7 @@ case $LINUXRELEASE in
 2.6.12*) LDISKFS_SERIES="2.6.12-vanilla.series" ;;
 2.6.15*) LDISKFS_SERIES="2.6-fc5.series";;
 2.6.16*) LDISKFS_SERIES="2.6-sles10.series";;
+2.6.18-*el5*) LDISKFS_SERIES="2.6-rhel5.series";;
 2.6.18*) LDISKFS_SERIES="2.6.18-vanilla.series";;
 *) AC_MSG_WARN([Unknown kernel version $LINUXRELEASE, fix ldiskfs/configure.ac])
 esac
diff --git a/ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch b/ldiskfs/kernel_patches/patches/ext3-16tb-overflow-fixes.patch
new file mode 100644 (file)
index 0000000..935f97b
--- /dev/null
@@ -0,0 +1,484 @@
+Date: Tue, 19 Sep 2006 15:33:04 -0500
+From: Eric Sandeen <esandeen@redhat.com>
+Subject: [RHEL5 Patch 3/3] (resend) Fix ext3 32-bit inodes
+
+This one is in the -mm tree as ext3-inode-numbers-are-unsigned-long.patch,
+resent to accommodate sct's request for uints instead; just pinged akpm
+to pick up the ulonglong->uint change.
+
+This is primarily format string fixes, with changes to ialloc.c where large
+inode counts could overflow, and also pass around journal_inum as an
+unsigned long, just to be pedantic about it....
+
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Cc: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+
+Index: linux-2.6.17-1.2654.el5/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/ialloc.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/ialloc.c
+@@ -202,7 +202,7 @@ error_return:
+ static int find_group_dir(struct super_block *sb, struct inode *parent)
+ {
+       int ngroups = EXT3_SB(sb)->s_groups_count;
+-      int freei, avefreei;
++      unsigned int freei, avefreei;
+       struct ext3_group_desc *desc, *best_desc = NULL;
+       struct buffer_head *bh;
+       int group, best_group = -1;
+@@ -261,10 +261,10 @@ static int find_group_orlov(struct super
+       struct ext3_super_block *es = sbi->s_es;
+       int ngroups = sbi->s_groups_count;
+       int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
+-      int freei, avefreei;
++      unsigned int freei, avefreei;
+       ext3_fsblk_t freeb, avefreeb;
+       ext3_fsblk_t blocks_per_dir;
+-      int ndirs;
++      unsigned int ndirs;
+       int max_debt, max_dirs, min_inodes;
+       ext3_grpblk_t min_blocks;
+       int group = -1, i;
+Index: linux-2.6.17-1.2654.el5/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/inode.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/inode.c
+@@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t 
+                        */
+                       if (!bh) {
+                               ext3_error(inode->i_sb, "ext3_free_branches",
+-                                         "Read failure, inode=%ld, block="E3FSBLK,
++                                         "Read failure, inode=%lu, block="E3FSBLK,
+                                          inode->i_ino, nr);
+                               continue;
+                       }
+Index: linux-2.6.17-1.2654.el5/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/namei.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/namei.c
+@@ -1919,8 +1919,8 @@ int ext3_orphan_add(handle_t *handle, st
+       if (!err)
+               list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
+-      jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
+-      jbd_debug(4, "orphan inode %ld will point to %d\n",
++      jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
++      jbd_debug(4, "orphan inode %lu will point to %d\n",
+                       inode->i_ino, NEXT_ORPHAN(inode));
+ out_unlock:
+       unlock_super(sb);
+Index: linux-2.6.17-1.2654.el5/fs/ext3/super.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/super.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/super.c
+@@ -45,7 +45,7 @@
+ static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+                            unsigned long journal_devnum);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+-                             int);
++                             unsigned int);
+ static void ext3_commit_super (struct super_block * sb,
+                              struct ext3_super_block * es,
+                              int sync);
+@@ -376,7 +376,7 @@ static void dump_orphan_list(struct supe
+       list_for_each(l, &sbi->s_orphan) {
+               struct inode *inode = orphan_list_entry(l);
+               printk(KERN_ERR "  "
+-                     "inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
++                     "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
+                      inode->i_sb->s_id, inode->i_ino, inode,
+                      inode->i_mode, inode->i_nlink, 
+                      NEXT_ORPHAN(inode));
+@@ -711,7 +711,7 @@ static ext3_fsblk_t get_sb_block(void **
+ }
+ static int parse_options (char *options, struct super_block *sb,
+-                        unsigned long *inum, unsigned long *journal_devnum,
++                        unsigned int *inum, unsigned long *journal_devnum,
+                         ext3_fsblk_t *n_blocks_count, int is_remount)
+ {
+       struct ext3_sb_info *sbi = EXT3_SB(sb);
+@@ -1264,17 +1264,17 @@ static void ext3_orphan_cleanup (struct 
+               DQUOT_INIT(inode);
+               if (inode->i_nlink) {
+                       printk(KERN_DEBUG
+-                              "%s: truncating inode %ld to %Ld bytes\n",
++                              "%s: truncating inode %lu to %Ld bytes\n",
+                               __FUNCTION__, inode->i_ino, inode->i_size);
+-                      jbd_debug(2, "truncating inode %ld to %Ld bytes\n",
++                      jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
+                                 inode->i_ino, inode->i_size);
+                       ext3_truncate(inode);
+                       nr_truncates++;
+               } else {
+                       printk(KERN_DEBUG
+-                              "%s: deleting unreferenced inode %ld\n",
++                              "%s: deleting unreferenced inode %lu\n",
+                               __FUNCTION__, inode->i_ino);
+-                      jbd_debug(2, "deleting unreferenced inode %ld\n",
++                      jbd_debug(2, "deleting unreferenced inode %lu\n",
+                                 inode->i_ino);
+                       nr_orphans++;
+               }
+@@ -1353,7 +1353,7 @@ static int ext3_fill_super (struct super
+       ext3_fsblk_t sb_block = get_sb_block(&data);
+       ext3_fsblk_t logic_sb_block;
+       unsigned long offset = 0;
+-      unsigned long journal_inum = 0;
++      unsigned int journal_inum = 0;
+       unsigned long journal_devnum = 0;
+       unsigned long def_mount_opts;
+       struct inode *root;
+@@ -1802,7 +1802,8 @@ static void ext3_init_journal_params(str
+       spin_unlock(&journal->j_state_lock);
+ }
+-static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum)
++static journal_t *ext3_get_journal(struct super_block *sb,
++                                 unsigned int journal_inum)
+ {
+       struct inode *journal_inode;
+       journal_t *journal;
+@@ -1937,7 +1938,7 @@ static int ext3_load_journal(struct supe
+                            unsigned long journal_devnum)
+ {
+       journal_t *journal;
+-      int journal_inum = le32_to_cpu(es->s_journal_inum);
++      unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
+       dev_t journal_dev;
+       int err = 0;
+       int really_read_only;
+@@ -2023,7 +2024,7 @@ static int ext3_load_journal(struct supe
+ static int ext3_create_journal(struct super_block * sb,
+                              struct ext3_super_block * es,
+-                             int journal_inum)
++                             unsigned int journal_inum)
+ {
+       journal_t *journal;
+@@ -2036,7 +2037,7 @@ static int ext3_create_journal(struct su
+       if (!(journal = ext3_get_journal(sb, journal_inum)))
+               return -EINVAL;
+-      printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n",
++      printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
+              journal_inum);
+       if (journal_create(journal)) {
+Index: linux-2.6.17-1.2654.el5/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/xattr.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/xattr.c
+@@ -75,7 +75,7 @@
+ #ifdef EXT3_XATTR_DEBUG
+ # define ea_idebug(inode, f...) do { \
+-              printk(KERN_DEBUG "inode %s:%ld: ", \
++              printk(KERN_DEBUG "inode %s:%lu: ", \
+                       inode->i_sb->s_id, inode->i_ino); \
+               printk(f); \
+               printk("\n"); \
+@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode
+               atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+       if (ext3_xattr_check_block(bh)) {
+ bad_block:    ext3_error(inode->i_sb, __FUNCTION__,
+-                         "inode %ld: bad block "E3FSBLK, inode->i_ino,
++                         "inode %lu: bad block "E3FSBLK, inode->i_ino,
+                          EXT3_I(inode)->i_file_acl);
+               error = -EIO;
+               goto cleanup;
+@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inod
+               atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+       if (ext3_xattr_check_block(bh)) {
+               ext3_error(inode->i_sb, __FUNCTION__,
+-                         "inode %ld: bad block "E3FSBLK, inode->i_ino,
++                         "inode %lu: bad block "E3FSBLK, inode->i_ino,
+                          EXT3_I(inode)->i_file_acl);
+               error = -EIO;
+               goto cleanup;
+@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inod
+                       le32_to_cpu(BHDR(bs->bh)->h_refcount));
+               if (ext3_xattr_check_block(bs->bh)) {
+                       ext3_error(sb, __FUNCTION__,
+-                              "inode %ld: bad block "E3FSBLK, inode->i_ino,
++                              "inode %lu: bad block "E3FSBLK, inode->i_ino,
+                               EXT3_I(inode)->i_file_acl);
+                       error = -EIO;
+                       goto cleanup;
+@@ -848,7 +848,7 @@ cleanup_dquot:
+ bad_block:
+       ext3_error(inode->i_sb, __FUNCTION__,
+-                 "inode %ld: bad block "E3FSBLK, inode->i_ino,
++                 "inode %lu: bad block "E3FSBLK, inode->i_ino,
+                  EXT3_I(inode)->i_file_acl);
+       goto cleanup;
+@@ -1077,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle
+       bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
+       if (!bh) {
+               ext3_error(inode->i_sb, __FUNCTION__,
+-                      "inode %ld: block "E3FSBLK" read error", inode->i_ino,
++                      "inode %lu: block "E3FSBLK" read error", inode->i_ino,
+                       EXT3_I(inode)->i_file_acl);
+               goto cleanup;
+       }
+       if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+           BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+               ext3_error(inode->i_sb, __FUNCTION__,
+-                      "inode %ld: bad block "E3FSBLK, inode->i_ino,
++                      "inode %lu: bad block "E3FSBLK, inode->i_ino,
+                       EXT3_I(inode)->i_file_acl);
+               goto cleanup;
+       }
+@@ -1211,7 +1211,7 @@ again:
+               bh = sb_bread(inode->i_sb, ce->e_block);
+               if (!bh) {
+                       ext3_error(inode->i_sb, __FUNCTION__,
+-                              "inode %ld: block %lu read error",
++                              "inode %lu: block %lu read error",
+                               inode->i_ino, (unsigned long) ce->e_block);
+               } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
+                               EXT3_XATTR_REFCOUNT_MAX) {
+
+Date: Tue, 19 Sep 2006 15:32:02 -0500
+From: Eric Sandeen <esandeen@redhat.com>
+Subject: [RHEL5 Patch 1/3] (resend) Fix ext3 overflows at 16T
+
+This one is in -mm as fix-ext3-mounts-at-16t.patch and 
+fix-ext3-mounts-at-16t-fix.patch  
+
+This gets things mounting for a 16T ext3 filesystem.  (A patched-up
+e2fsprogs will be needed too; working on that.)
+
+Jarod Wilson has been helping with testing.
+
+This patch fixes these issues in the kernel:
+
+o sbi->s_groups_count overflows in ext3_fill_super()
+
+       sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
+                              le32_to_cpu(es->s_first_data_block) +
+                              EXT3_BLOCKS_PER_GROUP(sb) - 1) /
+                             EXT3_BLOCKS_PER_GROUP(sb);
+
+  at 16T, s_blocks_count is already maxed out; adding
+  EXT3_BLOCKS_PER_GROUP(sb) overflows it and groups_count comes out to 0. 
+  Not really what we want, and causes a failed mount.
+
+  Changing it this way works & avoids the overflow:
+
+  (A + B - 1)/B changed to: ((A - 1)/B) + 1
+
+o ext3_check_descriptors() overflows range checks
+
+  ext3_check_descriptors() iterates over all block groups making sure
+  that various bits are within the right block ranges...  on the last pass
+  through, it is checking the error case
+
+   [item] >= block + EXT3_BLOCKS_PER_GROUP(sb)
+
+  where "block" is the first block in the last block group.  The last
+  block in this group (and the last one that will fit in 32 bits) is block
+  + EXT3_BLOCKS_PER_GROUP(sb)- 1.  block + EXT3_BLOCKS_PER_GROUP(sb) wraps
+  back around to 0.
+
+  so, make things clearer with "first_block" and "last_block" where those
+  are first and last, inclusive, and use <, > rather than <, >=.
+
+  Finally, the last block group may be smaller than the rest, so account
+  for this on the last pass through: last_block = sb->s_blocks_count - 1;
+
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Cc: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+
+Index: linux-2.6.17-1.2654.el5/fs/ext3/super.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/super.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/super.c
+@@ -1132,7 +1132,8 @@ static int ext3_setup_super(struct super
+ static int ext3_check_descriptors (struct super_block * sb)
+ {
+       struct ext3_sb_info *sbi = EXT3_SB(sb);
+-      ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block);
++      ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
++      ext3_fsblk_t last_block;
+       struct ext3_group_desc * gdp = NULL;
+       int desc_block = 0;
+       int i;
+@@ -1141,12 +1142,17 @@ static int ext3_check_descriptors (struc
+       for (i = 0; i < sbi->s_groups_count; i++)
+       {
++              if (i == sbi->s_groups_count - 1)
++                      last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
++              else
++                      last_block = first_block +
++                              (EXT3_BLOCKS_PER_GROUP(sb) - 1);
++
+               if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
+                       gdp = (struct ext3_group_desc *)
+                                       sbi->s_group_desc[desc_block++]->b_data;
+-              if (le32_to_cpu(gdp->bg_block_bitmap) < block ||
+-                  le32_to_cpu(gdp->bg_block_bitmap) >=
+-                              block + EXT3_BLOCKS_PER_GROUP(sb))
++              if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
++                  le32_to_cpu(gdp->bg_block_bitmap) > last_block)
+               {
+                       ext3_error (sb, "ext3_check_descriptors",
+                                   "Block bitmap for group %d"
+@@ -1155,9 +1161,8 @@ static int ext3_check_descriptors (struc
+                                       le32_to_cpu(gdp->bg_block_bitmap));
+                       return 0;
+               }
+-              if (le32_to_cpu(gdp->bg_inode_bitmap) < block ||
+-                  le32_to_cpu(gdp->bg_inode_bitmap) >=
+-                              block + EXT3_BLOCKS_PER_GROUP(sb))
++              if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
++                  le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
+               {
+                       ext3_error (sb, "ext3_check_descriptors",
+                                   "Inode bitmap for group %d"
+@@ -1166,9 +1171,9 @@ static int ext3_check_descriptors (struc
+                                       le32_to_cpu(gdp->bg_inode_bitmap));
+                       return 0;
+               }
+-              if (le32_to_cpu(gdp->bg_inode_table) < block ||
+-                  le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >=
+-                  block + EXT3_BLOCKS_PER_GROUP(sb))
++              if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
++                  le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
++                  last_block)
+               {
+                       ext3_error (sb, "ext3_check_descriptors",
+                                   "Inode table for group %d"
+@@ -1177,7 +1182,7 @@ static int ext3_check_descriptors (struc
+                                       le32_to_cpu(gdp->bg_inode_table));
+                       return 0;
+               }
+-              block += EXT3_BLOCKS_PER_GROUP(sb);
++              first_block += EXT3_BLOCKS_PER_GROUP(sb);
+               gdp++;
+       }
+@@ -1580,10 +1585,9 @@ static int ext3_fill_super (struct super
+       if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
+               goto cantfind_ext3;
+-      sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
+-                             le32_to_cpu(es->s_first_data_block) +
+-                             EXT3_BLOCKS_PER_GROUP(sb) - 1) /
+-                            EXT3_BLOCKS_PER_GROUP(sb);
++      sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
++                             le32_to_cpu(es->s_first_data_block) - 1)
++                                     / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
+       db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
+                  EXT3_DESC_PER_BLOCK(sb);
+       sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+
+Date: Tue, 19 Sep 2006 15:32:42 -0500
+From: Eric Sandeen <esandeen@redhat.com>
+Subject: [RHEL5 Patch 2/3] (resend) Fix more ext3 overflows at 16T
+
+This is in akpm's tree as
+more-ext3-16t-overflow-fixes.patch and more-ext3-16t-overflow-fixes-fix.patch
+
+Some of the changes in balloc.c are just cosmetic -
+if they overflow they'll then underflow and things are fine.
+
+5th hunk actually fixes an overflow problem.
+
+Also check for potential overflows in inode & block counts when resizing.
+
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Cc: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+
+Index: linux-2.6.17-1.2654.el5/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/balloc.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/balloc.c
+@@ -168,7 +168,7 @@ goal_in_my_reservation(struct ext3_reser
+       ext3_fsblk_t group_first_block, group_last_block;
+       group_first_block = ext3_group_first_block_no(sb, group);
+-      group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
++      group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+       if ((rsv->_rsv_start > group_last_block) ||
+           (rsv->_rsv_end < group_first_block))
+@@ -897,7 +897,7 @@ static int alloc_new_reservation(struct 
+       spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+       group_first_block = ext3_group_first_block_no(sb, group);
+-      group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
++      group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+       if (grp_goal < 0)
+               start_block = group_first_block;
+@@ -1063,7 +1063,7 @@ ext3_try_to_allocate_with_rsv(struct sup
+                       struct ext3_reserve_window_node * my_rsv,
+                       unsigned long *count, int *errp)
+ {
+-      ext3_fsblk_t group_first_block;
++      ext3_fsblk_t group_first_block, group_last_block;
+       ext3_grpblk_t ret = 0;
+       int fatal;
+       unsigned long num = *count;
+@@ -1100,6 +1100,7 @@ ext3_try_to_allocate_with_rsv(struct sup
+        * first block is the block number of the first block in this group
+        */
+       group_first_block = ext3_group_first_block_no(sb, group);
++      group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+       /*
+        * Basically we will allocate a new block from inode's reservation
+@@ -1132,8 +1133,8 @@ ext3_try_to_allocate_with_rsv(struct sup
+                       try_to_extend_reservation(my_rsv, sb,
+                                       *count-my_rsv->rsv_end + grp_goal - 1);
+-              if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+-                  || (my_rsv->rsv_end < group_first_block))
++              if ((my_rsv->rsv_start > group_last_block) ||
++                              (my_rsv->rsv_end < group_first_block))
+                       BUG();
+               ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal,
+                                          &num, &my_rsv->rsv_window);
+Index: linux-2.6.17-1.2654.el5/fs/ext3/resize.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/ext3/resize.c
++++ linux-2.6.17-1.2654.el5/fs/ext3/resize.c
+@@ -730,6 +730,18 @@ int ext3_group_add(struct super_block *s
+               return -EPERM;
+       }
++      if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
++          le32_to_cpu(es->s_blocks_count)) {
++              ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n");
++              return -EINVAL;
++      }
++
++      if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
++          le32_to_cpu(es->s_inodes_count)) {
++              ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n");
++              return -EINVAL;
++      }
++
+       if (reserved_gdb || gdb_off == 0) {
+               if (!EXT3_HAS_COMPAT_FEATURE(sb,
+                                            EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+@@ -958,6 +970,11 @@ int ext3_group_extend(struct super_block
+       add = EXT3_BLOCKS_PER_GROUP(sb) - last;
++      if (o_blocks_count + add < o_blocks_count) {
++              ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
++              return -EINVAL;
++      }
++
+       if (o_blocks_count + add > n_blocks_count)
+               add = n_blocks_count - o_blocks_count;
diff --git a/ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch b/ldiskfs/kernel_patches/patches/ext3-handle-directory-corruption-better.patch
new file mode 100644 (file)
index 0000000..484e828
--- /dev/null
@@ -0,0 +1,86 @@
+Date: Mon, 23 Oct 2006 15:45:05 -0500
+From: Eric Sandeen <sandeen@redhat.com>
+Subject: [PATCH RHEL5] - handle ext3 directory corruption better
+
+This is for BZ 209907 <https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=209907>: kernel Soft lockup detected on corrupted ext3 filesystem.
+
+This patch is now in -mm.
+
+I've been using Steve Grubb's purely evil "fsfuzzer" tool, at
+http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz
+
+Basically it makes a filesystem, splats some random bits over it, then
+tries to mount it and do some simple filesystem actions.
+
+At best, the filesystem catches the corruption gracefully.  At worst,
+things spin out of control.
+
+As you might guess, we found a couple places in ext3 where things spin out
+of control :)
+
+First, we had a corrupted directory that was never checked for
+consistency...  it was corrupt, and pointed to another bad "entry" of
+length 0.  The for() loop looped forever, since the length of
+ext3_next_entry(de) was 0, and we kept looking at the same pointer over and
+over and over and over...  I modeled this check and subsequent action on
+what is done for other directory types in ext3_readdir...
+
+(adding this check adds some computational expense; I am testing a followup
+patch to reduce the number of times we check and re-check these directory
+entries, in all cases.  Thanks for the idea, Andreas).
+
+Next we had a root directory inode which had a corrupted size, claimed to
+be > 200M on a 4M filesystem.  There was only really 1 block in the
+directory, but because the size was so large, readdir kept coming back for
+more, spewing thousands of printk's along the way.
+
+Per Andreas' suggestion, if we're in this read error condition and we're
+trying to read an offset which is greater than i_blocks worth of bytes,
+stop trying, and break out of the loop.
+
+With these two changes fsfuzz test survives quite well on ext3.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ fs/ext3/dir.c   |    3 +++
+ fs/ext3/namei.c |    9 +++++++++
+ 2 files changed, 12 insertions(+)
+
+Index: linux-2.6.18-1.2732.el5/fs/ext3/dir.c
+===================================================================
+--- linux-2.6.18-1.2732.el5.orig/fs/ext3/dir.c
++++ linux-2.6.18-1.2732.el5/fs/ext3/dir.c
+@@ -151,6 +151,9 @@ static int ext3_readdir(struct file * fi
+                       ext3_error (sb, "ext3_readdir",
+                               "directory #%lu contains a hole at offset %lu",
+                               inode->i_ino, (unsigned long)filp->f_pos);
++                      /* corrupt size?  Maybe no more blocks to read */
++                      if (filp->f_pos > inode->i_blocks << 9)
++                              break;
+                       filp->f_pos += sb->s_blocksize - offset;
+                       continue;
+               }
+Index: linux-2.6.18-1.2732.el5/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.18-1.2732.el5.orig/fs/ext3/namei.c
++++ linux-2.6.18-1.2732.el5/fs/ext3/namei.c
+@@ -551,6 +551,15 @@ static int htree_dirblock_to_tree(struct
+                                          dir->i_sb->s_blocksize -
+                                          EXT3_DIR_REC_LEN(0));
+       for (; de < top; de = ext3_next_entry(de)) {
++              if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
++                                      (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
++                                              +((char *)de - bh->b_data))) {
++                      /* On error, skip the f_pos to the next block. */
++                      dir_file->f_pos = (dir_file->f_pos |
++                                      (dir->i_sb->s_blocksize - 1)) + 1;
++                      brelse (bh);
++                      return count;
++              }
+               ext3fs_dirhash(de->name, de->name_len, hinfo);
+               if ((hinfo->hash < start_hash) ||
+                   ((hinfo->hash == start_hash) &&
+
index d79674b..906caf2 100644 (file)
@@ -6,10 +6,10 @@ group descriptor to avoid reading or scanning them at e2fsck time.
 A checksum of each group descriptor is used to ensure that corruption in
 the group descriptor's bit flags does not cause incorrect operation.
 
-Index: linux-2.6.18.8.orig/include/linux/ext3_fs.h
+Index: linux-rhel5/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.18.8.orig.orig/include/linux/ext3_fs.h   2007-07-02 11:09:25.000000000 +0200
-+++ linux-2.6.18.8.orig/include/linux/ext3_fs.h        2007-07-02 11:09:31.000000000 +0200
+--- linux-rhel5.orig/include/linux/ext3_fs.h   2007-07-18 17:32:04.000000000 +0200
++++ linux-rhel5/include/linux/ext3_fs.h        2007-07-18 17:32:15.000000000 +0200
 @@ -150,16 +150,22 @@ struct ext3_allocation_request {
   */
  struct ext3_group_desc
@@ -53,10 +53,10 @@ Index: linux-2.6.18.8.orig/include/linux/ext3_fs.h
                                         EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
                                         EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
  
-Index: linux-2.6.18.8.orig/fs/ext3/resize.c
+Index: linux-rhel5/fs/ext3/resize.c
 ===================================================================
---- linux-2.6.18.8.orig.orig/fs/ext3/resize.c  2007-06-21 14:53:15.000000000 +0200
-+++ linux-2.6.18.8.orig/fs/ext3/resize.c       2007-07-02 11:09:26.000000000 +0200
+--- linux-rhel5.orig/fs/ext3/resize.c  2007-07-15 09:36:00.000000000 +0200
++++ linux-rhel5/fs/ext3/resize.c       2007-07-18 17:32:15.000000000 +0200
 @@ -18,6 +18,7 @@
  #include <linux/errno.h>
  #include <linux/slab.h>
@@ -65,7 +65,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/resize.c
  
  #define outside(b, first, last)       ((b) < (first) || (b) >= (last))
  #define inside(b, first, last)        ((b) >= (first) && (b) < (last))
-@@ -822,6 +823,7 @@ int ext3_group_add(struct super_block *s
+@@ -834,6 +835,7 @@ int ext3_group_add(struct super_block *s
        gdp->bg_inode_table = cpu_to_le32(input->inode_table);
        gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
        gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
@@ -73,10 +73,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/resize.c
  
        /*
         * Make the new blocks and inodes valid next.  We do this before
-Index: linux-2.6.18.8.orig/fs/ext3/super.c
+Index: linux-rhel5/fs/ext3/super.c
 ===================================================================
---- linux-2.6.18.8.orig.orig/fs/ext3/super.c   2007-07-02 11:09:26.000000000 +0200
-+++ linux-2.6.18.8.orig/fs/ext3/super.c        2007-07-02 11:18:04.000000000 +0200
+--- linux-rhel5.orig/fs/ext3/super.c   2007-07-18 17:32:06.000000000 +0200
++++ linux-rhel5/fs/ext3/super.c        2007-07-18 17:35:03.000000000 +0200
 @@ -41,6 +41,7 @@
  #include "xattr.h"
  #include "acl.h"
@@ -177,7 +177,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/super.c
  /* Called at mount-time, super-block is locked */
  static int ext3_check_descriptors (struct super_block * sb)
  {
-@@ -1274,6 +1360,13 @@ static int ext3_check_descriptors (struc
+@@ -1279,6 +1365,13 @@ static int ext3_check_descriptors (struc
                                        le32_to_cpu(gdp->bg_inode_table));
                        return 0;
                }
@@ -188,13 +188,13 @@ Index: linux-2.6.18.8.orig/fs/ext3/super.c
 +                                 le16_to_cpu(gdp->bg_checksum));
 +                      return 0;
 +              }
-               block += EXT3_BLOCKS_PER_GROUP(sb);
+               first_block += EXT3_BLOCKS_PER_GROUP(sb);
                gdp++;
        }
-Index: linux-2.6.18.8.orig/fs/ext3/group.h
+Index: linux-rhel5/fs/ext3/group.h
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.18.8.orig/fs/ext3/group.h        2007-07-02 11:09:26.000000000 +0200
++++ linux-rhel5/fs/ext3/group.h        2007-07-18 17:32:15.000000000 +0200
 @@ -0,0 +1,29 @@
 +/*
 + *  linux/fs/ext3/group.h
@@ -225,10 +225,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/group.h
 +                                     struct buffer_head *bh, int group,
 +                                     struct ext3_group_desc *desc);
 +#endif /* _LINUX_EXT3_GROUP_H */
-Index: linux-2.6.18.8.orig/fs/ext3/ialloc.c
+Index: linux-rhel5/fs/ext3/ialloc.c
 ===================================================================
---- linux-2.6.18.8.orig.orig/fs/ext3/ialloc.c  2007-07-02 11:09:26.000000000 +0200
-+++ linux-2.6.18.8.orig/fs/ext3/ialloc.c       2007-07-02 11:19:43.000000000 +0200
+--- linux-rhel5.orig/fs/ext3/ialloc.c  2007-07-18 17:32:05.000000000 +0200
++++ linux-rhel5/fs/ext3/ialloc.c       2007-07-18 17:32:15.000000000 +0200
 @@ -28,6 +28,7 @@
  
  #include "xattr.h"
@@ -421,12 +421,12 @@ Index: linux-2.6.18.8.orig/fs/ext3/ialloc.c
 -      inode->i_ino = ino;
 +      inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
-       inode->i_blksize = PAGE_SIZE;
        inode->i_blocks = 0;
-Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c
+       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+Index: linux-rhel5/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.18.8.orig.orig/fs/ext3/mballoc.c 2007-07-02 11:09:25.000000000 +0200
-+++ linux-2.6.18.8.orig/fs/ext3/mballoc.c      2007-07-02 11:09:26.000000000 +0200
+--- linux-rhel5.orig/fs/ext3/mballoc.c 2007-07-18 17:32:04.000000000 +0200
++++ linux-rhel5/fs/ext3/mballoc.c      2007-07-18 17:32:15.000000000 +0200
 @@ -36,6 +36,8 @@
  #include <linux/seq_file.h>
  #include <linux/version.h>
@@ -507,7 +507,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c
                set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
                        &meta_group_info[j]->bb_state);
  
-@@ -2972,9 +2984,17 @@ int ext3_mb_mark_diskspace_used(struct e
+@@ -2958,9 +2970,17 @@ int ext3_mb_mark_diskspace_used(struct e
        mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
  
        spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
@@ -525,7 +525,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
        percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len);
  
-@@ -4343,6 +4363,7 @@ do_more:
+@@ -4346,6 +4366,7 @@ do_more:
        spin_lock(sb_bgl_lock(sbi, block_group));
        gdp->bg_free_blocks_count =
                cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
@@ -533,10 +533,10 @@ Index: linux-2.6.18.8.orig/fs/ext3/mballoc.c
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_mod(&sbi->s_freeblocks_counter, count);
  
-Index: linux-2.6.18.8.orig/fs/ext3/balloc.c
+Index: linux-rhel5/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.18.8.orig.orig/fs/ext3/balloc.c  2007-07-02 11:09:25.000000000 +0200
-+++ linux-2.6.18.8.orig/fs/ext3/balloc.c       2007-07-02 11:09:26.000000000 +0200
+--- linux-rhel5.orig/fs/ext3/balloc.c  2007-07-18 17:32:04.000000000 +0200
++++ linux-rhel5/fs/ext3/balloc.c       2007-07-18 17:32:15.000000000 +0200
 @@ -20,6 +20,7 @@
  #include <linux/quotaops.h>
  #include <linux/buffer_head.h>
@@ -650,7 +650,7 @@ Index: linux-2.6.18.8.orig/fs/ext3/balloc.c
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_mod(&sbi->s_freeblocks_counter, count);
  
-@@ -1433,8 +1516,11 @@ allocated:
+@@ -1434,8 +1517,11 @@ allocated:
                        ret_block, goal_hits, goal_attempts);
  
        spin_lock(sb_bgl_lock(sbi, group_no));
index f71e470..db4a12c 100644 (file)
@@ -1,6 +1,7 @@
-diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c RH_2_6_9_42_0_3/fs/ext3/ialloc.c
---- RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c      2006-10-23 13:32:46.000000000 +0300
-+++ RH_2_6_9_42_0_3/fs/ext3/ialloc.c   2007-02-16 07:22:28.000000000 +0200
+Index: linux-2.6.9/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/ialloc.c  2007-03-13 00:47:22.000000000 +0100
++++ linux-2.6.9/fs/ext3/ialloc.c       2007-07-26 09:23:37.000000000 +0200
 @@ -419,7 +419,8 @@ static int find_group_other(struct super
   * For other inodes, search forward from the parent directory's block
   * group to find a free inode.
@@ -53,10 +54,19 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ialloc.c RH_2_6_9_42_0_3/fs/ext3/ialloc.c
        if (S_ISDIR(mode)) {
                if (test_opt (sb, OLDALLOC))
                        group = find_group_dir(sb, dir);
-diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c RH_2_6_9_42_0_3/fs/ext3/ioctl.c
---- RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c       2006-10-23 13:32:46.000000000 +0300
-+++ RH_2_6_9_42_0_3/fs/ext3/ioctl.c    2007-02-16 07:22:28.000000000 +0200
-@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st
+Index: linux-2.6.9/fs/ext3/ioctl.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/ioctl.c   2007-03-13 00:47:22.000000000 +0100
++++ linux-2.6.9/fs/ext3/ioctl.c        2007-07-26 09:25:24.000000000 +0200
+@@ -13,6 +13,7 @@
+ #include <linux/ext3_jbd.h>
+ #include <linux/time.h>
+ #include <asm/uaccess.h>
++#include <linux/namei.h>
+ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+@@ -25,6 +26,31 @@ int ext3_ioctl (struct inode * inode, st
        ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
  
        switch (cmd) {
@@ -88,9 +98,10 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/ioctl.c RH_2_6_9_42_0_3/fs/ext3/ioctl.c
        case EXT3_IOC_GETFLAGS:
                flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
                return put_user(flags, (int __user *) arg);
-diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
---- RH_2_6_9_42_0_3.orig/fs/ext3/namei.c       2006-10-23 13:32:59.000000000 +0300
-+++ RH_2_6_9_42_0_3/fs/ext3/namei.c    2007-02-22 18:58:13.000000000 +0200
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2007-03-13 00:47:27.000000000 +0100
++++ linux-2.6.9/fs/ext3/namei.c        2007-07-26 09:23:37.000000000 +0200
 @@ -97,6 +97,7 @@ struct dx_entry
        __le32 block;
  };
@@ -114,7 +125,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
  #ifdef CONFIG_EXT3_INDEX
  static inline unsigned dx_get_block (struct dx_entry *entry);
  static void dx_set_block (struct dx_entry *entry, unsigned value);
-@@ -1624,6 +1633,20 @@ static int ext3_add_nondir(handle_t *han
+@@ -1633,6 +1642,20 @@ static int ext3_add_nondir(handle_t *han
        return err;
  }
  
@@ -135,7 +146,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
  /*
   * By the time this is called, we already have created
   * the directory cache entry for the new file, but it
-@@ -1649,7 +1672,7 @@ retry:
+@@ -1658,7 +1681,7 @@ retry:
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
  
@@ -144,7 +155,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext3_file_inode_operations;
-@@ -1683,7 +1706,7 @@ retry:
+@@ -1692,7 +1715,7 @@ retry:
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
  
@@ -153,7 +164,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
-@@ -1719,7 +1742,7 @@ retry:
+@@ -1728,7 +1751,7 @@ retry:
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
  
@@ -162,7 +173,7 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
-@@ -2124,7 +2147,7 @@ retry:
+@@ -2133,7 +2156,7 @@ retry:
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
  
@@ -171,9 +182,10 @@ diff -urp RH_2_6_9_42_0_3.orig/fs/ext3/namei.c RH_2_6_9_42_0_3/fs/ext3/namei.c
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_stop;
-diff -urp RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h RH_2_6_9_42_0_3/include/linux/ext3_fs.h
---- RH_2_6_9_42_0_3.orig/include/linux/ext3_fs.h       2006-10-23 13:32:46.000000000 +0300
-+++ RH_2_6_9_42_0_3/include/linux/ext3_fs.h    2007-02-16 07:22:28.000000000 +0200
+Index: linux-2.6.9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/ext3_fs.h   2007-03-13 00:47:22.000000000 +0100
++++ linux-2.6.9/include/linux/ext3_fs.h        2007-07-26 09:23:37.000000000 +0200
 @@ -741,7 +741,8 @@ extern int ext3fs_dirhash(const char *na
                          dx_hash_info *hinfo);
  
diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch b/ldiskfs/kernel_patches/patches/iopen-2.6.18-rhel5.patch
new file mode 100644 (file)
index 0000000..1d075ff
--- /dev/null
@@ -0,0 +1,443 @@
+Index: linux-2.6.18.8/fs/ext3/iopen.c
+===================================================================
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.18.8/fs/ext3/iopen.c     2007-07-24 14:00:57.000000000 +0200
+@@ -0,0 +1,254 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
++ *
++ * Invariants:
++ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
++ *     for an inode at one time.
++ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
++ *     aliases on an inode at the same time.
++ *
++ * If we have any connected dentry aliases for an inode, use one of those
++ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
++ * dentry for this inode, which thereafter will be found by the dcache
++ * when looking up this inode number in __iopen__, so we don't return here
++ * until it is gone.
++ *
++ * If we get an inode via a regular name lookup, then we "rename" the
++ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
++ * existing users of the disconnected dentry will continue to use the same
++ * dentry as the connected users, and there will never be both kinds of
++ * dentry aliases at one time.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include <linux/dcache.h>
++#include <linux/security.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN        32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry,
++                                 struct nameidata *nd)
++{
++      struct inode *inode;
++      unsigned long ino;
++      struct list_head *lp;
++      struct dentry *alternate;
++      char buf[IOPEN_NAME_LEN];
++
++      if (dentry->d_name.len >= IOPEN_NAME_LEN)
++              return ERR_PTR(-ENAMETOOLONG);
++
++      memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++      buf[dentry->d_name.len] = 0;
++
++      if (strcmp(buf, ".") == 0)
++              ino = dir->i_ino;
++      else if (strcmp(buf, "..") == 0)
++              ino = EXT3_ROOT_INO;
++      else
++              ino = simple_strtoul(buf, 0, 0);
++
++      if ((ino != EXT3_ROOT_INO &&
++           ino < EXT3_FIRST_INO(dir->i_sb)) ||
++          ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
++              return ERR_PTR(-ENOENT);
++
++      inode = iget(dir->i_sb, ino);
++      if (!inode)
++              return ERR_PTR(-EACCES);
++      if (is_bad_inode(inode)) {
++              iput(inode);
++              return ERR_PTR(-ENOENT);
++      }
++
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      assert(d_unhashed(dentry));                     /* d_rehash */
++
++      /* preferably return a connected dentry */
++      spin_lock(&dcache_lock);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
++      }
++
++      if (!list_empty(&inode->i_dentry)) {
++              alternate = list_entry(inode->i_dentry.next,
++                                     struct dentry, d_alias);
++              dget_locked(alternate);
++              spin_lock(&alternate->d_lock);
++              alternate->d_flags |= DCACHE_REFERENCED;
++              spin_unlock(&alternate->d_lock);
++              iput(inode);
++              spin_unlock(&dcache_lock);
++              return alternate;
++      }
++      dentry->d_flags |= DCACHE_DISCONNECTED;
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
++      dentry->d_inode = inode;
++
++      d_rehash_cond(dentry, 0);
++      spin_unlock(&dcache_lock);
++
++      return NULL;
++}
++
++/* This function is spliced into ext3_lookup and does the move of a
++ * disconnected dentry (if it exists) to a connected dentry.
++ */
++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
++                                  int rehash)
++{
++      struct dentry *tmp, *goal = NULL;
++      struct list_head *lp;
++
++      /* verify this dentry is really new */
++      assert(dentry->d_inode == NULL);
++      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
++      if (rehash)
++              assert(d_unhashed(dentry));             /* d_rehash */
++      assert(list_empty(&dentry->d_subdirs));
++
++      spin_lock(&dcache_lock);
++      if (!inode)
++              goto do_rehash;
++
++      if (!test_opt(inode->i_sb, IOPEN))
++              goto do_instantiate;
++
++      /* preferably return a connected dentry */
++      list_for_each(lp, &inode->i_dentry) {
++              tmp = list_entry(lp, struct dentry, d_alias);
++              if (tmp->d_flags & DCACHE_DISCONNECTED) {
++                      assert(tmp->d_alias.next == &inode->i_dentry);
++                      assert(tmp->d_alias.prev == &inode->i_dentry);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++      }
++
++      if (!goal)
++              goto do_instantiate;
++
++      /* Move the goal to the de hash queue */
++      goal->d_flags &= ~DCACHE_DISCONNECTED;
++      security_d_instantiate(goal, inode);
++      __d_drop(dentry);
++      d_rehash_cond(dentry, 0);
++      __d_move(goal, dentry);
++      spin_unlock(&dcache_lock);
++      iput(inode);
++
++      return goal;
++
++      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
++do_instantiate:
++      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
++      dentry->d_inode = inode;
++do_rehash:
++      if (rehash)
++              d_rehash_cond(dentry, 0);
++      spin_unlock(&dcache_lock);
++
++      return NULL;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++      lookup:         iopen_lookup,           /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++      read:           generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++      int     len;
++
++      len = strlen(name);
++      if (dentry->d_name.len != len)
++              return 0;
++      if (strncmp(dentry->d_name.name, name, len))
++              return 0;
++      return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
++{
++      struct inode *inode;
++
++      if (dir->i_ino != EXT3_ROOT_INO ||
++          !test_opt(dir->i_sb, IOPEN) ||
++          !match_dentry(dentry, "__iopen__"))
++              return 0;
++
++      inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++      if (!inode)
++              return 0;
++      d_add(dentry, inode);
++      return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately.  Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode *inode)
++{
++      if (inode->i_ino != EXT3_BAD_INO)
++              return 0;
++
++      inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++      if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++              inode->i_mode |= 0777;
++      inode->i_uid = 0;
++      inode->i_gid = 0;
++      inode->i_nlink = 1;
++      inode->i_size = 4096;
++      inode->i_atime = CURRENT_TIME;
++      inode->i_ctime = CURRENT_TIME;
++      inode->i_mtime = CURRENT_TIME;
++      EXT3_I(inode)->i_dtime = 0;
++      inode->i_blocks = 0;
++      inode->i_version = 1;
++      inode->i_generation = 0;
++
++      inode->i_op = &iopen_inode_operations;
++      inode->i_fop = &iopen_file_operations;
++      inode->i_mapping->a_ops = 0;
++
++      return 1;
++}
+Index: linux-2.6.18.8/fs/ext3/iopen.h
+===================================================================
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.18.8/fs/ext3/iopen.h     2007-07-24 13:59:56.000000000 +0200
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode *inode);
++extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
++                                         struct inode *inode, int rehash);
+Index: linux-2.6.18.8/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.18.8.orig/fs/ext3/inode.c        2007-07-24 12:25:00.000000000 +0200
++++ linux-2.6.18.8/fs/ext3/inode.c     2007-07-24 13:59:56.000000000 +0200
+@@ -37,6 +37,7 @@
+ #include <linux/mpage.h>
+ #include <linux/uio.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+ static int ext3_writepage_trans_blocks(struct inode *inode);
+@@ -2593,6 +2594,8 @@ void ext3_read_inode(struct inode * inod
+       ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+ #endif
+       ei->i_block_alloc_info = NULL;
++      if (ext3_iopen_get_inode(inode))
++              return;
+       if (__ext3_get_inode_loc(inode, &iloc, 0))
+               goto bad_inode;
+Index: linux-2.6.18.8/fs/ext3/super.c
+===================================================================
+--- linux-2.6.18.8.orig/fs/ext3/super.c        2007-07-24 12:25:00.000000000 +0200
++++ linux-2.6.18.8/fs/ext3/super.c     2007-07-24 13:59:56.000000000 +0200
+@@ -677,6 +677,7 @@ enum {
+       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_grpquota
+ };
+@@ -726,6 +727,9 @@ static match_table_t tokens = {
+       {Opt_noquota, "noquota"},
+       {Opt_quota, "quota"},
+       {Opt_usrquota, "usrquota"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
+       {Opt_barrier, "barrier=%u"},
+       {Opt_err, NULL},
+       {Opt_resize, "resize"},
+@@ -1041,6 +1045,18 @@ clear_qf_name:
+                       else
+                               clear_opt(sbi->s_mount_opt, BARRIER);
+                       break;
++              case Opt_iopen:
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++                      break;
++              case Opt_noiopen:
++                      clear_opt (sbi->s_mount_opt, IOPEN);
++                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++                      break;
++              case Opt_iopen_nopriv:
++                      set_opt (sbi->s_mount_opt, IOPEN);
++                      set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++                      break;
+               case Opt_ignore:
+                       break;
+               case Opt_resize:
+Index: linux-2.6.18.8/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.18.8.orig/fs/ext3/namei.c        2007-07-24 13:59:54.000000000 +0200
++++ linux-2.6.18.8/fs/ext3/namei.c     2007-07-24 13:59:56.000000000 +0200
+@@ -39,6 +39,7 @@
+ #include "namei.h"
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+ /*
+@@ -1013,6 +1014,9 @@ static struct dentry *ext3_lookup(struct
+       if (dentry->d_name.len > EXT3_NAME_LEN)
+               return ERR_PTR(-ENAMETOOLONG);
++      if (ext3_check_for_iopen(dir, dentry))
++              return NULL;
++
+       bh = ext3_find_entry(dentry, &de);
+       inode = NULL;
+       if (bh) {
+@@ -1028,7 +1032,7 @@ static struct dentry *ext3_lookup(struct
+               if (!inode)
+                       return ERR_PTR(-EACCES);
+       }
+-      return d_splice_alias(inode, dentry);
++      return iopen_connect_dentry(dentry, inode, 1);
+ }
+@@ -2077,10 +2081,6 @@ static int ext3_rmdir (struct inode * di
+                             inode->i_nlink);
+       inode->i_version++;
+       inode->i_nlink = 0;
+-      /* There's no need to set i_disksize: the fact that i_nlink is
+-       * zero will ensure that the right thing happens during any
+-       * recovery. */
+-      inode->i_size = 0;
+       ext3_orphan_add(handle, inode);
+       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
+       ext3_mark_inode_dirty(handle, inode);
+@@ -2204,6 +2204,23 @@ out_stop:
+       return err;
+ }
++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */
++static int ext3_add_link(handle_t *handle, struct dentry *dentry,
++                       struct inode *inode)
++{
++      int err = ext3_add_entry(handle, dentry, inode);
++      if (!err) {
++              err = ext3_mark_inode_dirty(handle, inode);
++              if (err == 0) {
++                      dput(iopen_connect_dentry(dentry, inode, 0));
++                      return 0;
++              }
++      }
++      ext3_dec_count(handle, inode);
++      iput(inode);
++      return err;
++}
++
+ static int ext3_link (struct dentry * old_dentry,
+               struct inode * dir, struct dentry *dentry)
+ {
+@@ -2233,7 +2250,8 @@ retry:
+       ext3_inc_count(handle, inode);
+       atomic_inc(&inode->i_count);
+-      err = ext3_add_nondir(handle, dentry, inode);
++      err = ext3_add_link(handle, dentry, inode);
++      ext3_orphan_del(handle, inode);
+       ext3_journal_stop(handle);
+       if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+               goto retry;
+Index: linux-2.6.18.8/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.18.8.orig/fs/ext3/Makefile       2007-07-24 12:25:00.000000000 +0200
++++ linux-2.6.18.8/fs/ext3/Makefile    2007-07-24 13:59:56.000000000 +0200
+@@ -4,7 +4,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+-ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
++ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+Index: linux-2.6.18.8/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.18.8.orig/include/linux/ext3_fs.h        2007-07-24 13:59:54.000000000 +0200
++++ linux-2.6.18.8/include/linux/ext3_fs.h     2007-07-24 13:59:56.000000000 +0200
+@@ -371,6 +371,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_QUOTA              0x80000 /* Some quota option set */
+ #define EXT3_MOUNT_USRQUOTA           0x100000 /* "old" user quota */
+ #define EXT3_MOUNT_GRPQUOTA           0x200000 /* "old" group quota */
++#define EXT3_MOUNT_IOPEN              0x400000        /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series
new file mode 100644 (file)
index 0000000..24698be
--- /dev/null
@@ -0,0 +1,17 @@
+ext3-wantedi-2.6-rhel4.patch
+iopen-2.6.18-rhel5.patch
+ext3-map_inode_page-2.6.18.patch
+export-ext3-2.6-rhel4.patch
+ext3-include-fixes-2.6-rhel4.patch
+ext3-extents-2.6.18-vanilla.patch
+ext3-mballoc3-core.patch
+ext3-mballoc3-2.6.18.patch
+ext3-nlinks-2.6.9.patch
+ext3-ialloc-2.6.patch
+ext3-remove-cond_resched-calls-2.6.12.patch
+ext3-filterdata-sles10.patch
+ext3-uninit-2.6.18.patch
+ext3-nanosecond-2.6.18-vanilla.patch
+ext3-inode-version-2.6.18-vanilla.patch
+ext3-mmp-2.6.18-vanilla.patch
+ext3-unlink-race.patch
index b7b2ff1..94bc6ad 100644 (file)
@@ -10,7 +10,9 @@ ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-filterdata-sles10.patch
+ext3-16tb-overflow-fixes.patch
 ext3-uninit-2.6.18.patch
 ext3-nanosecond-2.6.18-vanilla.patch
 ext3-inode-version-2.6.18-vanilla.patch
 ext3-mmp-2.6.18-vanilla.patch
+ext3-handle-directory-corruption-better.patch