Whamcloud - gitweb
land v0.9.1 on HEAD, in preparation for a 1.0.x branch
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-inode-reuse-2.4.20.patch
diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.20.patch
new file mode 100644 (file)
index 0000000..18c69ff
--- /dev/null
@@ -0,0 +1,352 @@
+Index: linux-2.4.20/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/ialloc.c 2003-10-25 00:37:13.000000000 +0400
++++ linux-2.4.20/fs/ext3/ialloc.c      2003-10-29 20:33:33.000000000 +0300
+@@ -241,11 +241,16 @@
+       bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
+-      BUFFER_TRACE(bh, "get_write_access");
+-      fatal = ext3_journal_get_write_access(handle, bh);
++      BUFFER_TRACE(bh, "get_undo_access");
++      fatal = ext3_journal_get_undo_access(handle, bh);
+       if (fatal)
+               goto error_return;
+      /* prevent the inode from being reused within a single transaction -bzzz */
++      BUFFER_TRACE(bh, "clear in b_committed_data");
++      J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL);
++      ext3_set_bit(bit, bh2jh(bh)->b_committed_data);
++
+       /* Ok, now we can actually update the inode bitmaps.. */
+       if (!ext3_clear_bit (bit, bh->b_data))
+               ext3_error (sb, "ext3_free_inode",
+@@ -319,6 +324,131 @@
+       return 0;
+ }
+static int ext3_test_allocatable(int nr, struct buffer_head *bh)
+{      /* Returns 1 if bit nr is clear in bh->b_data and in b_committed_data (when one is attached), else 0. */
+      if (ext3_test_bit(nr, bh->b_data))
+              return 0;       /* bit already set in bh->b_data */
+      if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
+              return 1;       /* no b_committed_data copy to consult */
+#if 0  /* debug trace, compiled out */
+      if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data))
+              printk("EXT3-fs: inode %d was used\n", nr);
+#endif
+      return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
+}
++
+int ext3_find_group_dir(const struct inode *dir,       /* only dir->i_sb is used */
+                              struct ext3_group_desc **gdp,   /* in/out: read before the first write, so pass *gdp == NULL */
+                              struct buffer_head **bh)        /* out: descriptor buffer of the selected group */
+{      /* Among groups whose free-inode count is nonzero and at least the per-group average, select the one with the most free blocks. */
+      struct super_block *sb = dir->i_sb;
+      struct ext3_super_block *es;
+      struct ext3_group_desc *tmp;
+      int i = 0, j, avefreei; /* i: selected group index, stays 0 if no group qualifies */
+
+      es = EXT3_SB(sb)->s_es;
+      avefreei = le32_to_cpu(es->s_free_inodes_count) /
+                      sb->u.ext3_sb.s_groups_count;
+      for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
+              struct buffer_head *temp_buffer;
+              tmp = ext3_get_group_desc(sb, j, &temp_buffer);
+              if (tmp && le16_to_cpu(tmp->bg_free_inodes_count) &&
+                      le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) {
+                      if (!*gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
+                              le16_to_cpu((*gdp)->bg_free_blocks_count))) {
+                              i = j;
+                              *gdp = tmp;
+                              *bh = temp_buffer;
+                      }
+              }
+      }
+
+      return i;       /* *gdp and *bh are left untouched when no group qualifies */
+}
++
+int ext3_find_group_other(const struct inode *dir,     /* supplies i_sb and the starting block group */
+                              struct ext3_group_desc **gdp,   /* in/out: written only on success and tested below, so pass *gdp == NULL */
+                              struct buffer_head **bh)        /* out: overwritten by every ext3_get_group_desc() probe */
+{
+      struct super_block *sb = dir->i_sb;
+      struct ext3_group_desc *tmp;
+      int i, j;
+
+      /*
+       * Try to place the inode in its parent directory's block group
+       */
+      i = dir->u.ext3_i.i_block_group;
+      tmp = ext3_get_group_desc(sb, i, bh);
+      if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+              *gdp = tmp;
+      else {
+              /*
+               * Use a quadratic hash (cumulative steps of 1, 2, 4, ...
+               * with wrap-around) to find a group with a free inode
+               */
+              for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
+                      i += j;
+                      if (i >= sb->u.ext3_sb.s_groups_count)
+                              i -= sb->u.ext3_sb.s_groups_count;
+                      tmp = ext3_get_group_desc (sb, i, bh);
+                      if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) {
+                              *gdp = tmp;
+                              break;
+                      }
+              }
+      }
+      if (!*gdp) {
+              /*
+               * That failed: linear search, first probing parent group + 2 (i is pre-incremented), wrapping to 0
+               */
+              i = dir->u.ext3_i.i_block_group + 1;
+              for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
+                      if (++i >= sb->u.ext3_sb.s_groups_count)
+                              i = 0;
+                      tmp = ext3_get_group_desc (sb, i, bh);
+                      if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) {
+                              *gdp = tmp;
+                              break;
+                      }
+              }
+      }
+
+      return i;       /* index of the chosen group when *gdp was set by this call */
+}
++
+static int ext3_find_group(const struct inode *dir, int mode,
+                              struct ext3_group_desc **gdp,
+                              struct buffer_head **bh)
+{      /* Dispatch on mode: directories use ext3_find_group_dir(), everything else ext3_find_group_other(). */
+      if (S_ISDIR(mode))
+              return ext3_find_group_dir(dir, gdp, bh);
+      return ext3_find_group_other(dir, gdp, bh);     /* the helper's group index is returned unchanged */
+}
++
+static int ext3_find_usable_inode(struct super_block *sb,
+                                      struct buffer_head *bh) /* bh: bitmap buffer to search */
+{      /* Scan bh->b_data from bit 0 for a zero bit that ext3_test_allocatable() accepts; return its index, or -1 if none below EXT3_INODES_PER_GROUP(sb). */
+      int here, maxinodes, next;
+
+      maxinodes = EXT3_INODES_PER_GROUP(sb);
+      here = 0;       
+
+      while (here < maxinodes) {
+              next  = ext3_find_next_zero_bit((unsigned long *) bh->b_data, 
+                                               maxinodes, here);
+              if (next >= maxinodes)
+                      return -1;      /* no zero bit left in bh->b_data */
+              if (ext3_test_allocatable(next, bh))
+                      return next;
+
+              J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);   /* a rejected zero bit is expected to come from b_committed_data */
+              here = ext3_find_next_zero_bit  /* resume at the next zero bit of b_committed_data, searching from next */
+                      ((unsigned long *) bh2jh(bh)->b_committed_data, 
+                       maxinodes, next);
+      }
+      return -1;
+}
++
+ /*
+  * There are two policies for allocating an inode.  If the new inode is
+  * a directory, then a forward search is made for a block group with both
+@@ -336,7 +466,7 @@
+       struct super_block * sb;
+       struct buffer_head * bh;
+       struct buffer_head * bh2;
+-      int i, j, avefreei;
++      int i, j, k;
+       struct inode * inode;
+       int bitmap_nr;
+       struct ext3_group_desc * gdp;
+@@ -371,11 +501,12 @@
+               bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
+-              BUFFER_TRACE(bh, "get_write_access");
+-              err = ext3_journal_get_write_access(handle, bh);
++              BUFFER_TRACE(bh, "get_undo_access");
++              err = ext3_journal_get_undo_access(handle, bh);
+               if (err) goto fail;
+-              if (ext3_set_bit(j, bh->b_data)) {
++              if (!ext3_test_allocatable(j, bh) ||
++                              ext3_set_bit(j, bh->b_data)) {
+                       printk(KERN_ERR "goal inode %lu unavailable\n", goal);
+                       /* Oh well, we tried. */
+                       goto repeat;
+@@ -393,119 +524,70 @@
+ repeat:
+       gdp = NULL;
+-      i = 0;
+-
+-      if (S_ISDIR(mode)) {
+-              avefreei = le32_to_cpu(es->s_free_inodes_count) /
+-                      sb->u.ext3_sb.s_groups_count;
+-              if (!gdp) {
+-                      for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
+-                              struct buffer_head *temp_buffer;
+-                              tmp = ext3_get_group_desc (sb, j, &temp_buffer);
+-                              if (tmp &&
+-                                  le16_to_cpu(tmp->bg_free_inodes_count) &&
+-                                  le16_to_cpu(tmp->bg_free_inodes_count) >=
+-                                                      avefreei) {
+-                                      if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
+-                                              le16_to_cpu(gdp->bg_free_blocks_count))) {
+-                                              i = j;
+-                                              gdp = tmp;
+-                                              bh2 = temp_buffer;
+-                                      }
+-                              }
+-                      }
+-              }
+-      } else {
+-              /*
+-               * Try to place the inode in its parent directory
+-               */
+-              i = dir->u.ext3_i.i_block_group;
+-              tmp = ext3_get_group_desc (sb, i, &bh2);
+-              if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+-                      gdp = tmp;
+-              else
+-              {
+-                      /*
+-                       * Use a quadratic hash to find a group with a
+-                       * free inode
+-                       */
+-                      for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
+-                              i += j;
+-                              if (i >= sb->u.ext3_sb.s_groups_count)
+-                                      i -= sb->u.ext3_sb.s_groups_count;
+-                              tmp = ext3_get_group_desc (sb, i, &bh2);
+-                              if (tmp &&
+-                                  le16_to_cpu(tmp->bg_free_inodes_count)) {
+-                                      gdp = tmp;
+-                                      break;
+-                              }
+-                      }
+-              }
+-              if (!gdp) {
+-                      /*
+-                       * That failed: try linear search for a free inode
+-                       */
+-                      i = dir->u.ext3_i.i_block_group + 1;
+-                      for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
+-                              if (++i >= sb->u.ext3_sb.s_groups_count)
+-                                      i = 0;
+-                              tmp = ext3_get_group_desc (sb, i, &bh2);
+-                              if (tmp &&
+-                                  le16_to_cpu(tmp->bg_free_inodes_count)) {
+-                                      gdp = tmp;
+-                                      break;
+-                              }
+-                      }
+-              }
+-      }
++      /* choose group */
++      i = ext3_find_group(dir, mode, &gdp, &bh2);
+       err = -ENOSPC;
+       if (!gdp)
+               goto out;
+-
++      
+       err = -EIO;
+-      bitmap_nr = load_inode_bitmap (sb, i);
++      bitmap_nr = load_inode_bitmap(sb, i);
+       if (bitmap_nr < 0)
+               goto fail;
+-
+       bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
+-      if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
+-                                    EXT3_INODES_PER_GROUP(sb))) <
+-          EXT3_INODES_PER_GROUP(sb)) {
+-              BUFFER_TRACE(bh, "get_write_access");
+-              err = ext3_journal_get_write_access(handle, bh);
+-              if (err) goto fail;
+-              
+-              if (ext3_set_bit (j, bh->b_data)) {
+-                      ext3_error (sb, "ext3_new_inode",
+-                                    "bit already set for inode %d", j);
+-                      goto repeat;
+-              }
+-              BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+-              err = ext3_journal_dirty_metadata(handle, bh);
+-              if (err) goto fail;
+-      } else {
+-              if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
+-                      ext3_error (sb, "ext3_new_inode",
+-                                  "Free inodes count corrupted in group %d",
+-                                  i);
+-                      /* Is it really ENOSPC? */
+-                      err = -ENOSPC;
+-                      if (sb->s_flags & MS_RDONLY)
+-                              goto fail;
+-
+-                      BUFFER_TRACE(bh2, "get_write_access");
+-                      err = ext3_journal_get_write_access(handle, bh2);
+-                      if (err) goto fail;
+-                      gdp->bg_free_inodes_count = 0;
+-                      BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
+-                      err = ext3_journal_dirty_metadata(handle, bh2);
+-                      if (err) goto fail;
++      /* try to allocate in selected group */
++      j = ext3_find_usable_inode(sb, bh);
++      err = -ENOSPC;
++      if (j >= 0)
++              goto found_free;
++
+      /* can't allocate here: try to allocate in ANY other group */
++      k = i;
++      err = -EIO;
++      for (i = i + 1; i != k; i++) {
++              if (i >= sb->u.ext3_sb.s_groups_count)
++                      i = 0;
++              tmp = ext3_get_group_desc(sb, i, &bh2);
++              if (le16_to_cpu(tmp->bg_free_inodes_count) == 0)
++                      continue;
++
++              bitmap_nr = load_inode_bitmap(sb, i);
++              if (bitmap_nr < 0)
++                      goto fail;
++              bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
++
++              /* try to allocate in selected group */
++              if ((j = ext3_find_usable_inode(sb, bh)) >= 0) {
++                      gdp = tmp;
++                      break;
+               }
+-              goto repeat;
+       }
++      err = -ENOSPC;
++      if (!gdp)
++              goto out;
++
++ found_free:
++      BUFFER_TRACE(bh, "get_undo_access");
++      err = ext3_journal_get_undo_access(handle, bh);
++      if (err)
++              goto fail;
++
++      if (ext3_set_bit(j, bh->b_data)) {
++              ext3_error (sb, "ext3_new_inode",
++                              "bit already set for inode %d", j);
++              goto fail; 
++      }
++      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
++      err = ext3_journal_dirty_metadata(handle, bh);
++      if (err)
++              goto fail;
++      
+  have_bit_and_group:
++      if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
++              J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
++
+       j += i * EXT3_INODES_PER_GROUP(sb) + 1;
+       if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
+               ext3_error (sb, "ext3_new_inode",