From c20991390734993a44c13141408eaafd47022761 Mon Sep 17 00:00:00 2001 From: nic Date: Mon, 4 Oct 2004 20:00:22 +0000 Subject: [PATCH] land b1_4_smallfix: - properly handle portals process identifiers in messages (4165) - finish default directory EA handling (3048) - removed PTL_MD_KIOV usage under CRAY_PORTALS (4420) - ignore -ENOENT errors in osc_destroy (3639) - notify osc create thread that OSC is being cleaned up (4600) - add nettype argument for llmount in #5d in conf-sanity.sh (3936) - reconstruct ost_handle() like mds_handle() (4657) - create a new thread to do import eviction to avoid deadlock (3969) - let lconf resolve symlinked-to devices (4629) - don't unlink "objects" from directory with default EA (4554) - allocate qswnal tx descriptors singly to avoid fragmentation (4504) - allow more than 32000 subdirectories in a single directory (3244) - OST returns ENOSPC from object create when no space left (4539) - limit OSC precreate to 1/2 of value OST considers bogus (4778) - bind to privileged port in socknal and tcpnal (3689) - rate limit CERROR/CWARN console message to avoid overload (4519) --- .../patches/ext3-inode-reuse-2.4.24.patch | 352 ---- .../patches/ext3-mballoc2-2.6.5.patch | 1723 -------------------- .../patches/ext3-nlinks-2.4.20-hp_pnnl.patch | 14 +- .../patches/ext3-nlinks-2.4.21-chaos.patch | 14 +- lustre/kernel_patches/series/rhel-2.4.21 | 1 - 5 files changed, 24 insertions(+), 2080 deletions(-) delete mode 100644 lustre/kernel_patches/patches/ext3-inode-reuse-2.4.24.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6.5.patch diff --git a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.24.patch b/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.24.patch deleted file mode 100644 index 18c69ff..0000000 --- a/lustre/kernel_patches/patches/ext3-inode-reuse-2.4.24.patch +++ /dev/null @@ -1,352 +0,0 @@ -Index: linux-2.4.20/fs/ext3/ialloc.c -=================================================================== ---- linux-2.4.20.orig/fs/ext3/ialloc.c 2003-10-25 00:37:13.000000000 +0400 -+++ linux-2.4.20/fs/ext3/ialloc.c 2003-10-29 20:33:33.000000000 +0300 -@@ -241,11 +241,16 @@ - - bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; - -- BUFFER_TRACE(bh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, bh); -+ BUFFER_TRACE(bh, "get_undo_access"); -+ fatal = ext3_journal_get_undo_access(handle, bh); - if (fatal) - goto error_return; - -+ /* to prevent inode reusing within single transaction -bzzz */ -+ BUFFER_TRACE(bh, "clear in b_committed_data"); -+ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL); -+ ext3_set_bit(bit, bh2jh(bh)->b_committed_data); -+ - /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext3_clear_bit (bit, bh->b_data)) - ext3_error (sb, "ext3_free_inode", -@@ -319,6 +324,131 @@ - return 0; - } - -+static int ext3_test_allocatable(int nr, struct buffer_head *bh) -+{ -+ if (ext3_test_bit(nr, bh->b_data)) -+ return 0; -+ if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data) -+ return 1; -+#if 0 -+ if (!ext3_test_bit(nr, bh2jh(bh)->b_committed_data)) -+ printk("EXT3-fs: inode %d was used\n", nr); -+#endif -+ return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data); -+} -+ -+int ext3_find_group_dir(const struct inode *dir, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ struct super_block *sb = dir->i_sb; -+ struct ext3_super_block *es; -+ struct ext3_group_desc *tmp; -+ int i = 0, j, avefreei; -+ -+ es = EXT3_SB(sb)->s_es; -+ avefreei = le32_to_cpu(es->s_free_inodes_count) / -+ sb->u.ext3_sb.s_groups_count; -+ for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -+ struct buffer_head *temp_buffer; -+ tmp = ext3_get_group_desc(sb, j, &temp_buffer); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count) && -+ le16_to_cpu(tmp->bg_free_inodes_count) >= avefreei) { -+ if (!*gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -+ le16_to_cpu((*gdp)->bg_free_blocks_count))) { -+ i = j; -+ *gdp = tmp; -+ *bh = temp_buffer; -+ } -+ } -+ } -+ -+ return i; -+} -+ -+int ext3_find_group_other(const struct inode *dir, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ struct super_block *sb = dir->i_sb; -+ struct ext3_group_desc *tmp; -+ int i, j; -+ -+ /* -+ * Try to place the inode in its parent directory -+ */ -+ i = dir->u.ext3_i.i_block_group; -+ tmp = ext3_get_group_desc(sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -+ *gdp = tmp; -+ else { -+ /* -+ * Use a quadratic hash to find a group with a -+ * free inode -+ */ -+ for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -+ i += j; -+ if (i >= sb->u.ext3_sb.s_groups_count) -+ i -= sb->u.ext3_sb.s_groups_count; -+ tmp = ext3_get_group_desc (sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { -+ *gdp = tmp; -+ break; -+ } -+ } -+ } -+ if (!*gdp) { -+ /* -+ * That failed: try linear search for a free inode -+ */ -+ i = dir->u.ext3_i.i_block_group + 1; -+ for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -+ if (++i >= sb->u.ext3_sb.s_groups_count) -+ i = 0; -+ tmp = ext3_get_group_desc (sb, i, bh); -+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) { -+ *gdp = tmp; -+ break; -+ } -+ } -+ } -+ -+ return i; -+} -+ -+static int ext3_find_group(const struct inode *dir, int mode, -+ struct ext3_group_desc **gdp, -+ struct buffer_head **bh) -+{ -+ if (S_ISDIR(mode)) -+ return ext3_find_group_dir(dir, gdp, bh); -+ return ext3_find_group_other(dir, gdp, bh); -+} -+ -+static int ext3_find_usable_inode(struct super_block *sb, -+ struct buffer_head *bh) -+{ -+ int here, maxinodes, next; -+ -+ maxinodes = EXT3_INODES_PER_GROUP(sb); -+ here = 0; -+ -+ while (here < maxinodes) { -+ next = ext3_find_next_zero_bit((unsigned long *) bh->b_data, -+ maxinodes, here); -+ if (next >= maxinodes) -+ return -1; -+ if (ext3_test_allocatable(next, bh)) -+ return next; -+ -+ J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data); -+ here = ext3_find_next_zero_bit -+ ((unsigned long *) bh2jh(bh)->b_committed_data, -+ maxinodes, next); -+ } -+ return -1; -+} -+ - /* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both -@@ -336,7 +466,7 @@ - struct super_block * sb; - struct buffer_head * bh; - struct buffer_head * bh2; -- int i, j, avefreei; -+ int i, j, k; - struct inode * inode; - int bitmap_nr; - struct ext3_group_desc * gdp; -@@ -371,11 +501,12 @@ - - bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - -- BUFFER_TRACE(bh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh); -+ BUFFER_TRACE(bh, "get_undo_access"); -+ err = ext3_journal_get_undo_access(handle, bh); - if (err) goto fail; - -- if (ext3_set_bit(j, bh->b_data)) { -+ if (!ext3_test_allocatable(j, bh) || -+ ext3_set_bit(j, bh->b_data)) { - printk(KERN_ERR "goal inode %lu unavailable\n", goal); - /* Oh well, we tried. */ - goto repeat; -@@ -393,119 +524,70 @@ - - repeat: - gdp = NULL; -- i = 0; -- -- if (S_ISDIR(mode)) { -- avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -- if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -- struct buffer_head *temp_buffer; -- tmp = ext3_get_group_desc (sb, j, &temp_buffer); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count) && -- le16_to_cpu(tmp->bg_free_inodes_count) >= -- avefreei) { -- if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > -- le16_to_cpu(gdp->bg_free_blocks_count))) { -- i = j; -- gdp = tmp; -- bh2 = temp_buffer; -- } -- } -- } -- } -- } else { -- /* -- * Try to place the inode in its parent directory -- */ -- i = dir->u.ext3_i.i_block_group; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) -- gdp = tmp; -- else -- { -- /* -- * Use a quadratic hash to find a group with a -- * free inode -- */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -- i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- if (!gdp) { -- /* -- * That failed: try linear search for a free inode -- */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -- i = 0; -- tmp = ext3_get_group_desc (sb, i, &bh2); -- if (tmp && -- le16_to_cpu(tmp->bg_free_inodes_count)) { -- gdp = tmp; -- break; -- } -- } -- } -- } - -+ /* choose group */ -+ i = ext3_find_group(dir, mode, &gdp, &bh2); - err = -ENOSPC; - if (!gdp) - goto out; -- -+ - err = -EIO; -- bitmap_nr = load_inode_bitmap (sb, i); -+ bitmap_nr = load_inode_bitmap(sb, i); - if (bitmap_nr < 0) - goto fail; -- - bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; - -- if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- EXT3_INODES_PER_GROUP(sb))) < -- EXT3_INODES_PER_GROUP(sb)) { -- BUFFER_TRACE(bh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh); -- if (err) goto fail; -- -- if (ext3_set_bit (j, bh->b_data)) { -- ext3_error (sb, "ext3_new_inode", -- "bit already set for inode %d", j); -- goto repeat; -- } -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, bh); -- if (err) goto fail; -- } else { -- if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { -- ext3_error (sb, "ext3_new_inode", -- "Free inodes count corrupted in group %d", -- i); -- /* Is it really ENOSPC? */ -- err = -ENOSPC; -- if (sb->s_flags & MS_RDONLY) -- goto fail; -- -- BUFFER_TRACE(bh2, "get_write_access"); -- err = ext3_journal_get_write_access(handle, bh2); -- if (err) goto fail; -- gdp->bg_free_inodes_count = 0; -- BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, bh2); -- if (err) goto fail; -+ /* try to allocate in selected group */ -+ j = ext3_find_usable_inode(sb, bh); -+ err = -ENOSPC; -+ if (j >= 0) -+ goto found_free; -+ -+ /* can't allocate: try to allocate in ANY another groups */ -+ k = i; -+ err = -EIO; -+ for (i = i + 1; i != k; i++) { -+ if (i >= sb->u.ext3_sb.s_groups_count) -+ i = 0; -+ tmp = ext3_get_group_desc(sb, i, &bh2); -+ if (le16_to_cpu(tmp->bg_free_inodes_count) == 0) -+ continue; -+ -+ bitmap_nr = load_inode_bitmap(sb, i); -+ if (bitmap_nr < 0) -+ goto fail; -+ bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ -+ /* try to allocate in selected group */ -+ if ((j = ext3_find_usable_inode(sb, bh)) >= 0) { -+ gdp = tmp; -+ break; - } -- goto repeat; - } -+ err = -ENOSPC; -+ if (!gdp) -+ goto out; -+ -+ found_free: -+ BUFFER_TRACE(bh, "get_undo_access"); -+ err = ext3_journal_get_undo_access(handle, bh); -+ if (err) -+ goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ ext3_error (sb, "ext3_new_inode", -+ "bit already set for inode %d", j); -+ goto fail; -+ } -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ goto fail; -+ - have_bit_and_group: -+ if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data) -+ J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data)); -+ - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.5.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.5.patch deleted file mode 100644 index 4b7874c..0000000 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.5.patch +++ /dev/null @@ -1,1723 +0,0 @@ -Index: linux-2.6.7/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/fs/ext3/mballoc.c 2004-09-03 09:48:40.000000000 +0400 -@@ -0,0 +1,1401 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* -+ * TODO: -+ * - do not scan from the beginning, try to remember first free block -+ * - mb_mark_used_* may allocate chunk right after splitting buddy -+ * - special flag to advice allocator to look for requested + N blocks -+ * this may improve interaction between extents and mballoc -+ */ -+ -+/* -+ * with AGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. this checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) -+#endif -+ -+/* -+ * where to save buddies structures beetween umount/mount (clean case only) -+ */ -+#define EXT3_BUDDY_FILE ".buddy" -+ -+/* -+ * max. number of chunks to be tracked in ext3_free_extent struct -+ */ -+#define MB_ARR_SIZE 32 -+ -+struct ext3_allocation_context { -+ struct super_block *ac_sb; -+ -+ /* search goals */ -+ int ac_g_group; -+ int ac_g_start; -+ int ac_g_len; -+ int ac_g_flags; -+ -+ /* the best found extent */ -+ int ac_b_group; -+ int ac_b_start; -+ int ac_b_len; -+ -+ /* number of iterations done. we have to track to limit searching */ -+ int ac_repeats; -+ int ac_groups_scanned; -+ int ac_status; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+ -+ -+struct ext3_buddy { -+ void *bd_bitmap; -+ void *bd_buddy; -+ int bd_blkbits; -+ struct buffer_head *bd_bh; -+ struct buffer_head *bd_bh2; -+ struct ext3_buddy_group_blocks *bd_bd; -+ struct super_block *bd_sb; -+}; -+ -+struct ext3_free_extent { -+ int fe_start; -+ int fe_len; -+ unsigned char fe_orders[MB_ARR_SIZE]; -+ unsigned char fe_nums; -+ unsigned char fe_back; -+}; -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *); -+int ext3_mb_reserve_blocks(struct super_block *, int); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ int i = 1; -+ void *bb; -+ -+ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); -+ J_ASSERT(max != NULL); -+ -+ if (order > e3b->bd_blkbits + 1) -+ return NULL; -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return e3b->bd_bitmap; -+ -+ bb = e3b->bd_buddy; -+ *max = *max >> 1; -+ while (i < order) { -+ bb += 1 << (e3b->bd_blkbits - i); -+ i++; -+ *max = *max >> 1; -+ } -+ return bb; -+} -+ -+static int ext3_mb_load_desc(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap); -+ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy); -+ -+ /* load bitmap */ -+ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap); -+ if (e3b->bd_bh == NULL) { -+ ext3_error(sb, "ext3_mb_load_desc", -+ "can't get block for buddy bitmap\n"); -+ goto out; -+ } -+ if (!buffer_uptodate(e3b->bd_bh)) { -+ ll_rw_block(READ, 1, &e3b->bd_bh); -+ wait_on_buffer(e3b->bd_bh); -+ } -+ J_ASSERT(buffer_uptodate(e3b->bd_bh)); -+ -+ /* load buddy */ -+ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy); -+ if (e3b->bd_bh2 == NULL) { -+ ext3_error(sb, "ext3_mb_load_desc", -+ "can't get block for buddy bitmap\n"); -+ goto out; -+ } -+ if (!buffer_uptodate(e3b->bd_bh2)) { -+ ll_rw_block(READ, 1, &e3b->bd_bh2); -+ wait_on_buffer(e3b->bd_bh2); -+ } -+ J_ASSERT(buffer_uptodate(e3b->bd_bh2)); -+ -+ e3b->bd_bitmap = e3b->bd_bh->b_data; -+ e3b->bd_buddy = e3b->bd_bh2->b_data; -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_bd = sbi->s_buddy_blocks + group; -+ e3b->bd_sb = sb; -+ -+ return 0; -+out: -+ brelse(e3b->bd_bh); -+ brelse(e3b->bd_bh2); -+ e3b->bd_bh = NULL; -+ e3b->bd_bh2 = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b) -+{ -+ mark_buffer_dirty(e3b->bd_bh); -+ mark_buffer_dirty(e3b->bd_bh2); -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ brelse(e3b->bd_bh); -+ brelse(e3b->bd_bh2); -+} -+ -+#ifdef AGGRESSIVE_CHECK -+static void mb_check_buddy(struct ext3_buddy *e3b) -+{ -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(e3b->bd_sb, MBALLOC)) -+ return; -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ J_ASSERT(buddy2); -+ J_ASSERT(buddy != buddy2); -+ J_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (!test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (test_bit(i << 1, buddy2)) -+ J_ASSERT(!test_bit((i<<1)+1, buddy2)); -+ else if (test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(!test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ J_ASSERT(!test_bit(i << 1, buddy2)); -+ J_ASSERT(!test_bit((i << 1) + 1, buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ J_ASSERT(test_bit(k, e3b->bd_bitmap)); -+ } -+ count++; -+ } -+ J_ASSERT(e3b->bd_bd->bb_counters[order] == count); -+ order--; -+ } -+ -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, buddy)) -+ continue; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ J_ASSERT(k < max2); -+ J_ASSERT(!test_bit(k, buddy2)); -+ } -+ } -+} -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); -+} -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); -+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); -+ -+ bb = e3b->bd_buddy; -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ clear_bit(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ set_bit(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block, max, order; -+ void *buddy, *buddy2; -+ -+ mb_check_buddy(e3b); -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ J_ASSERT(!test_bit(block, e3b->bd_bitmap)); -+ set_bit(block, e3b->bd_bitmap); -+ e3b->bd_bd->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (!test_bit(block, buddy) || -+ !test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't clear -+ * free bits in bitmap */ -+ clear_bit(block, buddy); -+ clear_bit(block + 1, buddy); -+ } -+ e3b->bd_bd->bb_counters[order]--; -+ e3b->bd_bd->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_bd->bb_counters[order]++; -+ -+ set_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+/* -+ * returns 1 if out extent is enough to fill needed space -+ */ -+int mb_make_backward_extent(struct ext3_free_extent *in, -+ struct ext3_free_extent *out, int needed) -+{ -+ int i; -+ -+ J_ASSERT(in); -+ J_ASSERT(out); -+ J_ASSERT(in->fe_nums < MB_ARR_SIZE); -+ -+ out->fe_len = 0; -+ out->fe_start = in->fe_start + in->fe_len; -+ out->fe_nums = 0; -+ -+ /* for single-chunk extent we need not back order -+ * also, if an extent doesn't fill needed space -+ * then it makes no sense to try back order becase -+ * if we select this extent then it'll be use as is */ -+ if (in->fe_nums < 2 || in->fe_len < needed) -+ return 0; -+ -+ i = in->fe_nums - 1; -+ while (i >= 0 && out->fe_len < needed) { -+ out->fe_len += (1 << in->fe_orders[i]); -+ out->fe_start -= (1 << in->fe_orders[i]); -+ i--; -+ } -+ /* FIXME: in some situation fe_orders may be too small to hold -+ * all the buddies */ -+ J_ASSERT(out->fe_len >= needed); -+ -+ for (i++; i < in->fe_nums; i++) -+ out->fe_orders[out->fe_nums++] = in->fe_orders[i]; -+ J_ASSERT(out->fe_nums < MB_ARR_SIZE); -+ out->fe_back = 1; -+ -+ return 1; -+} -+ -+int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int space = needed; -+ int next, max, ord; -+ void *buddy; -+ -+ J_ASSERT(ex != NULL); -+ -+ ex->fe_nums = 0; -+ ex->fe_len = 0; -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ J_ASSERT(block < max); -+ if (!test_bit(block, buddy)) -+ goto nofree; -+ -+ if (order == 0) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_orders[ex->fe_nums++] = order; -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_back = 0; -+ -+ while ((space = space - (1 << order)) > 0) { -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ J_ASSERT(buddy); -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (!test_bit(next, e3b->bd_bitmap)) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ if ((1 << ord) >= needed) { -+ /* we dont want to coalesce with self-enough buddies */ -+ break; -+ } -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ -+ if (ex->fe_nums < MB_ARR_SIZE) -+ ex->fe_orders[ex->fe_nums++] = order; -+ } -+ -+nofree: -+ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used_backward(struct ext3_buddy *e3b, -+ struct ext3_free_extent *ex, int len) -+{ -+ int start = ex->fe_start, len0 = len; -+ int ord, mlen, max, cur; -+ void *buddy; -+ -+ start = ex->fe_start + ex->fe_len - 1; -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ if (((start >> ord) << ord) == (start - (1 << ord) + 1) && -+ len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); -+ e3b->bd_bd->bb_counters[ord]--; -+ start -= mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ J_ASSERT(start >= 0); -+ continue; -+ } -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); -+ e3b->bd_bd->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); -+ e3b->bd_bd->bb_counters[ord]++; -+ e3b->bd_bd->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_mark_used_forward(struct ext3_buddy *e3b, -+ struct ext3_free_extent *ex, int len) -+{ -+ int start = ex->fe_start, len0 = len; -+ int ord, mlen, max, cur; -+ void *buddy; -+ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! */ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); -+ e3b->bd_bd->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ J_ASSERT(len >= 0); -+ continue; -+ } -+ -+ /* we have to split large buddy */ -+ J_ASSERT(ord > 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); -+ e3b->bd_bd->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); -+ e3b->bd_bd->bb_counters[ord]++; -+ e3b->bd_bd->bb_counters[ord]++; -+ } -+ -+ /* now drop all the bits in bitmap */ -+ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0); -+ -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+int inline mb_mark_used(struct ext3_buddy *e3b, -+ struct ext3_free_extent *ex, int len) -+{ -+ int err; -+ -+ J_ASSERT(ex); -+ if (ex->fe_back == 0) -+ err = mb_mark_used_forward(e3b, ex, len); -+ else -+ err = mb_mark_used_backward(e3b, ex, len); -+ return err; -+} -+ -+int ext3_mb_new_in_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b, int group) -+{ -+ struct super_block *sb = ac->ac_sb; -+ int err, gorder, max, i; -+ struct ext3_free_extent curex; -+ -+ /* let's know order of allocation */ -+ gorder = 0; -+ while (ac->ac_g_len > (1 << gorder)) -+ gorder++; -+ -+ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { -+ /* someone asks for space at this specified block -+ * probably he wants to merge it into existing extent */ -+ if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) { -+ /* good. at least one block is free */ -+ max = mb_find_extent(e3b, 0, ac->ac_g_start, -+ ac->ac_g_len, &curex); -+ max = min(curex.fe_len, ac->ac_g_len); -+ mb_mark_used(e3b, &curex, max); -+ -+ ac->ac_b_group = group; -+ ac->ac_b_start = curex.fe_start; -+ ac->ac_b_len = max; -+ ac->ac_status = AC_STATUS_FOUND; -+ err = 0; -+ goto out; -+ } -+ /* don't try to find goal anymore */ -+ ac->ac_g_flags &= ~1; -+ } -+ -+ i = 0; -+ while (1) { -+ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) -+ break; -+ -+ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex); -+ if (max >= ac->ac_g_len) { -+ max = min(curex.fe_len, ac->ac_g_len); -+ mb_mark_used(e3b, &curex, max); -+ -+ ac->ac_b_group = group; -+ ac->ac_b_start = curex.fe_start; -+ ac->ac_b_len = max; -+ ac->ac_status = AC_STATUS_FOUND; -+ break; -+ } -+ i += max; -+ } -+ -+ return 0; -+ -+out: -+ return err; -+} -+ -+int mb_good_group(struct ext3_allocation_context *ac, int group, int cr) -+{ -+ struct ext3_group_desc *gdp; -+ int free_blocks; -+ -+ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL); -+ if (!gdp) -+ return 0; -+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); -+ if (free_blocks == 0) -+ return 0; -+ -+ /* someone wants this block very much */ -+ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) -+ return 1; -+ -+ /* FIXME: I'd like to take fragmentation into account here */ -+ if (cr == 0) { -+ if (free_blocks >= ac->ac_g_len >> 1) -+ return 1; -+ } else if (cr == 1) { -+ if (free_blocks >= ac->ac_g_len >> 2) -+ return 1; -+ } else if (cr == 2) { -+ return 1; -+ } else { -+ BUG(); -+ } -+ return 0; -+} -+ -+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *len, int flags, int *errp) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_allocation_context ac; -+ int i, group, block, cr, err = 0; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ J_ASSERT(len != NULL); -+ J_ASSERT(*len > 0); -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk("ext3_mb_new_nblocks: nonexistent device"); -+ return 0; -+ } -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning == 0) { -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ext3_mballoc_warning++; -+ } -+ *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ -+ if (!(flags & 2)) { -+ /* someone asks for non-reserved blocks */ -+ BUG_ON(*len > 1); -+ err = ext3_mb_reserve_blocks(sb, 1); -+ if (err) { -+ *errp = err; -+ return 0; -+ } -+ } -+ -+ /* -+ * Check quota for allocation of this blocks. -+ */ -+ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) -+ *len -= 1; -+ if (*len == 0) { -+ *errp = -EDQUOT; -+ block = 0; -+ goto out; -+ } -+ -+ /* start searching from the goal */ -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ group = (goal - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ block = ((goal - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ /* set up allocation goals */ -+ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0; -+ ac.ac_status = 0; -+ ac.ac_groups_scanned = 0; -+ ac.ac_sb = inode->i_sb; -+ ac.ac_g_group = group; -+ ac.ac_g_start = block; -+ ac.ac_g_len = *len; -+ ac.ac_g_flags = flags; -+ -+ /* loop over the groups */ -+ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ /* check is group good for our criteries */ -+ if (!mb_good_group(&ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_desc(ac.ac_sb, group, &e3b); -+ if (err) -+ goto out_err; -+ -+ ext3_lock_group(sb, group); -+ if (!mb_good_group(&ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ err = ext3_mb_new_in_group(&ac, &e3b, group); -+ ext3_unlock_group(sb, group); -+ if (ac.ac_status == AC_STATUS_FOUND) -+ ext3_mb_dirty_buddy(&e3b); -+ ext3_mb_release_desc(&e3b); -+ if (err) -+ goto out_err; -+ if (ac.ac_status == AC_STATUS_FOUND) -+ break; -+ } -+ } -+ -+ if (ac.ac_status != AC_STATUS_FOUND) { -+ /* unfortunately, we can't satisfy this request */ -+ J_ASSERT(ac.ac_b_len == 0); -+ DQUOT_FREE_BLOCK(inode, *len); -+ *errp = -ENOSPC; -+ block = 0; -+ goto out; -+ } -+ -+ /* good news - free block(s) have been found. now it's time -+ * to mark block(s) in good old journaled bitmap */ -+ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block); -+ -+ /* we made a desicion, now mark found blocks in good old -+ * bitmap to be journaled */ -+ -+ ext3_debug("using block group %d(%d)\n", -+ ac.ac_b_group.group, gdp->bg_free_blocks_count); -+ -+ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group); -+ if (!bitmap_bh) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) { -+ *errp = err; -+ goto out_err; -+ } -+ -+ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh); -+ if (!gdp) { -+ *errp = -EIO; -+ goto out_err; -+ } -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, "ext3_new_block", -+ "Allocating block in system zone - " -+ "block = %u", block); -+#if 0 -+ for (i = 0; i < ac.ac_b_len; i++) -+ J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); -+ -+ ext3_lock_group(sb, ac.ac_b_group); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - -+ ac.ac_b_len); -+ ext3_unlock_group(sb, ac.ac_b_group); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ sb->s_dirt = 1; -+ *errp = 0; -+ brelse(bitmap_bh); -+ -+ /* drop non-allocated, but dquote'd blocks */ -+ J_ASSERT(*len >= ac.ac_b_len); -+ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len); -+ -+ *len = ac.ac_b_len; -+ J_ASSERT(block != 0); -+ goto out; -+ -+out_err: -+ /* if we've already allocated something, roll it back */ -+ if (ac.ac_status == AC_STATUS_FOUND) { -+ /* FIXME: free blocks here */ -+ } -+ -+ DQUOT_FREE_BLOCK(inode, *len); -+ brelse(bitmap_bh); -+ *errp = err; -+ block = 0; -+out: -+ if (!(flags & 2)) { -+ /* block wasn't reserved before and we reserved it -+ * at the beginning of allocation. it doesn't matter -+ * whether we allocated anything or we failed: time -+ * to release reservation. NOTE: because I expect -+ * any multiblock request from delayed allocation -+ * path only, here is single block always */ -+ ext3_mb_release_blocks(sb, 1); -+ } -+ return block; -+} -+ -+int ext3_mb_generate_buddy(struct super_block *sb, int group) -+{ -+ struct buffer_head *bh; -+ int i, err, count = 0; -+ struct ext3_buddy e3b; -+ -+ err = ext3_mb_load_desc(sb, group, &e3b); -+ if (err) -+ goto out; -+ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize); -+ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize); -+ -+ bh = read_block_bitmap(sb, group); -+ if (bh == NULL) { -+ err = -EIO; -+ goto out2; -+ } -+ -+ /* loop over the blocks, nad create buddies for free ones */ -+ for (i = 0; i < sb->s_blocksize * 8; i++) { -+ if (!test_bit(i, (void *) bh->b_data)) { -+ mb_free_blocks(&e3b, i, 1); -+ count++; -+ } -+ } -+ brelse(bh); -+ mb_check_buddy(&e3b); -+ ext3_mb_dirty_buddy(&e3b); -+ -+out2: -+ ext3_mb_release_desc(&e3b); -+out: -+ return err; -+} -+ -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+#ifndef EXT3_QUOTA_INIT_BLOCKS -+#define EXT3_QUOTA_INIT_BLOCKS 0 -+#endif -+ -+#define MB_CREDITS \ -+ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ -+ 2 * EXT3_QUOTA_INIT_BLOCKS) -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct inode *root = sb->s_root->d_inode; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct dentry *db; -+ tid_t target; -+ int err, i; -+ -+ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) * -+ sbi->s_groups_count, GFP_KERNEL); -+ if (sbi->s_buddy_blocks == NULL) { -+ printk("can't allocate mem for buddy maps\n"); -+ return -ENOMEM; -+ } -+ memset(sbi->s_buddy_blocks, 0, -+ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count); -+ sbi->s_buddy = NULL; -+ -+ down(&root->i_sem); -+ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root, -+ strlen(EXT3_BUDDY_FILE)); -+ if (IS_ERR(db)) { -+ err = PTR_ERR(db); -+ printk("can't lookup buddy file: %d\n", err); -+ goto out; -+ } -+ -+ if (db->d_inode != NULL) { -+ sbi->s_buddy = igrab(db->d_inode); -+ goto map; -+ } -+ -+ err = ext3_create(root, db, S_IFREG, NULL); -+ if (err) { -+ printk("error while creation buddy file: %d\n", err); -+ } else { -+ sbi->s_buddy = igrab(db->d_inode); -+ } -+ -+map: -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct buffer_head *bh = NULL; -+ handle_t *handle; -+ -+ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS); -+ if (IS_ERR(handle)) { -+ err = PTR_ERR(handle); -+ goto out2; -+ } -+ -+ /* allocate block for bitmap */ -+ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err); -+ if (bh == NULL) { -+ printk("can't get block for buddy bitmap: %d\n", err); -+ goto out2; -+ } -+ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr; -+ brelse(bh); -+ -+ /* allocate block for buddy */ -+ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err); -+ if (bh == NULL) { -+ printk("can't get block for buddy: %d\n", err); -+ goto out2; -+ } -+ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr; -+ brelse(bh); -+ ext3_journal_stop(handle); -+ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock); -+ sbi->s_buddy_blocks[i].bb_md_cur = NULL; -+ sbi->s_buddy_blocks[i].bb_tid = 0; -+ } -+ -+ if (journal_start_commit(sbi->s_journal, &target)) -+ log_wait_commit(sbi->s_journal, target); -+ -+out2: -+ dput(db); -+out: -+ up(&root->i_sem); -+ return err; -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if (sbi->s_buddy_blocks) -+ kfree(sbi->s_buddy_blocks); -+ if (sbi->s_buddy) -+ iput(sbi->s_buddy); -+ if (sbi->s_blocks_reserved) -+ printk("ext3-fs: %ld blocks being reserved at umount!\n", -+ sbi->s_blocks_reserved); -+ return 0; -+} -+ -+int ext3_mb_init(struct super_block *sb) -+{ -+ struct ext3_super_block *es; -+ int i; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* init file for buddy data */ -+ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); -+ ext3_mb_init_backend(sb); -+ -+ es = EXT3_SB(sb)->s_es; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) -+ ext3_mb_generate_buddy(sb, i); -+ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock); -+ spin_lock_init(&EXT3_SB(sb)->s_md_lock); -+ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction); -+ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction); -+ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction); -+ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_desc(sb, md->group, &e3b); -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ mb_free_blocks(&e3b, md->blocks[i], 1); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ kfree(md); -+ ext3_mb_dirty_buddy(&e3b); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be alreade -+ * logged. this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_buddy_group_blocks *db = e3b->bd_bd; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ sb = inode->i_sb; -+ if (!sb) { -+ printk ("ext3_free_blocks: nonexistent device"); -+ return; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+do_more: -+ overflow = 0; -+ block_group = (block - le32_to_cpu(es->s_first_data_block)) / -+ EXT3_BLOCKS_PER_GROUP(sb); -+ bit = (block - le32_to_cpu(es->s_first_data_block)) % -+ EXT3_BLOCKS_PER_GROUP(sb); -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. -+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error (sb, "ext3_free_blocks", -+ "Freeing blocks in system zones - " -+ "Block = %lu, count = %lu", -+ block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_desc(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+ if (metadata) { -+ /* blocks being freed are metadata. these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ mb_free_blocks(&e3b, bit, count); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ ext3_unlock_group(sb, block_group); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ } -+ -+ ext3_mb_dirty_buddy(&e3b); -+ ext3_mb_release_desc(&e3b); -+ -+ /* FIXME: undo logic will be implemented later and another way */ -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ DQUOT_FREE_BLOCK(inode, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} -+ -+int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int free, ret = -ENOSPC; -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); -+ if (blocks <= free - sbi->s_blocks_reserved) { -+ sbi->s_blocks_reserved += blocks; -+ ret = 0; -+ } -+ spin_unlock(&sbi->s_reserve_lock); -+ return ret; -+} -+ -+void ext3_mb_release_blocks(struct super_block *sb, int blocks) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ BUG_ON(blocks < 0); -+ spin_lock(&sbi->s_reserve_lock); -+ sbi->s_blocks_reserved -= blocks; -+ WARN_ON(sbi->s_blocks_reserved < 0); -+ if (sbi->s_blocks_reserved < 0) -+ sbi->s_blocks_reserved = 0; -+ spin_unlock(&sbi->s_reserve_lock); -+} -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, u32 *pc, u32 *pb, int *errp) -+{ -+ int ret, len; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp); -+ goto out; -+ } -+ len = 1; -+ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); -+out: -+ return ret; -+} -+ -+ -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) -+{ -+ if (!test_opt(inode->i_sb, MBALLOC)) -+ ext3_free_blocks_old(handle, inode, block, count); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata); -+ return; -+} -+ -Index: linux-2.6.7/fs/ext3/super.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400 -@@ -392,6 +392,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -594,7 +595,7 @@ - Opt_commit, Opt_journal_update, Opt_journal_inum, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -- Opt_err, Opt_extents, Opt_extdebug -+ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, - }; - - static match_table_t tokens = { -@@ -644,6 +645,7 @@ - {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_extents, "extents"}, - {Opt_extdebug, "extdebug"}, -+ {Opt_mballoc, "mballoc"}, - {Opt_err, NULL} - }; - -@@ -929,6 +931,9 @@ - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt (sbi->s_mount_opt, MBALLOC); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1602,7 +1607,8 @@ - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -- -+ ext3_mb_init(sb); -+ - return 0; - - failed_mount3: -Index: linux-2.6.7/fs/ext3/Makefile -=================================================================== ---- linux-2.6.7.orig/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400 -@@ -5,7 +5,7 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ -- ioctl.o namei.o super.o symlink.o hash.o extents.o -+ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.7/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/balloc.c 2004-08-26 17:11:16.000000000 +0400 -+++ linux-2.6.7/fs/ext3/balloc.c 2004-09-03 08:46:59.000000000 +0400 -@@ -78,7 +78,7 @@ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -98,8 +98,8 @@ - } - - /* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks (handle_t *handle, struct inode * inode, -- unsigned long block, unsigned long count) -+void ext3_free_blocks_old (handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count) - { - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; -@@ -474,8 +474,8 @@ - * This function also updates quota and i_blocks field. - */ - int --ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, -- u32 *prealloc_count, u32 *prealloc_block, int *errp) -+ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, -+ u32 *prealloc_count, u32 *prealloc_block, int *errp) - { - struct buffer_head *bitmap_bh = NULL; /* bh */ - struct buffer_head *gdp_bh; /* bh2 */ -Index: linux-2.6.7/fs/ext3/namei.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400 -@@ -1640,7 +1640,7 @@ - * If the create succeeds, we fill in the inode information - * with d_instantiate(). - */ --static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, -+int ext3_create (struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) - { - handle_t *handle; -Index: linux-2.6.7/fs/ext3/inode.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400 -@@ -254,7 +254,7 @@ - ei->i_prealloc_count = 0; - ei->i_prealloc_block = 0; - /* Writer: end */ -- ext3_free_blocks (inode, block, total); -+ ext3_free_blocks (inode, block, total, 1); - } - #endif - } -@@ -633,7 +633,7 @@ - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); - return err; - } - -@@ -734,7 +734,7 @@ - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, -- le32_to_cpu(where[i].key), 1); -+ le32_to_cpu(where[i].key), 1, 1); - return err; - } - -@@ -1911,7 +1911,7 @@ - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 1); - } - - /** -@@ -2082,7 +2082,7 @@ - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* -Index: linux-2.6.7/fs/ext3/extents.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400 -@@ -740,7 +740,7 @@ - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1388,7 +1388,7 @@ - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -1876,10 +1876,12 @@ - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -1891,7 +1893,7 @@ - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.7/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.7.orig/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400 -@@ -1366,7 +1366,7 @@ - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -@@ -1408,7 +1408,7 @@ - if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { - /* Free the old block. */ - ea_bdebug(old_bh, "freeing"); -- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); - - /* ext3_forget() calls bforget() for us, but we - let our caller release old_bh, so we need to -@@ -1497,7 +1497,7 @@ - lock_buffer(bh); - if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - ext3_xattr_cache_remove(bh); -- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); -+ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); - } else { -Index: linux-2.6.7/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-03 08:47:35.000000000 +0400 -@@ -57,6 +57,8 @@ - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ - /* - * Special inodes numbers - */ -@@ -335,6 +337,7 @@ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -695,7 +698,7 @@ - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, - __u32 *, __u32 *, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern unsigned long ext3_count_free_blocks (struct super_block *); - extern void ext3_check_blocks_bitmap (struct super_block *); - extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -Index: linux-2.6.7/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400 -@@ -23,9 +23,29 @@ - #define EXT_INCLUDE - #include - #include -+#include - #endif - #endif - -+#define EXT3_BB_MAX_BLOCKS 30 -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+#define EXT3_BB_MAX_ORDER 14 -+ -+struct ext3_buddy_group_blocks { -+ sector_t bb_bitmap; -+ sector_t bb_buddy; -+ spinlock_t bb_lock; -+ unsigned bb_counters[EXT3_BB_MAX_ORDER]; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned long bb_tid; -+}; -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +96,17 @@ - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_buddy_group_blocks *s_buddy_blocks; -+ struct inode *s_buddy; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch index d9435b2..9407cef 100644 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch +++ b/lustre/kernel_patches/patches/ext3-nlinks-2.4.20-hp_pnnl.patch @@ -2,12 +2,13 @@ Index: linux/fs/ext3/namei.c =================================================================== --- linux.orig/fs/ext3/namei.c Wed Aug 25 11:34:25 2004 +++ linux/fs/ext3/namei.c Wed Aug 25 13:16:03 2004 -@@ -1541,11 +1541,16 @@ +@@ -1541,11 +1541,17 @@ static inline void ext3_inc_count(handle_t *handle, struct inode *inode) { inode->i_nlink++; + if (is_dx(inode) && inode->i_nlink > 1) { -+ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */ ++ /* limit is 16-bit i_links_count */ ++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) + inode->i_nlink = 1; + } } @@ -151,6 +152,15 @@ Index: linux/include/linux/ext3_fs.h /* * Debug code +@@ -79,7 +81,7 @@ + /* + * Maximal count of links to a file + */ +-#define EXT3_LINK_MAX 32000 ++#define EXT3_LINK_MAX 65000 + + /* + * Macro-instructions used to manage several block sizes @@ -580,14 +580,15 @@ */ diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch index debe4b3..d4460f8 100644 --- a/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-nlinks-2.4.21-chaos.patch @@ -2,12 +2,13 @@ Index: 69chaos/fs/ext3/namei.c =================================================================== --- 69chaos.orig/fs/ext3/namei.c 2004-08-24 23:56:04.000000000 -0700 +++ 69chaos/fs/ext3/namei.c 2004-08-24 23:57:20.000000000 -0700 -@@ -1542,11 +1542,16 @@ +@@ -1542,11 +1542,17 @@ static inline void ext3_inc_count(handle_t *handle, struct inode *inode) { inode->i_nlink++; + if (is_dx(inode) && inode->i_nlink > 1) { -+ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */ ++ /* limit is 16-bit i_links_count */ ++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) + inode->i_nlink = 1; + } } @@ -151,6 +152,15 @@ Index: 69chaos/include/linux/ext3_fs.h /* * Debug code +@@ -79,7 +81,7 @@ + /* + * Maximal count of links to a file + */ +-#define EXT3_LINK_MAX 32000 ++#define EXT3_LINK_MAX 65000 + + /* + * Macro-instructions used to manage several block sizes @@ -582,14 +582,15 @@ */ diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index 456c87f..569fbf9 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -29,7 +29,6 @@ ext3-raw-lookup.patch nfs_export_kernel-2.4.21-chaos.patch ext3-ea-in-inode-2.4.21-chaos.patch listman-2.4.21-chaos.patch -gfp_memalloc-2.4.21-chaos.patch ext3-xattr-ptr-arith-fix.patch kernel_text_address-2.4.18-chaos.patch pagecache-lock-2.4.21-chaos.patch -- 1.8.3.1