-Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs.h 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs.h 2005-12-17 03:13:38.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400
@@ -57,6 +57,14 @@ struct statfs;
#define ext3_debug(f, a...) do {} while (0)
#endif
#endif /* __KERNEL__ */
#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long)
-Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs_sb.h 2005-12-17 02:53:25.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs_sb.h 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
};
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.252-full/fs/ext3/super.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/super.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400
@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -543,7 +544,7 @@ enum {
+@@ -545,7 +546,7 @@ enum {
Opt_ignore, Opt_barrier,
Opt_err,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
};
static match_table_t tokens = {
-@@ -590,6 +591,7 @@ static match_table_t tokens = {
+@@ -591,6 +592,7 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL}
};
-@@ -811,6 +813,9 @@ static int parse_options (char * options
+@@ -813,6 +815,9 @@ static int parse_options (char * options
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1464,6 +1469,7 @@ static int ext3_fill_super (struct super
+@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2112,7 +2118,13 @@ static struct file_system_type ext3_fs_t
+@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2141,6 +2153,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.201/fs/ext3/extents.c
+Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/extents.c 2005-12-17 02:53:29.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/extents.c 2005-12-17 03:10:23.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/inode.c 2005-12-17 03:10:23.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400
+@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -673,7 +673,7 @@ err_out:
+@@ -675,7 +675,7 @@ err_out:
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1835,7 +1835,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
}
}
}
/**
-@@ -2006,7 +2006,7 @@ static void ext3_free_branches(handle_t
+@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t
ext3_journal_test_restart(handle, inode);
}
if (parent_bh) {
/*
-Index: linux-2.6.5-7.201/fs/ext3/balloc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/balloc.c 2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/balloc.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300
++++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400
@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
*
* Return buffer_head on success or NULL in case of failure.
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 2005-12-17 02:53:26.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2005-12-17 03:10:41.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400
@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
new_bh = sb_getblk(sb, block);
if (!new_bh) {
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
-Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c 2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.5-7.201/fs/ext3/mballoc.c 2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2430 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400
+@@ -0,0 +1,2616 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * threshold compared with ffs(len): power-of-two requests at or above it use the 2^N buddy search
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
+
-+ if (max > 0) {
++ if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++/* seq_file ->start: turn *pos into a group cookie, or NULL when *pos is out of range */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* cookie is group + 1 so group 0 differs from NULL; long keeps the cast pointer-sized */
++	return (void *) (long) (*pos + 1);
++}
++
++/* seq_file ->next: advance *pos and return the cookie of that group, NULL past the end */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	++*pos;
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++	/* group + 1 encoding; going through long avoids an int-to-pointer size mismatch */
++	return (void *) (long) (*pos + 1);
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int group = (long) v - 1, i;	/* cookie is group + 1; decode via long, not int */
++	struct sg {
++		struct ext3_group_info info;
++		unsigned short counters[16];	/* room for the counters copied behind info */
++	} sg;
++
++	/* ->show: one line per group; the column header precedes group 0 */
++	if (group == 0)
++		seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++			   "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++			   "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++			   "2^11", "2^12", "2^13");
++
++	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++		sizeof(struct ext3_group_info);
++	ext3_lock_group(sb, group);	/* snapshot under the group lock, print after dropping it */
++	memcpy(&sg, sbi->s_group_info[group], i);
++	ext3_unlock_group(sb, group);
++
++	if (EXT3_MB_GRP_NEED_INIT(&sg.info))	/* such groups produce no row */
++		return 0;
++
++	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++		   sg.info.bb_fragments, sg.info.bb_first_free);
++	for (i = 0; i <= 13; i++)	/* 14 columns, 2^0..2^13, matching the header */
++		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++			   sg.info.bb_counters[i] : 0);
++	seq_printf(seq, " ]\n");
++
++	return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{ /* intentionally empty: this iterator does nothing when a traversal stops */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = { /* passed to seq_open() by ext3_mb_seq_groups_open() */
++ .start = ext3_mb_seq_groups_start, /* cookie for the group at *pos */
++ .next = ext3_mb_seq_groups_next, /* cookie for the following group */
++ .stop = ext3_mb_seq_groups_stop, /* empty */
++ .show = ext3_mb_seq_groups_show, /* prints one group per call */
++};
++
++/* open handler: start a seq_file session and hand the super block to the iterators */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++	struct seq_file *sf;
++	int err = seq_open(file, &ext3_mb_seq_groups_ops);
++
++	if (err)
++		return err;
++
++	sf = file->private_data;
++	sf->private = PDE(inode)->data;	/* read back as seq->private by the callbacks */
++	return 0;
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = { /* only .open is local; the rest are the seq_* helpers */
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open, /* sets up ext3_mb_seq_groups_ops and seq->private */
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++/* proc write handler: parse a positive integer from user space into ext3_mb_order2_reqs */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++				    unsigned long count, void *data)
++{
++	char str[32];
++	long value;
++
++	if (count >= sizeof(str)) {
++		printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++		       EXT3_MB_ORDER2_REQ, (unsigned)sizeof(str));
++		return -EOVERFLOW;
++	}
++
++	if (copy_from_user(str, buffer, count))
++		return -EFAULT;
++	str[count] = '\0';	/* count < sizeof(str) here; the copy itself adds no terminator */
++
++	value = simple_strtol(str, NULL, 0);
++	if (value <= 0)		/* zero and negative thresholds are rejected */
++		return -ERANGE;
++
++	ext3_mb_order2_reqs = value;
++	return count;
++}
++
++/* proc read handler: print ext3_mb_order2_reqs as a single "%ld\n" line */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++				   int count, int *eof, void *data)
++{
++	*eof = 1;	/* set on every call, whatever the offset */
++
++	if (off == 0) {
++		*start = page;
++		return sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++	}
++
++	return 0;	/* a read at a non-zero offset yields no data */
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_MB_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.5-7.201/fs/ext3/Makefile
+Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/Makefile 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o \
-Index: linux-2.6.12.6/include/linux/ext3_fs.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs.h 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/include/linux/ext3_fs.h 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs.h 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs.h 2006-04-29 20:39:10.000000000 +0400
@@ -57,6 +57,14 @@ struct statfs;
#define ext3_debug(f, a...) do {} while (0)
#endif
#endif /* __KERNEL__ */
/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs_sb.h 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/include/linux/ext3_fs_sb.h 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs_sb.h 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h 2006-04-29 20:39:10.000000000 +0400
@@ -21,8 +21,14 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
};
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.12.6/fs/ext3/super.c
+Index: linux-2.6.12.6-bull/fs/ext3/super.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/super.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/super.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/super.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/super.c 2006-04-29 20:39:10.000000000 +0400
@@ -387,6 +387,7 @@ static void ext3_put_super (struct super
struct ext3_super_block *es = sbi->s_es;
int i;
};
static match_table_t tokens = {
-@@ -649,6 +651,7 @@ static match_table_t tokens = {
+@@ -650,6 +651,7 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -964,6 +967,9 @@ clear_qf_name:
+@@ -965,6 +967,9 @@ clear_qf_name:
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1669,6 +1675,7 @@ static int ext3_fill_super (struct super
+@@ -1670,6 +1675,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
lock_kernel();
return 0;
-@@ -2548,7 +2555,13 @@ static struct file_system_type ext3_fs_t
+@@ -2549,7 +2555,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2570,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2571,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.12.6/fs/ext3/extents.c
+Index: linux-2.6.12.6-bull/fs/ext3/extents.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/extents.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/extents.c 2005-12-17 02:21:21.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.12.6-bull.orig/fs/ext3/extents.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/extents.c 2006-04-29 20:39:10.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.12.6/fs/ext3/inode.c
+Index: linux-2.6.12.6-bull/fs/ext3/inode.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/inode.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/inode.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/inode.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/inode.c 2006-04-29 20:39:10.000000000 +0400
@@ -564,7 +564,7 @@ static int ext3_alloc_branch(handle_t *h
ext3_journal_forget(handle, branch[i].bh);
}
if (parent_bh) {
/*
-Index: linux-2.6.12.6/fs/ext3/balloc.c
+Index: linux-2.6.12.6-bull/fs/ext3/balloc.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/balloc.c 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/balloc.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/balloc.c 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/balloc.c 2006-04-29 20:39:10.000000000 +0400
@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
*
* Return buffer_head on success or NULL in case of failure.
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.12.6/fs/ext3/xattr.c
+Index: linux-2.6.12.6-bull/fs/ext3/xattr.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/xattr.c 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/xattr.c 2005-12-17 02:21:33.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/xattr.c 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/xattr.c 2006-04-29 20:39:10.000000000 +0400
@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
error = -EIO;
goto cleanup;
}
-Index: linux-2.6.12.6/fs/ext3/mballoc.c
+Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/mballoc.c 2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.12.6/fs/ext3/mballoc.c 2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.12.6-bull/fs/ext3/mballoc.c 2006-04-30 01:24:11.000000000 +0400
+@@ -0,0 +1,2615 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * threshold compared with ffs(len): power-of-two requests at or above it use the 2^N buddy search
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
+
-+ if (max > 0) {
++ if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++/* seq_file ->start: turn *pos into a group cookie, or NULL when *pos is out of range */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* cookie is group + 1 so group 0 differs from NULL; long keeps the cast pointer-sized */
++	return (void *) (long) (*pos + 1);
++}
++
++/* seq_file ->next: advance *pos and return the cookie of that group, NULL past the end */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	++*pos;
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++	/* group + 1 encoding; going through long avoids an int-to-pointer size mismatch */
++	return (void *) (long) (*pos + 1);
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int group = (long) v - 1, i;	/* cookie is group + 1; decode via long, not int */
++	struct sg {
++		struct ext3_group_info info;
++		unsigned short counters[16];	/* room for the counters copied behind info */
++	} sg;
++
++	/* ->show: one line per group; the column header precedes group 0 */
++	if (group == 0)
++		seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++			   "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++			   "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++			   "2^11", "2^12", "2^13");
++
++	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++		sizeof(struct ext3_group_info);
++	ext3_lock_group(sb, group);	/* snapshot under the group lock, print after dropping it */
++	memcpy(&sg, sbi->s_group_info[group], i);
++	ext3_unlock_group(sb, group);
++
++	if (EXT3_MB_GRP_NEED_INIT(&sg.info))	/* such groups produce no row */
++		return 0;
++
++	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++		   sg.info.bb_fragments, sg.info.bb_first_free);
++	for (i = 0; i <= 13; i++)	/* 14 columns, 2^0..2^13, matching the header */
++		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++			   sg.info.bb_counters[i] : 0);
++	seq_printf(seq, " ]\n");
++
++	return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{ /* seq_file stop callback; empty body, no cleanup is performed here */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = { /* passed to seq_open() by ext3_mb_seq_groups_open() */
++ .start = ext3_mb_seq_groups_start,
++ .next = ext3_mb_seq_groups_next,
++ .stop = ext3_mb_seq_groups_stop,
++ .show = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{ /* open handler: start a seq_file and attach the super block to it */
++ struct super_block *sb = PDE(inode)->data; /* data pointer of the proc entry */
++ int rc;
++
++ rc = seq_open(file, &ext3_mb_seq_groups_ops);
++ if (rc == 0) { /* only touch private_data when seq_open() succeeded */
++ struct seq_file *m = (struct seq_file *)file->private_data;
++ m->private = sb; /* read back by the callbacks through seq->private */
++ }
++ return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = { /* installed as proc_fops of the "mb_groups" entry */
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{ /* proc write handler: parse a positive number into ext3_mb_order2_reqs */
++ char str[32];
++ long value;
++
++ if (count >= sizeof(str)) { /* also keeps one byte free for the terminator */
++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++ EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++ return -EOVERFLOW;
++ }
++
++ if (copy_from_user(str, buffer, count))
++ return -EFAULT;
++
++ str[count] = '\0'; /* simple_strtol() needs a terminated string */
++ value = simple_strtol(str, NULL, 0);
++ if (value <= 0) /* zero and negative values are rejected */
++ return -ERANGE;
++
++ ext3_mb_order2_reqs = value;
++
++ return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ /* proc read handler: prints ext3_mb_order2_reqs as one decimal line */
++ int len;
++
++ *eof = 1; /* end-of-file is reported on every call */
++ if (off != 0)
++ return 0; /* all output is produced by the read at offset 0 */
++
++ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++ *start = page;
++ return len; /* number of characters written into page */
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.12.6/fs/ext3/Makefile
+Index: linux-2.6.12.6-bull/fs/ext3/Makefile
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/Makefile 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/Makefile 2005-12-17 02:21:21.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/Makefile 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/Makefile 2006-04-29 20:39:10.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o \
-Index: linux-2.6.9-full/include/linux/ext3_fs.h
-===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs.h 2005-12-16 23:16:42.000000000 +0300
-@@ -57,6 +57,14 @@ struct statfs;
- #define ext3_debug(f, a...) do {} while (0)
- #endif
-
-+#define EXT3_MULTIBLOCK_ALLOCATOR 1
-+
-+#define EXT3_MB_HINT_MERGE 1
-+#define EXT3_MB_HINT_RESERVED 2
-+#define EXT3_MB_HINT_METADATA 4
-+#define EXT3_MB_HINT_FIRST 8
-+#define EXT3_MB_HINT_BEST 16
-+
- /*
- * Special inodes numbers
- */
-@@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */
-
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
- extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
- extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
-- unsigned long);
-+ unsigned long, int);
- extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
- unsigned long, unsigned long, int *);
- extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg);
-
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif /* __KERNEL__ */
-
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2005-12-16 23:16:39.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2006-05-22 21:45:08.000000000 +0400
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -81,6 +87,38 @@ struct ext3_sb_info {
+@@ -81,6 +87,39 @@ struct ext3_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ tid_t s_last_transaction;
+ int s_mb_factor;
+ unsigned short *s_mb_offsets, *s_mb_maxs;
++ unsigned long s_stripe;
+
+ /* history to debug policy */
+ struct ext3_mb_history *s_mb_history;
};
#endif /* _LINUX_EXT3_FS_SB */
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-05-22 21:44:37.000000000 +0400
+@@ -57,6 +57,14 @@ struct statfs;
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
++#define EXT3_MB_HINT_MERGE 1
++#define EXT3_MB_HINT_RESERVED 2
++#define EXT3_MB_HINT_METADATA 4
++#define EXT3_MB_HINT_FIRST 8
++#define EXT3_MB_HINT_BEST 16
++
+ /*
+ * Special inodes numbers
+ */
+@@ -365,6 +373,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+ unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *, int);
++extern int ext3_mb_release(struct super_block *);
++extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
++extern int ext3_mb_reserve_blocks(struct super_block *, int);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif /* __KERNEL__ */
+
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-2.6.9-full/fs/ext3/super.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/super.c 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c 2006-05-22 21:52:54.000000000 +0400
@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
struct ext3_super_block *es = sbi->s_es;
int i;
Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
- Opt_extents, Opt_extdebug,
-+ Opt_extents, Opt_extdebug, Opt_mballoc,
++ Opt_extents, Opt_extdebug, Opt_mballoc, Opt_stripe
};
static match_table_t tokens = {
-@@ -647,6 +649,7 @@ static match_table_t tokens = {
+@@ -648,6 +649,8 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
+ {Opt_mballoc, "mballoc"},
++ {Opt_stripe, "stripe=%u"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -957,6 +960,9 @@ clear_qf_name:
+@@ -958,6 +961,16 @@ clear_qf_name:
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
+ case Opt_mballoc:
+ set_opt (sbi->s_mount_opt, MBALLOC);
+ break;
++ case Opt_stripe:
++ if (match_int(&args[0], &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ sbi->s_stripe = option;
++ break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1646,6 +1652,7 @@ static int ext3_fill_super (struct super
+@@ -1647,6 +1660,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2428,7 +2435,13 @@ static struct file_system_type ext3_fs_t
+@@ -2429,7 +2443,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2450,6 +2463,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2451,6 +2471,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
int ext3_prep_san_write(struct inode *inode, long *blocks,
Index: linux-2.6.9-full/fs/ext3/extents.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/extents.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/extents.c 2005-12-16 23:16:42.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.9-full.orig/fs/ext3/extents.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/extents.c 2006-05-22 21:44:37.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.9-full/fs/ext3/inode.c
+Index: linux-2.6.9-full/fs/ext3/Makefile
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/inode.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/inode.c 2005-12-16 23:16:42.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
- ext3_journal_forget(handle, branch[i].bh);
- }
- for (i = 0; i < keys; i++)
-- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
- return err;
- }
-
-@@ -673,7 +673,7 @@ err_out:
- if (err == -EAGAIN)
- for (i = 0; i < num; i++)
- ext3_free_blocks(handle, inode,
-- le32_to_cpu(where[i].key), 1);
-+ le32_to_cpu(where[i].key), 1, 1);
- return err;
- }
-
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
- }
- }
+--- linux-2.6.9-full.orig/fs/ext3/Makefile 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/Makefile 2006-05-22 21:44:37.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
-- ext3_free_blocks(handle, inode, block_to_free, count);
-+ ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
-
- /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
- ext3_journal_test_restart(handle, inode);
- }
-
-- ext3_free_blocks(handle, inode, nr, 1);
-+ ext3_free_blocks(handle, inode, nr, 1, 1);
-
- if (parent_bh) {
- /*
-Index: linux-2.6.9-full/fs/ext3/balloc.c
-===================================================================
---- linux-2.6.9-full.orig/fs/ext3/balloc.c 2005-10-27 21:44:24.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/balloc.c 2005-12-16 23:16:42.000000000 +0300
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
- *
- * Return buffer_head on success or NULL in case of failure.
- */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
- struct ext3_group_desc * desc;
-@@ -450,24 +450,6 @@ error_return:
- return;
- }
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o \
+- extents.o
++ extents.o mballoc.o
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
-- unsigned long block, unsigned long count)
--{
-- struct super_block * sb;
-- int dquot_freed_blocks;
--
-- sb = inode->i_sb;
-- if (!sb) {
-- printk ("ext3_free_blocks: nonexistent device");
-- return;
-- }
-- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
-- if (dquot_freed_blocks)
-- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
-- return;
--}
--
- /*
- * For ext3 allocations, we must not reuse any blocks which are
- * allocated in the bitmap buffer's "last committed data" copy. This
-@@ -1140,7 +1122,7 @@ int ext3_should_retry_alloc(struct super
- * bitmap, and then for any free bit if that fails.
- * This function also updates quota and i_blocks field.
- */
--int ext3_new_block(handle_t *handle, struct inode *inode,
-+int ext3_new_block_old(handle_t *handle, struct inode *inode,
- unsigned long goal, int *errp)
- {
- struct buffer_head *bitmap_bh = NULL;
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
Index: linux-2.6.9-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/xattr.c 2005-12-16 23:16:40.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/xattr.c 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-05-22 21:44:37.000000000 +0400
@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
new_bh = sb_getblk(sb, block);
if (!new_bh) {
} else {
Index: linux-2.6.9-full/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2005-12-16 17:46:19.148560250 +0300
-+++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2006-05-12 23:14:51.200000000 +0400
++++ linux-2.6.9-full/fs/ext3/mballoc.c 2006-05-22 21:51:30.000000000 +0400
+@@ -0,0 +1,2671 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ struct ext3_buddy *e3b)
+{
+ int group = ac->ac_g_ex.fe_group, max, err;
++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++ struct ext3_super_block *es = sbi->s_es;
+ struct ext3_free_extent ex;
+
+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
+ ext3_lock_group(ac->ac_sb, group);
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
-+
-+ if (max > 0) {
++
++ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++ unsigned long start;
++ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++ ex.fe_start + le32_to_cpu(es->s_first_data_block));
++ if (start % sbi->s_stripe == 0) {
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ }
++ } else if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ }
+}
+
++/*
++ * Special case for storage such as raid5:
++ * look for a stripe-aligned free chunk to serve a stripe-size request
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b)
++{
++ struct super_block *sb = ac->ac_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ void *bitmap = EXT3_MB_BITMAP(e3b);
++ struct ext3_free_extent ex;
++ unsigned long i, max;
++
++ J_ASSERT(sbi->s_stripe != 0); /* s_stripe is used as a divisor below */
++
++ /* find first stripe-aligned block */
++ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(sbi->s_es->s_first_data_block); /* filesystem-wide number of the group's first block */
++ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; /* round up to a multiple of s_stripe */
++ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++ % EXT3_BLOCKS_PER_GROUP(sb); /* convert back to an offset inside the group */
++
++ while (i < sb->s_blocksize * 8) { /* s_blocksize * 8 = bits in one bitmap block */
++ if (!mb_test_bit(i, bitmap)) { /* bit clear - presumably block i is free */
++ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++ if (max >= sbi->s_stripe) { /* a whole stripe fits at this offset */
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ break; /* stop at the first fit */
++ }
++ }
++ i += sbi->s_stripe; /* next stripe-aligned candidate */
++ }
++}
++
+static int ext3_mb_good_group(struct ext3_allocation_context *ac,
+ int group, int cr)
+{
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+ ac.ac_2order = 0;
+ ac.ac_criteria = 0;
+
++ if (*len == 1 && sbi->s_stripe) {
++ /* looks like a metadata, let's use a dirty hack for raid5
++ * move all metadata in first groups in hope to hit cached
++ * sectors and thus avoid read-modify cycles in raid5 */
++ ac.ac_g_ex.fe_group = group = 0;
++ }
++
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ ac.ac_groups_scanned++;
+ if (cr == 0)
+ ext3_mb_simple_scan_group(&ac, &e3b);
++ else if (cr == 1 && *len == sbi->s_stripe)
++ ext3_mb_scan_aligned(&ac, &e3b);
+ else
+ ext3_mb_complex_scan_group(&ac, &e3b);
+
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{ /* seq_file start callback: turn *pos into a group cookie, or NULL */
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ long group; /* long, so the pointer cast below is size-safe on 64-bit */
++
++ if (*pos < 0 || *pos >= sbi->s_groups_count)
++ return NULL; /* position is outside the group range */
++
++ group = *pos + 1; /* biased by one: group 0 must not become NULL */
++ return (void *) group;
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{ /* seq_file next callback: advance *pos and return the new cookie */
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ long group; /* long, so the pointer cast below is size-safe on 64-bit */
++
++ ++*pos; /* move on to the following group */
++ if (*pos < 0 || *pos >= sbi->s_groups_count)
++ return NULL; /* no more groups */
++ group = *pos + 1; /* biased by one: group 0 must not become NULL */
++ return (void *) group;
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{ /* seq_file show callback: print one line of buddy statistics per group */
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int group = (long) v, i; /* via long: pointer-sized cast, safe on 64-bit */
++ struct sg {
++ struct ext3_group_info info;
++ unsigned short counters[16]; /* NOTE(review): assumes s_blocksize_bits + 2 counters fit here - confirm */
++ } sg;
++
++ group--; /* undo the +1 bias applied by the start/next callbacks */
++ if (group == 0) /* column header goes out before the first group */
++ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++ "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++ "2^11", "2^12", "2^13");
++
++ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++ sizeof(struct ext3_group_info); /* bytes to snapshot: header plus counters */
++ ext3_lock_group(sb, group);
++ memcpy(&sg, sbi->s_group_info[group], i); /* copy taken under the group lock */
++ ext3_unlock_group(sb, group);
++
++ if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++ return 0; /* print nothing for a group flagged NEED_INIT */
++
++ seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++ sg.info.bb_fragments, sg.info.bb_first_free);
++ for (i = 0; i <= 13; i++) /* 14 columns, matching 2^0..2^13 in the header */
++ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++ sg.info.bb_counters[i] : 0); /* orders above the limit print as 0 */
++ seq_printf(seq, " ]\n");
++
++ return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{ /* seq_file stop callback; empty body, no cleanup is performed here */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = { /* passed to seq_open() by ext3_mb_seq_groups_open() */
++ .start = ext3_mb_seq_groups_start,
++ .next = ext3_mb_seq_groups_next,
++ .stop = ext3_mb_seq_groups_stop,
++ .show = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{ /* open handler: start a seq_file and attach the super block to it */
++ struct super_block *sb = PDE(inode)->data; /* data pointer of the proc entry */
++ int rc;
++
++ rc = seq_open(file, &ext3_mb_seq_groups_ops);
++ if (rc == 0) { /* only touch private_data when seq_open() succeeded */
++ struct seq_file *m = (struct seq_file *)file->private_data;
++ m->private = sb; /* read back by the callbacks through seq->private */
++ }
++ return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = { /* installed as proc_fops of the "mb_groups" entry */
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{ /* proc write handler: parse a positive number into ext3_mb_order2_reqs */
++ char str[32];
++ long value;
++
++ if (count >= sizeof(str)) { /* also keeps one byte free for the terminator */
++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++ EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++ return -EOVERFLOW;
++ }
++
++ if (copy_from_user(str, buffer, count))
++ return -EFAULT;
++
++ str[count] = '\0'; /* simple_strtol() needs a terminated string */
++ value = simple_strtol(str, NULL, 0);
++ if (value <= 0) /* zero and negative values are rejected */
++ return -ERANGE;
++
++ ext3_mb_order2_reqs = value;
++
++ return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ /* proc read handler: prints ext3_mb_order2_reqs as one decimal line */
++ int len;
++
++ *eof = 1; /* end-of-file is reported on every call */
++ if (off != 0)
++ return 0; /* all output is produced by the read at offset 0 */
++
++ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++ *start = page;
++ return len; /* number of characters written into page */
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ long value;
+
+ if (count >= sizeof(str)) {
-+ printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
+ return -EOVERFLOW;
+ }
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
+ return -EIO;
+ }
+
+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_stats == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_STATS_NAME);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+ return -EIO;
+ EXT3_MB_MAX_TO_SCAN_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_max_to_scan == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_MAX_TO_SCAN_NAME);
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+ EXT3_MB_MIN_TO_SCAN_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_min_to_scan == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_MIN_TO_SCAN_NAME);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.9-full/fs/ext3/Makefile
+Index: linux-2.6.9-full/fs/ext3/balloc.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/Makefile 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/Makefile 2005-12-16 23:16:42.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.9-full.orig/fs/ext3/balloc.c 2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9-full/fs/ext3/balloc.c 2006-05-22 21:44:37.000000000 +0400
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -451,24 +451,6 @@ error_return:
+ return;
+ }
- ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o \
-- extents.o
-+ extents.o mballoc.o
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+- unsigned long block, unsigned long count)
+-{
+- struct super_block * sb;
+- int dquot_freed_blocks;
+-
+- sb = inode->i_sb;
+- if (!sb) {
+- printk ("ext3_free_blocks: nonexistent device");
+- return;
+- }
+- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+- if (dquot_freed_blocks)
+- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+- return;
+-}
+-
+ /*
+ * For ext3 allocations, we must not reuse any blocks which are
+ * allocated in the bitmap buffer's "last committed data" copy. This
+@@ -1131,7 +1113,7 @@ int ext3_should_retry_alloc(struct super
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+ unsigned long goal, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c 2006-05-22 21:44:37.000000000 +0400
+@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
- ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+@@ -673,7 +673,7 @@ err_out:
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
Details : Guard negative dentries with UPDATE lock on parent dir, drop
negative dentries on lock revocation.
+Severity : minor
+Frequency : Always
+Bugzilla : 10510
+Description: Remounting a client read-only wasn't possible with a zconf mount
+Details : It wasn't possible to remount a client read-only with llmount.
+
+Severity : enhancement
+Description: Include MPICH 1.2.6 Lustre ADIO interface patch
+Details : In lustre/contrib/ or /usr/share/lustre in RPM a patch for
+ MPICH is included to add Lustre-specific ADIO interfaces.
+ This is based closely on the UFS ADIO layer and only differs
+ in file creation, in order to allow the OST striping to be set.
+ This is user-contributed code and not supported by CFS.
+
+Severity : minor
+Frequency : Always
+Bugzilla : 9486
+Description: extended inode attributes work improperly for the case of 2.4/2.6
+ kernels used on client/server or the other way around.
+Details : Introduce kernel-independent values for these flags.
+
------------------------------------------------------------------------------
client node to run out of memory. Instead flush old inodes
from client cache that have the same inode number as a new inode.
+Severity : minor
+Frequency : SLES9 2.6.5 kernel and long filenames only
+Bugzilla : 9969, 10379
+Description: utime reports stale NFS file handle
+Details : SLES9 uses out-of-dentry names in some cases, which confused
+ the lustre dentry revalidation. Change it to always use the
+ in-dentry qstr.
+
Severity : major
Frequency : rare, unless heavy write-truncate concurrency is continuous
Bugzilla : 4180, 6984, 7171, 9963, 9331
AUTOMAKE_OPTIONS = foreign
+# also update lustre/autoconf/lustre-core.m4 AC_CONFIG_FILES
ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \
- mgc doc utils tests conf scripts autoconf
+ mgc doc utils tests conf scripts autoconf contrib
SERVER_SUBDIRS := ldiskfs obdfilter ost mds mgs
[LC_CONFIG_OBD_BUFFER_SIZE
# include/liblustre.h
-AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h])
+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
# include/lustre/lustre_user.h
# See note there re: __ASM_X86_64_PROCESSOR_H
AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
AM_CONDITIONAL(QUOTA, test x$enable_quota = xyes)
+AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
])
#
lustre/Makefile
lustre/autoMakefile
lustre/autoconf/Makefile
+lustre/contrib/Makefile
lustre/conf/Makefile
lustre/doc/Makefile
lustre/include/Makefile
--- /dev/null
+Makefile
+Makefile.in
--- /dev/null
+# Contributions Makefile
+
+EXTRA_DIST = mpich-*.patch
+pkgdata_DATA = $(EXTRA_DIST)
+
--- /dev/null
+The files in this directory are user-contributed and are not supported by
+CFS in any way.
--- /dev/null
+BLOCK DEVICE VERIFICATION TOOL. ( bdevt )
+==========================================
+
+Building tool:
+ To build this tool you just need to invoke make at command prompt.
+ e.g. $ make
+
+ this will compile the sources and build bdevt in this directory.
+
+Usage:
+Syntax:
+
+./bdevt [OPTION]... <device-name> ...
+
+[OPTION]
+ -t {seconds} for --timestamp, set test time (default=current time())
+ -o {offset} for --offset, offset in kB of start of test (default=0)
+ -r run test in read (verify) mode
+ -w run test in write (test-pattern) mode (default=r&w)
+ -v for verbose
+ -p for --partial, for partial check (1GB steps)
+ -l for --long, full check (default 4k)
+ -c for --chunksize, IO chunk size (default=1048576)
+ -f for --force, force test to run without confirmation
+ --help to display help.
+
+Guide lines for using this tool:
+ It is expected that the bdevt tool will be run on very large (TB-sized)
+devices, so it is always better to run it in verbose mode so that one can
+easily restart device testing from the point at which it had stopped.
+for example:
+
+ [root@tucker bdevt]# ./bdevt -v -f -w --timestamp=1009839028 /dev/hda5
+ Number of sectors: 49158837, this makes 23.441 GB
+ Timestamp: 1009839028
+ Current write offset: 5078016 kB
+
+If for some reason somebody interrupts execution at this point, the test can
+easily be restarted from the same point by passing the offset last
+displayed by the verbose output, as shown below.
+
+ [root@tucker bdevt]# ./bdevt -v -f -w --offset=5078016 --timestamp=1009839028 /dev/hda5
+ Number of sectors: 49158837, this makes 23.441 GB
+ Timestamp: 1009839028
+ Current write offset: 9726208 kB
+
+One can use similar things for read only and read write modes also.
+
--- /dev/null
+FILESYSTEM VERIFICATION TOOL. ( ext3vt )
+==========================================
+
+Building tool:
+ To build this tool you just need to invoke make at command prompt.
+ e.g. $ make
+
+ this will compile the sources and build ext3vt in this directory.
+
+Usage:
+Syntax:
+
+./ext3vt [OPTION]... <filesystem path> ...
+
+[OPTION]
+ -t {seconds} for --timestamp, set test time(default=current time())
+ -o {fileOffset} for --fileOffset, full path of file from which tests should start
+ -r run test in read (verify) mode
+ -w run test in write (test-pattern) mode (default=r&w)
+ -v for verbose
+ -p for --partial, for partial check (1MB files)
+ -l for --long, full check (4GB file with 4k blocks)
+ -c for --chunksize, IO chunk size (default=1048576)
+ -h display this help and exit
+ --help display this help and exit
+
+Guide lines for using this tool:
+ It is expected that the ext3vt tool will be run on very large (TB-sized)
+filesystems, so it is always better to run it in verbose mode so that one
+can easily restart testing from the point at which it had stopped.
+for example:
+
+ [root@Matrix ext3vt]# ./ext3vtnew -v -f -w --timestamp=1145009417 /mnt/store/
+ Timestamp: 1145009417
+ write File name: /mnt/store/dir00004/file005
+
+If for some reason somebody interrupts execution at this point, the test can
+easily be restarted from the same point by passing the file offset last
+displayed by the verbose output, as shown below.
+
+ [root@tucker ext3vt]# ./bdevt -v -f -w --fileOffset=/home/dir00004/file005
+ --timestamp=1145009417 /mnt/store/
+ Timestamp: 1145009417
+ write File name: /mnt/store/dir00008/file007
+ write complete
+ [root@tucker ext3vt]#
+One can use similar things for read only and read write modes also.
static inline void clear_page_dirty(struct page *page)
{
if (PageDirty(page))
- ClearPageDirty(page);
+ ClearPageDirty(page);
}
static inline int clear_page_dirty_for_io(struct page *page)
#include <linux/fs.h>
#include <linux/dcache.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
#include <linux/proc_fs.h>
#include <obd_class.h>
#endif
#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
- !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H) && \
- !defined(_X86_64_TYPES_H))
+ !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H)) && \
+ !defined(_ASM_IA64_TYPES_H) && !defined(_X86_64_TYPES_H) && \
+ !defined(_PPC_TYPES_H) && !defined(_PPC64_TYPES_H)
+ /* yuck, would be nicer with _ASM_TYPES_H */
typedef unsigned short umode_t;
/*
#define OBD_CONNECT_ATTRFID 0x4000ULL /* Server supports GetAttr By Fid */
#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */
#define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
+#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client */
/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
extern void lustre_swab_mds_status_req (struct mds_status_req *r);
#define MDS_BFLAG_UNCOMMITTED_WRITES 0x1
+#define MDS_BFLAG_EXT_FLAGS 0x80000000 /* == EXT3_RESERVED_FL */
+
+/* these should be identical to their EXT3_*_FL counterparts, and are
+ * redefined here only to avoid dragging in ext3_fs.h */
+#define MDS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define MDS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define MDS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define MDS_NOATIME_FL 0x00000080 /* do not update atime */
+#define MDS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */
+
+#ifdef __KERNEL__
+/* If MDS_BFLAG_EXT_FLAGS is set it means we requested EXT3_*_FL inode flags
+ * and the MDS_*_FL bits in flags are decoded into the local S_* flags for
+ * the inode.  Otherwise the flags are passed straight through (see
+ * bug 9486). */
+static inline int ll_ext_to_inode_flags(int flags)
+{
+	int iflags = 0;
+
+	/* marker bit absent: nothing to decode */
+	if (!(flags & MDS_BFLAG_EXT_FLAGS))
+		return flags & ~MDS_BFLAG_EXT_FLAGS;
+
+	if (flags & MDS_SYNC_FL)
+		iflags |= S_SYNC;
+	if (flags & MDS_NOATIME_FL)
+		iflags |= S_NOATIME;
+	if (flags & MDS_APPEND_FL)
+		iflags |= S_APPEND;
+#if defined(S_DIRSYNC)
+	if (flags & MDS_DIRSYNC_FL)
+		iflags |= S_DIRSYNC;
+#endif
+	if (flags & MDS_IMMUTABLE_FL)
+		iflags |= S_IMMUTABLE;
+
+	return iflags;
+}
+
+/* If MDS_BFLAG_EXT_FLAGS is set in oflags it means we requested EXT3_*_FL
+ * inode flags; oflags is returned with only that marker bit cleared.
+ * Otherwise the S_* flags in iflags are converted to their MDS_*_FL
+ * (EXT3_*_FL) equivalents (see bug 9486). */
+static inline int ll_inode_to_ext_flags(int oflags, int iflags)
+{
+	int ext_flags = 0;
+
+	/* marker bit present: pass the on-wire flags through */
+	if (oflags & MDS_BFLAG_EXT_FLAGS)
+		return oflags & ~MDS_BFLAG_EXT_FLAGS;
+
+	if (iflags & S_SYNC)
+		ext_flags |= MDS_SYNC_FL;
+	if (iflags & S_NOATIME)
+		ext_flags |= MDS_NOATIME_FL;
+	if (iflags & S_APPEND)
+		ext_flags |= MDS_APPEND_FL;
+#if defined(S_DIRSYNC)
+	if (iflags & S_DIRSYNC)
+		ext_flags |= MDS_DIRSYNC_FL;
+#endif
+	if (iflags & S_IMMUTABLE)
+		ext_flags |= MDS_IMMUTABLE_FL;
+
+	return ext_flags;
+}
+#endif
struct mds_body {
struct ll_fid fid1;
REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \
req->rq_transno, \
req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, \
- req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "<?>", \
+ req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : \
+ req->rq_export ? (char*)req->rq_export->exp_client_uuid.uuid : "<?>",\
req->rq_import ? \
- (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "<?>", \
+ (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : \
+ req->rq_export ? \
+ (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>", \
(req->rq_import && req->rq_import->imp_client) ? \
req->rq_import->imp_client->cli_request_portal : -1, \
req->rq_reqlen, req->rq_replen, \
#define IOC_MDC_TYPE 'i'
#define IOC_MDC_MIN_NR 20
/* Moved to lustre_user.h
-#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_ioctl_data *)
#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
#define IOC_MDC_MAX_NR 50
/* this is really local to the OSC */
struct loi_oap_pages {
struct list_head lop_pending;
- int lop_num_pending;
struct list_head lop_urgent;
struct list_head lop_pending_group;
+ int lop_num_pending;
};
struct osc_async_rc {
#
CONFIG_EFI_VARS=y
CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=m
CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
#
# SCSI low-level drivers
# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
CONFIG_AIC7XXX_DEBUG_MASK=0
# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC94XX=m
+# CONFIG_AIC94XX_DEBUG is not set
CONFIG_SCSI_AIC7XXX_OLD=m
CONFIG_SCSI_AIC79XX=m
CONFIG_AIC79XX_CMDS_PER_DEVICE=4
CONFIG_MEGARAID_NEWGEN=y
CONFIG_MEGARAID_MM=m
CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
CONFIG_SCSI_SATA=y
CONFIG_SCSI_SATA_AHCI=m
CONFIG_SCSI_SATA_SVW=m
#
# Fusion MPT device support
#
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
CONFIG_FUSION_MAX_SGE=40
CONFIG_FUSION_CTL=m
CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
#
# IEEE 1394 (FireWire) support
# CONFIG_YELLOWFIN is not set
CONFIG_R8169=m
CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
CONFIG_SK98LIN=m
CONFIG_VIA_VELOCITY=m
CONFIG_TIGON3=m
+CONFIG_BNX2=m
#
# Ethernet (10000 Mbit)
# Active AVM cards
#
CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
#
# Active Eicon DIVA Server cards
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_CRASH is not set
CONFIG_RAW_DRIVER=y
# CONFIG_HPET is not set
CONFIG_MAX_RAW_DEVS=8192
-# CONFIG_MMTIMER is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_MMTIMER=m
#
# I2C support
# CONFIG_USB_GADGET is not set
#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+# CONFIG_EDAC is not set
+
+#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_LOCKD=m
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=m
+CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=m
CONFIG_SUNRPC_GSS=m
CONFIG_RPCSEC_GSS_KRB5=m
#
CONFIG_EFI_VARS=y
CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=m
CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
#
# SCSI low-level drivers
# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
CONFIG_AIC7XXX_DEBUG_MASK=0
# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC94XX=m
+# CONFIG_AIC94XX_DEBUG is not set
CONFIG_SCSI_AIC7XXX_OLD=m
CONFIG_SCSI_AIC79XX=m
CONFIG_AIC79XX_CMDS_PER_DEVICE=4
CONFIG_MEGARAID_NEWGEN=y
CONFIG_MEGARAID_MM=m
CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
CONFIG_SCSI_SATA=y
CONFIG_SCSI_SATA_AHCI=m
CONFIG_SCSI_SATA_SVW=m
#
# Fusion MPT device support
#
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
CONFIG_FUSION_MAX_SGE=40
CONFIG_FUSION_CTL=m
CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
#
# IEEE 1394 (FireWire) support
# CONFIG_YELLOWFIN is not set
CONFIG_R8169=m
CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
CONFIG_SK98LIN=m
CONFIG_VIA_VELOCITY=m
CONFIG_TIGON3=m
+CONFIG_BNX2=m
#
# Ethernet (10000 Mbit)
# Active AVM cards
#
CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
#
# Active Eicon DIVA Server cards
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_CRASH is not set
CONFIG_RAW_DRIVER=y
# CONFIG_HPET is not set
CONFIG_MAX_RAW_DEVS=8192
-# CONFIG_MMTIMER is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_MMTIMER=m
#
# I2C support
# CONFIG_USB_GADGET is not set
#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+# CONFIG_EDAC is not set
+
+#
# File systems
#
CONFIG_EXT2_FS=y
CONFIG_LOCKD=m
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=m
+CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=m
CONFIG_SUNRPC_GSS=m
CONFIG_RPCSEC_GSS_KRB5=m
--- /dev/null
+Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
+
+Index: linux-2.6.9-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-05-20 01:14:14.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c 2006-05-20 01:17:10.000000000 +0400
+@@ -39,7 +39,8 @@
+ #include "xattr.h"
+ #include "acl.h"
+
+-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
++static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
++ unsigned long journal_devnum);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+ int);
+ static void ext3_commit_super (struct super_block * sb,
+@@ -591,7 +592,7 @@ enum {
+ Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_reservation, Opt_noreservation, Opt_noload,
+- Opt_commit, Opt_journal_update, Opt_journal_inum,
++ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+@@ -630,6 +631,7 @@ static match_table_t tokens = {
+ {Opt_commit, "commit=%u"},
+ {Opt_journal_update, "journal=update"},
+ {Opt_journal_inum, "journal=%u"},
++ {Opt_journal_dev, "journal_dev=%u"},
+ {Opt_abort, "abort"},
+ {Opt_data_journal, "data=journal"},
+ {Opt_data_ordered, "data=ordered"},
+@@ -675,8 +677,9 @@ static unsigned long get_sb_block(void *
+ return sb_block;
+ }
+
+-static int parse_options (char * options, struct super_block *sb,
+- unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
++static int parse_options (char *options, struct super_block *sb,
++ unsigned long *inum, unsigned long *journal_devnum,
++ unsigned long *n_blocks_count, int is_remount)
+ {
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char * p;
+@@ -816,6 +819,16 @@ static int parse_options (char * options
+ return 0;
+ *inum = option;
+ break;
++ case Opt_journal_dev:
++ if (is_remount) {
++ printk(KERN_ERR "EXT3-fs: cannot specify "
++ "journal on remount\n");
++ return 0;
++ }
++ if (match_int(&args[0], &option))
++ return 0;
++ *journal_devnum = option;
++ break;
+ case Opt_noload:
+ set_opt (sbi->s_mount_opt, NOLOAD);
+ break;
+@@ -1278,6 +1291,7 @@ static int ext3_fill_super (struct super
+ unsigned long logic_sb_block;
+ unsigned long offset = 0;
+ unsigned long journal_inum = 0;
++ unsigned long journal_devnum = 0;
+ unsigned long def_mount_opts;
+ struct inode *root;
+ int blocksize;
+@@ -1361,7 +1375,8 @@ static int ext3_fill_super (struct super
+
+ set_opt(sbi->s_mount_opt, RESERVATION);
+
+- if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
++ if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
++ NULL, 0))
+ goto failed_mount;
+
+ set_sb_time_gran(sb, 1000000000U);
+@@ -1567,7 +1582,7 @@ static int ext3_fill_super (struct super
+ */
+ if (!test_opt(sb, NOLOAD) &&
+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+- if (ext3_load_journal(sb, es))
++ if (ext3_load_journal(sb, es, journal_devnum))
+ goto failed_mount2;
+ } else if (journal_inum) {
+ if (ext3_create_journal(sb, es, journal_inum))
+@@ -1831,15 +1846,24 @@ out_bdev:
+ return NULL;
+ }
+
+-static int ext3_load_journal(struct super_block * sb,
+- struct ext3_super_block * es)
++static int ext3_load_journal(struct super_block *sb,
++ struct ext3_super_block *es,
++ unsigned long journal_devnum)
+ {
+ journal_t *journal;
+ int journal_inum = le32_to_cpu(es->s_journal_inum);
+- dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
++ dev_t journal_dev;
+ int err = 0;
+ int really_read_only;
+
++ if (journal_devnum &&
++ journal_devnum != le32_to_cpu(es->s_journal_dev)) {
++ printk(KERN_INFO "EXT3-fs: external journal device major/minor "
++ "numbers have changed\n");
++ journal_dev = new_decode_dev(journal_devnum);
++ } else
++ journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
++
+ really_read_only = bdev_read_only(sb->s_bdev);
+
+ /*
+@@ -1898,6 +1922,16 @@ static int ext3_load_journal(struct supe
+
+ EXT3_SB(sb)->s_journal = journal;
+ ext3_clear_journal_err(sb, es);
++
++ if (journal_devnum &&
++ journal_devnum != le32_to_cpu(es->s_journal_dev)) {
++ es->s_journal_dev = cpu_to_le32(journal_devnum);
++ sb->s_dirt = 1;
++
++ /* Make sure we flush the recovery flag to disk. */
++ ext3_commit_super(sb, es, 1);
++ }
++
+ return 0;
+ }
+
+@@ -2105,13 +2139,13 @@ int ext3_remount (struct super_block * s
+ {
+ struct ext3_super_block * es;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+- unsigned long tmp;
++ unsigned long tmp1, tmp2;
+ unsigned long n_blocks_count = 0;
+
+ /*
+ * Allow the "check" option to be passed as a remount option.
+ */
+- if (!parse_options(data, sb, &tmp, &n_blocks_count, 1))
++ if (!parse_options(data, sb, &tmp1, &tmp2, &n_blocks_count, 1))
+ return -EINVAL;
+
+ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs.h 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs.h 2005-12-17 03:13:38.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400
@@ -57,6 +57,14 @@ struct statfs;
#define ext3_debug(f, a...) do {} while (0)
#endif
#endif /* __KERNEL__ */
#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long)
-Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs_sb.h 2005-12-17 02:53:25.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs_sb.h 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
};
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.252-full/fs/ext3/super.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/super.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400
@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -543,7 +544,7 @@ enum {
+@@ -545,7 +546,7 @@ enum {
Opt_ignore, Opt_barrier,
Opt_err,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
};
static match_table_t tokens = {
-@@ -590,6 +591,7 @@ static match_table_t tokens = {
+@@ -591,6 +592,7 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL}
};
-@@ -811,6 +813,9 @@ static int parse_options (char * options
+@@ -813,6 +815,9 @@ static int parse_options (char * options
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1464,6 +1469,7 @@ static int ext3_fill_super (struct super
+@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2112,7 +2118,13 @@ static struct file_system_type ext3_fs_t
+@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2141,6 +2153,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.201/fs/ext3/extents.c
+Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/extents.c 2005-12-17 02:53:29.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/extents.c 2005-12-17 03:10:23.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/inode.c 2005-12-17 03:10:23.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400
+@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -673,7 +673,7 @@ err_out:
+@@ -675,7 +675,7 @@ err_out:
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1835,7 +1835,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
}
}
}
/**
-@@ -2006,7 +2006,7 @@ static void ext3_free_branches(handle_t
+@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t
ext3_journal_test_restart(handle, inode);
}
if (parent_bh) {
/*
-Index: linux-2.6.5-7.201/fs/ext3/balloc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/balloc.c 2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/balloc.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300
++++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400
@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
*
* Return buffer_head on success or NULL in case of failure.
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 2005-12-17 02:53:26.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2005-12-17 03:10:41.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400
@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
new_bh = sb_getblk(sb, block);
if (!new_bh) {
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
-Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c 2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.5-7.201/fs/ext3/mballoc.c 2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2430 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400
+@@ -0,0 +1,2616 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
+
-+ if (max > 0) {
++ if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++/*
++ * seq_file ->start callback for the mb_groups proc file.
++ *
++ * Returns NULL when *pos lies outside [0, s_groups_count).  Otherwise
++ * returns *pos + 1 encoded as a pointer, which keeps the cursor for
++ * group 0 distinct from NULL.
++ */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	long group;
++
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* pointer-sized integer: avoids an int -> pointer cast of different size */
++	group = *pos + 1;
++	return (void *) group;
++}
++
++/*
++ * seq_file ->next callback for the mb_groups proc file.
++ *
++ * Advances *pos by one.  Returns NULL once the new position lies outside
++ * [0, s_groups_count); otherwise returns the new position + 1 encoded as
++ * a pointer.
++ */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	long group;
++
++	++*pos;
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* pointer-sized integer: avoids an int -> pointer cast of different size */
++	group = *pos + 1;
++	return (void *) group;
++}
++
++/*
++ * seq_file ->show callback for the mb_groups proc file: prints one line
++ * of statistics for the group encoded in @v (group number + 1).
++ *
++ * A column header is printed ahead of group 0.  Groups for which
++ * EXT3_MB_GRP_NEED_INIT() is true produce no data line.  Always returns 0.
++ */
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	/* convert via long: a direct pointer -> int cast changes size on 64-bit */
++	int group = (int)(long) v, i;
++	/*
++	 * Local snapshot of the group info with room for 16 counters behind it.
++	 * NOTE(review): presumably bb_counters[] is the trailing array of
++	 * struct ext3_group_info and s_blocksize_bits + 2 <= 16 -- confirm.
++	 */
++	struct sg {
++		struct ext3_group_info info;
++		unsigned short counters[16];
++	} sg;
++
++	group--;
++	if (group == 0)
++		seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++			   "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++			   "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++			   "2^11", "2^12", "2^13");
++
++	/* bytes to copy: the info header plus s_blocksize_bits + 2 counters */
++	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++		sizeof(struct ext3_group_info);
++	/* copy under the group lock, print from the copy after unlocking */
++	ext3_lock_group(sb, group);
++	memcpy(&sg, sbi->s_group_info[group], i);
++	ext3_unlock_group(sb, group);
++
++	if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++		return 0;
++
++	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++		   sg.info.bb_fragments, sg.info.bb_first_free);
++	/* always 14 columns; orders above s_blocksize_bits + 1 print as 0 */
++	for (i = 0; i <= 13; i++)
++		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++			   sg.info.bb_counters[i] : 0);
++	seq_printf(seq, " ]\n");
++
++	return 0;
++}
++
++/* seq_file ->stop callback for the mb_groups proc file; does nothing. */
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* seq_file iterator callbacks used by ext3_mb_seq_groups_open() */
++static struct seq_operations ext3_mb_seq_groups_ops = {
++ .start = ext3_mb_seq_groups_start,
++ .next = ext3_mb_seq_groups_next,
++ .stop = ext3_mb_seq_groups_stop,
++ .show = ext3_mb_seq_groups_show,
++};
++
++/*
++ * open() for the mb_groups proc file: starts a seq_file session driven by
++ * ext3_mb_seq_groups_ops and stores the super_block taken from the proc
++ * entry's ->data in seq_file->private.  Returns seq_open()'s result.
++ */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++	struct super_block *sb = PDE(inode)->data;
++	struct seq_file *m;
++	int rc = seq_open(file, &ext3_mb_seq_groups_ops);
++
++	if (rc != 0)
++		return rc;
++
++	/* on success file->private_data holds the seq_file to attach sb to */
++	m = (struct seq_file *)file->private_data;
++	m->private = sb;
++	return rc;
++}
++
++/* file operations for the mb_groups proc file; reads go through seq_file */
++static struct file_operations ext3_mb_seq_groups_fops = {
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++/*
++ * write handler for the mb_order2_req proc entry.
++ *
++ * Parses a number from the user buffer and stores it in
++ * ext3_mb_order2_reqs.  Returns @count on success, -EOVERFLOW if the
++ * input does not fit the local buffer, -EFAULT if it cannot be copied
++ * from user space, and -ERANGE if the parsed value is not positive.
++ */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++				    unsigned long count, void *data)
++{
++	char str[32];
++	long value;
++
++	if (count >= sizeof(str)) {
++		printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++		       EXT3_MB_ORDER2_REQ, (unsigned int)sizeof(str));
++		return -EOVERFLOW;
++	}
++
++	if (copy_from_user(str, buffer, count))
++		return -EFAULT;
++
++	/* terminate before parsing; count < sizeof(str) was checked above */
++	str[count] = '\0';
++
++	/* only positive values are accepted */
++	value = simple_strtol(str, NULL, 0);
++	if (value <= 0)
++		return -ERANGE;
++
++	ext3_mb_order2_reqs = value;
++
++	return count;
++}
++
++/*
++ * read handler for the mb_order2_req proc entry: formats the current
++ * ext3_mb_order2_reqs value plus a newline into @page.  Data is produced
++ * only for a read at offset 0; *eof is always set.
++ */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++				   int count, int *eof, void *data)
++{
++	*eof = 1;
++	if (off == 0) {
++		*start = page;
++		return sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++	}
++
++	return 0;
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_MB_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.5-7.201/fs/ext3/Makefile
+Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/Makefile 2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o \
-Index: linux-2.6.12.6/include/linux/ext3_fs.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs.h 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/include/linux/ext3_fs.h 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs.h 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs.h 2006-04-29 20:39:10.000000000 +0400
@@ -57,6 +57,14 @@ struct statfs;
#define ext3_debug(f, a...) do {} while (0)
#endif
#endif /* __KERNEL__ */
/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs_sb.h 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/include/linux/ext3_fs_sb.h 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs_sb.h 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h 2006-04-29 20:39:10.000000000 +0400
@@ -21,8 +21,14 @@
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
};
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.12.6/fs/ext3/super.c
+Index: linux-2.6.12.6-bull/fs/ext3/super.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/super.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/super.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/super.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/super.c 2006-04-29 20:39:10.000000000 +0400
@@ -387,6 +387,7 @@ static void ext3_put_super (struct super
struct ext3_super_block *es = sbi->s_es;
int i;
};
static match_table_t tokens = {
-@@ -649,6 +651,7 @@ static match_table_t tokens = {
+@@ -650,6 +651,7 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -964,6 +967,9 @@ clear_qf_name:
+@@ -965,6 +967,9 @@ clear_qf_name:
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1669,6 +1675,7 @@ static int ext3_fill_super (struct super
+@@ -1670,6 +1675,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
lock_kernel();
return 0;
-@@ -2548,7 +2555,13 @@ static struct file_system_type ext3_fs_t
+@@ -2549,7 +2555,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2570,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2571,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.12.6/fs/ext3/extents.c
+Index: linux-2.6.12.6-bull/fs/ext3/extents.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/extents.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/extents.c 2005-12-17 02:21:21.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.12.6-bull.orig/fs/ext3/extents.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/extents.c 2006-04-29 20:39:10.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.12.6/fs/ext3/inode.c
+Index: linux-2.6.12.6-bull/fs/ext3/inode.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/inode.c 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/inode.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/inode.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/inode.c 2006-04-29 20:39:10.000000000 +0400
@@ -564,7 +564,7 @@ static int ext3_alloc_branch(handle_t *h
ext3_journal_forget(handle, branch[i].bh);
}
if (parent_bh) {
/*
-Index: linux-2.6.12.6/fs/ext3/balloc.c
+Index: linux-2.6.12.6-bull/fs/ext3/balloc.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/balloc.c 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/balloc.c 2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/balloc.c 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/balloc.c 2006-04-29 20:39:10.000000000 +0400
@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
*
* Return buffer_head on success or NULL in case of failure.
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.12.6/fs/ext3/xattr.c
+Index: linux-2.6.12.6-bull/fs/ext3/xattr.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/xattr.c 2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/xattr.c 2005-12-17 02:21:33.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/xattr.c 2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/xattr.c 2006-04-29 20:39:10.000000000 +0400
@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
error = -EIO;
goto cleanup;
}
-Index: linux-2.6.12.6/fs/ext3/mballoc.c
+Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/mballoc.c 2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.12.6/fs/ext3/mballoc.c 2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.12.6-bull/fs/ext3/mballoc.c 2006-04-30 01:24:11.000000000 +0400
+@@ -0,0 +1,2615 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
+
-+ if (max > 0) {
++ if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++/*
++ * seq_file ->start callback for the mb_groups proc file.
++ *
++ * Returns NULL when *pos lies outside [0, s_groups_count).  Otherwise
++ * returns *pos + 1 encoded as a pointer, which keeps the cursor for
++ * group 0 distinct from NULL.
++ */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	long group;
++
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* pointer-sized integer: avoids an int -> pointer cast of different size */
++	group = *pos + 1;
++	return (void *) group;
++}
++
++/*
++ * seq_file ->next callback for the mb_groups proc file.
++ *
++ * Advances *pos by one.  Returns NULL once the new position lies outside
++ * [0, s_groups_count); otherwise returns the new position + 1 encoded as
++ * a pointer.
++ */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	long group;
++
++	++*pos;
++	if (*pos < 0 || *pos >= sbi->s_groups_count)
++		return NULL;
++
++	/* pointer-sized integer: avoids an int -> pointer cast of different size */
++	group = *pos + 1;
++	return (void *) group;
++}
++
++/*
++ * seq_file ->show callback for the mb_groups proc file: prints one line
++ * of statistics for the group encoded in @v (group number + 1).
++ *
++ * A column header is printed ahead of group 0.  Groups for which
++ * EXT3_MB_GRP_NEED_INIT() is true produce no data line.  Always returns 0.
++ */
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++	struct super_block *sb = seq->private;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	/* convert via long: a direct pointer -> int cast changes size on 64-bit */
++	int group = (int)(long) v, i;
++	/*
++	 * Local snapshot of the group info with room for 16 counters behind it.
++	 * NOTE(review): presumably bb_counters[] is the trailing array of
++	 * struct ext3_group_info and s_blocksize_bits + 2 <= 16 -- confirm.
++	 */
++	struct sg {
++		struct ext3_group_info info;
++		unsigned short counters[16];
++	} sg;
++
++	group--;
++	if (group == 0)
++		seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++			   "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++			   "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++			   "2^11", "2^12", "2^13");
++
++	/* bytes to copy: the info header plus s_blocksize_bits + 2 counters */
++	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++		sizeof(struct ext3_group_info);
++	/* copy under the group lock, print from the copy after unlocking */
++	ext3_lock_group(sb, group);
++	memcpy(&sg, sbi->s_group_info[group], i);
++	ext3_unlock_group(sb, group);
++
++	if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++		return 0;
++
++	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++		   sg.info.bb_fragments, sg.info.bb_first_free);
++	/* always 14 columns; orders above s_blocksize_bits + 1 print as 0 */
++	for (i = 0; i <= 13; i++)
++		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++			   sg.info.bb_counters[i] : 0);
++	seq_printf(seq, " ]\n");
++
++	return 0;
++}
++
++/* seq_file ->stop callback for the mb_groups proc file; does nothing. */
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* seq_file iterator callbacks used by ext3_mb_seq_groups_open() */
++static struct seq_operations ext3_mb_seq_groups_ops = {
++ .start = ext3_mb_seq_groups_start,
++ .next = ext3_mb_seq_groups_next,
++ .stop = ext3_mb_seq_groups_stop,
++ .show = ext3_mb_seq_groups_show,
++};
++
++/*
++ * open() for the mb_groups proc file: starts a seq_file session driven by
++ * ext3_mb_seq_groups_ops and stores the super_block taken from the proc
++ * entry's ->data in seq_file->private.  Returns seq_open()'s result.
++ */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++	struct super_block *sb = PDE(inode)->data;
++	struct seq_file *m;
++	int rc = seq_open(file, &ext3_mb_seq_groups_ops);
++
++	if (rc != 0)
++		return rc;
++
++	/* on success file->private_data holds the seq_file to attach sb to */
++	m = (struct seq_file *)file->private_data;
++	m->private = sb;
++	return rc;
++}
++
++/* file operations for the mb_groups proc file; reads go through seq_file */
++static struct file_operations ext3_mb_seq_groups_fops = {
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++/*
++ * write handler for the mb_order2_req proc entry.
++ *
++ * Parses a number from the user buffer and stores it in
++ * ext3_mb_order2_reqs.  Returns @count on success, -EOVERFLOW if the
++ * input does not fit the local buffer, -EFAULT if it cannot be copied
++ * from user space, and -ERANGE if the parsed value is not positive.
++ */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++				    unsigned long count, void *data)
++{
++	char str[32];
++	long value;
++
++	if (count >= sizeof(str)) {
++		printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++		       EXT3_MB_ORDER2_REQ, (unsigned int)sizeof(str));
++		return -EOVERFLOW;
++	}
++
++	if (copy_from_user(str, buffer, count))
++		return -EFAULT;
++
++	/* terminate before parsing; count < sizeof(str) was checked above */
++	str[count] = '\0';
++
++	/* only positive values are accepted */
++	value = simple_strtol(str, NULL, 0);
++	if (value <= 0)
++		return -ERANGE;
++
++	ext3_mb_order2_reqs = value;
++
++	return count;
++}
++
++/*
++ * read handler for the mb_order2_req proc entry: formats the current
++ * ext3_mb_order2_reqs value plus a newline into @page.  Data is produced
++ * only for a read at offset 0; *eof is always set.
++ */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++				   int count, int *eof, void *data)
++{
++	*eof = 1;
++	if (off == 0) {
++		*start = page;
++		return sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++	}
++
++	return 0;
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.12.6/fs/ext3/Makefile
+Index: linux-2.6.12.6-bull/fs/ext3/Makefile
===================================================================
---- linux-2.6.12.6.orig/fs/ext3/Makefile 2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/Makefile 2005-12-17 02:21:21.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/Makefile 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/Makefile 2006-04-29 20:39:10.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o \
-Index: linux-2.6.9-full/include/linux/ext3_fs.h
-===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs.h 2005-12-16 23:16:42.000000000 +0300
-@@ -57,6 +57,14 @@ struct statfs;
- #define ext3_debug(f, a...) do {} while (0)
- #endif
-
-+#define EXT3_MULTIBLOCK_ALLOCATOR 1
-+
-+#define EXT3_MB_HINT_MERGE 1
-+#define EXT3_MB_HINT_RESERVED 2
-+#define EXT3_MB_HINT_METADATA 4
-+#define EXT3_MB_HINT_FIRST 8
-+#define EXT3_MB_HINT_BEST 16
-+
- /*
- * Special inodes numbers
- */
-@@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */
-
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
- extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
- extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
-- unsigned long);
-+ unsigned long, int);
- extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
- unsigned long, unsigned long, int *);
- extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg);
-
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif /* __KERNEL__ */
-
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2005-12-16 23:16:39.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2006-05-22 21:45:08.000000000 +0400
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -81,6 +87,38 @@ struct ext3_sb_info {
+@@ -81,6 +87,39 @@ struct ext3_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ tid_t s_last_transaction;
+ int s_mb_factor;
+ unsigned short *s_mb_offsets, *s_mb_maxs;
++ unsigned long s_stripe;
+
+ /* history to debug policy */
+ struct ext3_mb_history *s_mb_history;
};
#endif /* _LINUX_EXT3_FS_SB */
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-05-22 21:44:37.000000000 +0400
+@@ -57,6 +57,14 @@ struct statfs;
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
++#define EXT3_MB_HINT_MERGE 1
++#define EXT3_MB_HINT_RESERVED 2
++#define EXT3_MB_HINT_METADATA 4
++#define EXT3_MB_HINT_FIRST 8
++#define EXT3_MB_HINT_BEST 16
++
+ /*
+ * Special inodes numbers
+ */
+@@ -365,6 +373,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+ unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *, int);
++extern int ext3_mb_release(struct super_block *);
++extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
++extern int ext3_mb_reserve_blocks(struct super_block *, int);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif /* __KERNEL__ */
+
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-2.6.9-full/fs/ext3/super.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/super.c 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c 2006-05-22 21:52:54.000000000 +0400
@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
struct ext3_super_block *es = sbi->s_es;
int i;
Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
- Opt_extents, Opt_extdebug,
-+ Opt_extents, Opt_extdebug, Opt_mballoc,
++ Opt_extents, Opt_extdebug, Opt_mballoc, Opt_stripe
};
static match_table_t tokens = {
-@@ -647,6 +649,7 @@ static match_table_t tokens = {
+@@ -648,6 +649,8 @@ static match_table_t tokens = {
{Opt_iopen_nopriv, "iopen_nopriv"},
{Opt_extents, "extents"},
{Opt_extdebug, "extdebug"},
+ {Opt_mballoc, "mballoc"},
++ {Opt_stripe, "stripe=%u"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -957,6 +960,9 @@ clear_qf_name:
+@@ -958,6 +961,16 @@ clear_qf_name:
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
+ case Opt_mballoc:
+ set_opt (sbi->s_mount_opt, MBALLOC);
+ break;
++ case Opt_stripe:
++ if (match_int(&args[0], &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ sbi->s_stripe = option;
++ break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1646,6 +1652,7 @@ static int ext3_fill_super (struct super
+@@ -1647,6 +1660,7 @@ static int ext3_fill_super (struct super
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2428,7 +2435,13 @@ static struct file_system_type ext3_fs_t
+@@ -2429,7 +2443,13 @@ static struct file_system_type ext3_fs_t
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2450,6 +2463,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2451,6 +2471,7 @@ static void __exit exit_ext3_fs(void)
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
int ext3_prep_san_write(struct inode *inode, long *blocks,
Index: linux-2.6.9-full/fs/ext3/extents.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/extents.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/extents.c 2005-12-16 23:16:42.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.9-full.orig/fs/ext3/extents.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/extents.c 2006-05-22 21:44:37.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.9-full/fs/ext3/inode.c
+Index: linux-2.6.9-full/fs/ext3/Makefile
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/inode.c 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/inode.c 2005-12-16 23:16:42.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
- ext3_journal_forget(handle, branch[i].bh);
- }
- for (i = 0; i < keys; i++)
-- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
- return err;
- }
-
-@@ -673,7 +673,7 @@ err_out:
- if (err == -EAGAIN)
- for (i = 0; i < num; i++)
- ext3_free_blocks(handle, inode,
-- le32_to_cpu(where[i].key), 1);
-+ le32_to_cpu(where[i].key), 1, 1);
- return err;
- }
-
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
- }
- }
+--- linux-2.6.9-full.orig/fs/ext3/Makefile 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/Makefile 2006-05-22 21:44:37.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
-- ext3_free_blocks(handle, inode, block_to_free, count);
-+ ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
-
- /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
- ext3_journal_test_restart(handle, inode);
- }
-
-- ext3_free_blocks(handle, inode, nr, 1);
-+ ext3_free_blocks(handle, inode, nr, 1, 1);
-
- if (parent_bh) {
- /*
-Index: linux-2.6.9-full/fs/ext3/balloc.c
-===================================================================
---- linux-2.6.9-full.orig/fs/ext3/balloc.c 2005-10-27 21:44:24.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/balloc.c 2005-12-16 23:16:42.000000000 +0300
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
- *
- * Return buffer_head on success or NULL in case of failure.
- */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
- struct ext3_group_desc * desc;
-@@ -450,24 +450,6 @@ error_return:
- return;
- }
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o \
+- extents.o
++ extents.o mballoc.o
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
-- unsigned long block, unsigned long count)
--{
-- struct super_block * sb;
-- int dquot_freed_blocks;
--
-- sb = inode->i_sb;
-- if (!sb) {
-- printk ("ext3_free_blocks: nonexistent device");
-- return;
-- }
-- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
-- if (dquot_freed_blocks)
-- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
-- return;
--}
--
- /*
- * For ext3 allocations, we must not reuse any blocks which are
- * allocated in the bitmap buffer's "last committed data" copy. This
-@@ -1140,7 +1122,7 @@ int ext3_should_retry_alloc(struct super
- * bitmap, and then for any free bit if that fails.
- * This function also updates quota and i_blocks field.
- */
--int ext3_new_block(handle_t *handle, struct inode *inode,
-+int ext3_new_block_old(handle_t *handle, struct inode *inode,
- unsigned long goal, int *errp)
- {
- struct buffer_head *bitmap_bh = NULL;
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
Index: linux-2.6.9-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/xattr.c 2005-12-16 23:16:40.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/xattr.c 2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-05-22 21:44:37.000000000 +0400
@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
new_bh = sb_getblk(sb, block);
if (!new_bh) {
} else {
Index: linux-2.6.9-full/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2005-12-16 17:46:19.148560250 +0300
-+++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2006-05-12 23:14:51.200000000 +0400
++++ linux-2.6.9-full/fs/ext3/mballoc.c 2006-05-22 21:51:30.000000000 +0400
+@@ -0,0 +1,2671 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+
+long ext3_mb_stats = 1;
+
++/*
++ * 2^N-sized requests with ffs(len) >= this value use the buddy-order search
++ */
++long ext3_mb_order2_reqs = 8;
++
++
+#ifdef EXT3_BB_MAX_BLOCKS
+#undef EXT3_BB_MAX_BLOCKS
+#endif
+struct ext3_mb_history {
+ struct ext3_free_extent goal; /* goal allocation */
+ struct ext3_free_extent result; /* result allocation */
++ unsigned pid;
++ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)
+
+#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
+#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
+ struct ext3_allocation_context *ac);
+#endif
+
+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
+ int needed, struct ext3_free_extent *ex)
+{
-+ int next, max, ord;
++ int next = block, max, ord;
+ void *buddy;
+
+ J_ASSERT(ex != NULL);
+ ex->fe_start = block << order;
+ ex->fe_group = e3b->bd_group;
+
++ /* calc difference from given start */
++ next = next - ex->fe_start;
++ ex->fe_len -= next;
++ ex->fe_start += next;
++
+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
+
+ if (block + 1 >= max)
+ struct ext3_buddy *e3b)
+{
+ int group = ac->ac_g_ex.fe_group, max, err;
++ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++ struct ext3_super_block *es = sbi->s_es;
+ struct ext3_free_extent ex;
+
+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
+ ext3_lock_group(ac->ac_sb, group);
+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
+ ac->ac_g_ex.fe_len, &ex);
-+
-+ if (max > 0) {
++
++ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++ unsigned long start;
++ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++ ex.fe_start + le32_to_cpu(es->s_first_data_block));
++ if (start % sbi->s_stripe == 0) {
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ }
++ } else if (max >= ac->ac_g_ex.fe_len) {
++ J_ASSERT(ex.fe_len > 0);
++ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++ /* Sometimes, caller may want to merge even small
++ * number of blocks to an existing extent */
+ J_ASSERT(ex.fe_len > 0);
+ J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+ J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+ int i, k, max;
+
+ J_ASSERT(ac->ac_2order > 0);
-+ for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+ if (grp->bb_counters[i] == 0)
+ continue;
+
+ }
+}
+
++/*
++ * Special case for striped storage such as raid5: for stripe-sized requests
++ * look for a chunk of at least s_stripe blocks starting on a stripe boundary.
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b)
++{
++ struct super_block *sb = ac->ac_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ void *bitmap = EXT3_MB_BITMAP(e3b);
++ struct ext3_free_extent ex;
++ unsigned long i, max;
++
++ J_ASSERT(sbi->s_stripe != 0); /* s_stripe is used as a divisor below */
++
++ /* find first stripe-aligned block */
++ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(sbi->s_es->s_first_data_block); /* absolute number of the group's first block */
++ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; /* round up to a stripe multiple */
++ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++ % EXT3_BLOCKS_PER_GROUP(sb); /* back to a group-relative block number */
++
++ while (i < sb->s_blocksize * 8) { /* one bitmap block holds blocksize * 8 bits */
++ if (!mb_test_bit(i, bitmap)) { /* probe only when the aligned block's bit is clear */
++ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++ if (max >= sbi->s_stripe) { /* a whole stripe fits here: take it and stop */
++ ac->ac_found++;
++ ac->ac_b_ex = ex;
++ ext3_mb_use_best_found(ac, e3b);
++ break;
++ }
++ }
++ i += sbi->s_stripe; /* advance to the next stripe boundary */
++ }
++}
++
+static int ext3_mb_good_group(struct ext3_allocation_context *ac,
+ int group, int cr)
+{
+ case 0:
+ J_ASSERT(ac->ac_2order != 0);
+ bits = ac->ac_sb->s_blocksize_bits + 1;
-+ for (i = ac->ac_2order; i < bits; i++)
++ for (i = ac->ac_2order; i <= bits; i++)
+ if (grp->bb_counters[i] > 0)
+ return 1;
++ break;
+ case 1:
+ if ((free / fragments) >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 2:
+ if (free >= ac->ac_g_ex.fe_len)
+ return 1;
++ break;
+ case 3:
+ return 1;
+ default:
+ ac.ac_2order = 0;
+ ac.ac_criteria = 0;
+
++ if (*len == 1 && sbi->s_stripe) {
++ /* looks like a metadata, let's use a dirty hack for raid5
++ * move all metadata in first groups in hope to hit cached
++ * sectors and thus avoid read-modify cycles in raid5 */
++ ac.ac_g_ex.fe_group = group = 0;
++ }
++
+ /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+ i = ffs(*len);
-+ if (i >= 8) {
++ if (i >= ext3_mb_order2_reqs) {
+ i--;
+ if ((*len & (~(1 << i))) == 0)
+ ac.ac_2order = i;
+ }
+
-+ /* Sometimes, caller may want to merge even small
-+ * number of blocks to an existing extent */
-+ if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+ err = ext3_mb_find_by_goal(&ac, &e3b);
-+ if (err)
-+ goto out_err;
-+ if (ac.ac_status == AC_STATUS_FOUND)
-+ goto found;
-+ }
++ /* first, try the goal */
++ err = ext3_mb_find_by_goal(&ac, &e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ goto found;
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac.ac_2order ? 0 : 1;
+ ac.ac_groups_scanned++;
+ if (cr == 0)
+ ext3_mb_simple_scan_group(&ac, &e3b);
++ else if (cr == 1 && *len == sbi->s_stripe)
++ ext3_mb_scan_aligned(&ac, &e3b);
+ else
+ ext3_mb_complex_scan_group(&ac, &e3b);
+
+ atomic_inc(&sbi->s_bal_breaks);
+ }
+
-+ ext3_mb_store_history(sb, &ac);
++ ext3_mb_store_history(sb, inode->i_ino, &ac);
+
+ return block;
+}
+ char buf[20], buf2[20];
+
+ if (v == SEQ_START_TOKEN) {
-+ seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+ "goal", "result", "found", "grps", "cr", "merge",
-+ "tail", "broken");
++ seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++ "pid", "inode", "goal", "result", "found", "grps", "cr",
++ "merge", "tail", "broken");
+ return 0;
+ }
+
+ hs->goal.fe_start, hs->goal.fe_len);
+ sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
+ hs->result.fe_start, hs->result.fe_len);
-+ seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+ buf2, hs->found, hs->groups, hs->cr,
-+ hs->merged ? "M" : "", hs->tail,
++ seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++ hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++ hs->cr, hs->merged ? "M" : "", hs->tail,
+ hs->buddy ? 1 << hs->buddy : 0);
+ return 0;
+}
+ .release = ext3_mb_seq_history_release,
+};
+
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int group;
++
++ if (*pos < 0 || *pos >= sbi->s_groups_count)
++ return NULL; /* position is outside the group range: end of sequence */
++
++ group = *pos + 1; /* bias by one so group 0 is not returned as NULL */
++ return (void *)(long) group; /* widen via long: int and pointer sizes differ on 64-bit */
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int group;
++
++ ++*pos; /* advance to the following group */
++ if (*pos < 0 || *pos >= sbi->s_groups_count)
++ return NULL; /* past the last group: end of sequence */
++ group = *pos + 1; /* bias by one so group 0 is not returned as NULL */
++ return (void *)(long) group; /* widen via long: int and pointer sizes differ on 64-bit */
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++ struct super_block *sb = seq->private;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int group = (int)(long) v, i; /* narrow via long: pointer and int sizes differ on 64-bit */
++ struct sg {
++ struct ext3_group_info info;
++ unsigned short counters[16]; /* presumably backing space for info.bb_counters[] - confirm struct layout */
++ } sg;
++
++ group--; /* undo the +1 bias applied by the start/next callbacks */
++ if (group == 0) /* column header, printed once ahead of group 0 */
++ seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++ "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++ "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++ "2^11", "2^12", "2^13");
++
++ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++ sizeof(struct ext3_group_info); /* bytes to copy: the info struct plus blocksize_bits + 2 counters */
++ ext3_lock_group(sb, group); /* take the snapshot under the group lock */
++ memcpy(&sg, sbi->s_group_info[group], i);
++ ext3_unlock_group(sb, group);
++
++ if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++ return 0; /* print nothing for groups flagged NEED_INIT */
++
++ seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++ sg.info.bb_fragments, sg.info.bb_first_free);
++ for (i = 0; i <= 13; i++) /* 14 columns, matching 2^0..2^13 in the header */
++ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++ sg.info.bb_counters[i] : 0); /* orders above blocksize_bits + 1 print as 0 */
++ seq_printf(seq, " ]\n");
++
++ return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{ /* start/next hand back a plain biased number, so there is nothing to release */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = { /* iterator installed by ext3_mb_seq_groups_open() */
++ .start = ext3_mb_seq_groups_start,
++ .next = ext3_mb_seq_groups_next,
++ .stop = ext3_mb_seq_groups_stop,
++ .show = ext3_mb_seq_groups_show, /* emits one line per block group */
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++ struct super_block *sb = PDE(inode)->data; /* super block stored in the proc entry's data field */
++ int rc;
++
++ rc = seq_open(file, &ext3_mb_seq_groups_ops);
++ if (rc == 0) { /* on success, hand the super block to the iterator */
++ struct seq_file *m = (struct seq_file *)file->private_data;
++ m->private = sb; /* read back by the start/next/show callbacks */
++ }
++ return rc; /* seq_open() result is passed through unchanged */
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = { /* attached to the "mb_groups" proc entry */
++ .owner = THIS_MODULE,
++ .open = ext3_mb_seq_groups_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static void ext3_mb_history_release(struct super_block *sb)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ char name[64];
+
+ snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++ remove_proc_entry("mb_groups", sbi->s_mb_proc);
+ remove_proc_entry("mb_history", sbi->s_mb_proc);
+ remove_proc_entry(name, proc_root_ext3);
+
+ p->proc_fops = &ext3_mb_seq_history_fops;
+ p->data = sb;
+ }
++ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++ if (p) {
++ p->proc_fops = &ext3_mb_seq_groups_fops;
++ p->data = sb;
++ }
+ }
+
+ sbi->s_mb_history_max = 1000;
+}
+
+static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++ struct ext3_allocation_context *ac)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct ext3_mb_history h;
+ if (likely(sbi->s_mb_history == NULL))
+ return;
+
++ h.pid = current->pid;
++ h.ino = ino;
+ h.goal = ac->ac_g_ex;
+ h.result = ac->ac_b_ex;
+ h.found = ac->ac_found;
+#define EXT3_MB_STATS_NAME "mb_stats"
+#define EXT3_MB_MAX_TO_SCAN_NAME "mb_max_to_scan"
+#define EXT3_MB_MIN_TO_SCAN_NAME "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ "mb_order2_req"
+
+static int ext3_mb_stats_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ return len;
+}
+
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ char str[32] = { 0 }; /* zeroed so the copied text is always NUL-terminated */
++ long value;
++
++ if (count >= sizeof(str)) { /* keeps at least one trailing NUL in str */
++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++ EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++ return -EOVERFLOW;
++ }
++
++ if (copy_from_user(str, buffer, count))
++ return -EFAULT;
++
++ /* Only positive values are accepted; zero and negative are rejected */
++ value = simple_strtol(str, NULL, 0);
++ if (value <= 0)
++ return -ERANGE;
++
++ ext3_mb_order2_reqs = value;
++
++ return count; /* report the whole write as consumed */
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ int len;
++
++ *eof = 1; /* the whole value is produced by a single read */
++ if (off != 0)
++ return 0; /* nothing more to return past offset 0 */
++
++ len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++ *start = page;
++ return len; /* number of bytes formatted into page */
++}
++
+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ long value;
+
+ if (count >= sizeof(str)) {
-+ printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
++ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
+ return -EOVERFLOW;
+ }
+ struct proc_dir_entry *proc_ext3_mb_stats;
+ struct proc_dir_entry *proc_ext3_mb_max_to_scan;
+ struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++ struct proc_dir_entry *proc_ext3_mb_order2_req;
+
+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
+ if (proc_root_ext3 == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
+ return -EIO;
+ }
+
+ proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_stats == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_STATS_NAME);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+ return -EIO;
+ EXT3_MB_MAX_TO_SCAN_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_max_to_scan == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_MAX_TO_SCAN_NAME);
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+ EXT3_MB_MIN_TO_SCAN_NAME,
+ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
+ if (proc_ext3_mb_min_to_scan == NULL) {
-+ printk(KERN_ERR "EXT3: Unable to create %s\n",
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
+ EXT3_MB_MIN_TO_SCAN_NAME);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ proc_ext3_mb_min_to_scan->read_proc = ext3_mb_min_to_scan_read;
+ proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
+
++ /* Initialize EXT3_ORDER2_REQ */
++ proc_ext3_mb_order2_req = create_proc_entry(
++ EXT3_MB_ORDER2_REQ,
++ S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++ if (proc_ext3_mb_order2_req == NULL) {
++ printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++ EXT3_MB_ORDER2_REQ);
++ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_ROOT, proc_root_fs);
++ return -EIO;
++ }
++
++ proc_ext3_mb_order2_req->data = NULL;
++ proc_ext3_mb_order2_req->read_proc = ext3_mb_order2_req_read;
++ proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
+ return 0;
+}
+
+ remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.9-full/fs/ext3/Makefile
+Index: linux-2.6.9-full/fs/ext3/balloc.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/Makefile 2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/Makefile 2005-12-16 23:16:42.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.9-full.orig/fs/ext3/balloc.c 2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9-full/fs/ext3/balloc.c 2006-05-22 21:44:37.000000000 +0400
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -451,24 +451,6 @@ error_return:
+ return;
+ }
- ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o \
-- extents.o
-+ extents.o mballoc.o
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+- unsigned long block, unsigned long count)
+-{
+- struct super_block * sb;
+- int dquot_freed_blocks;
+-
+- sb = inode->i_sb;
+- if (!sb) {
+- printk ("ext3_free_blocks: nonexistent device");
+- return;
+- }
+- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+- if (dquot_freed_blocks)
+- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+- return;
+-}
+-
+ /*
+ * For ext3 allocations, we must not reuse any blocks which are
+ * allocated in the bitmap buffer's "last committed data" copy. This
+@@ -1131,7 +1113,7 @@ int ext3_should_retry_alloc(struct super
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+ unsigned long goal, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c 2006-05-22 21:44:37.000000000 +0400
+@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
- ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+@@ -673,7 +673,7 @@ err_out:
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
dput(dentry);
spin_lock(&dcache_lock);
return 1;
- }
-
+ }
+
if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
struct inode *inode = dentry->d_inode;
#endif
- CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
+ CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
"inode %p refc %d\n", dentry->d_name.len,
dentry->d_name.name, dentry, dentry->d_parent,
dentry->d_inode, atomic_read(&dentry->d_count));
RETURN(-EINVAL);
}
if (head->i_size % JOIN_FILE_ALIGN) {
- CERROR("hsize" LPU64 " must be times of 64K\n",
- head->i_size);
+ CERROR("hsize %llu must be times of 64K\n", head->i_size);
RETURN(-EINVAL);
}
RETURN(0);
if (body->valid & OBD_MD_FLGID)
inode->i_gid = body->gid;
if (body->valid & OBD_MD_FLFLAGS)
- inode->i_flags = body->flags;
+ inode->i_flags = ll_ext_to_inode_flags(body->flags);
if (body->valid & OBD_MD_FLNLINK)
inode->i_nlink = body->nlink;
if (body->valid & OBD_MD_FLGENER)
body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
sizeof(*body));
- if (body->flags & S_APPEND)
- flags |= EXT3_APPEND_FL;
- if (body->flags & S_IMMUTABLE)
- flags |= EXT3_IMMUTABLE_FL;
- if (body->flags & S_NOATIME)
- flags |= EXT3_NOATIME_FL;
-
- ptlrpc_req_finished(req);
+ /* We want to return EXT3_*_FL flags to the caller via this
+ * ioctl. An older MDS may be sending S_* flags, fix it up. */
+ flags = ll_inode_to_ext_flags(body->flags, body->flags);
+ ptlrpc_req_finished (req);
RETURN(put_user(flags, (int *)arg));
}
RETURN(rc);
}
- if (flags & EXT3_APPEND_FL)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (flags & EXT3_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (flags & EXT3_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
-
+ inode->i_flags = ll_ext_to_inode_flags(flags |
+ MDS_BFLAG_EXT_FLAGS);
RETURN(0);
}
default:
kms_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
+ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
if (kms_pages &&
RETURN(-EPERM);
}
+ /* FIXME: Can't do this because of nested transaction deadlock */
+ if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
+ CERROR("can't set data journal flag on file\n");
+ RETURN(-EPERM);
+ }
+
if (inode->i_fop->ioctl)
rc = inode->i_fop->ioctl(inode, file, cmd, arg);
else
#include <lustre_mds.h>
void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
- __u64 valid, struct ll_fid *fid, int ea_size);
+ __u64 valid, struct ll_fid *fid, int ea_size, int flags);
void mdc_pack_rep_body(struct ptlrpc_request *);
void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off,
__u32 size, struct ll_fid *mdc_fid);
}
void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
- __u64 valid, struct ll_fid *fid, int ea_size)
+ __u64 valid, struct ll_fid *fid, int ea_size, int flags)
{
struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
b->fid1 = *fid;
b->valid = valid;
b->eadatasize = ea_size;
+ b->flags = flags;
mdc_pack_body(b);
}
b->fsgid = current->fsgid;
b->capability = current->cap_effective;
b->valid = valid;
- b->flags = flags;
+ b->flags = flags | MDS_BFLAG_EXT_FLAGS;
b->suppgid = data->suppgids[0];
b->fid1 = data->fid1;
{
it->d.lustre.it_disposition &= ~flag;
}
-
EXPORT_SYMBOL(it_clear_disposition);
static int it_to_lock_mode(struct lookup_intent *it)
owner/group/acls are under lookup lock, we need both
ibits for GETATTR. */
policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ?
- MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP :
+ MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP :
MDS_INODELOCK_LOOKUP;
-
+
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
LDLM_IBITS, &policy, LCK_CR, &lockh);
mode = LCK_CW;
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
- LDLM_IBITS, &policy, LCK_CW, &lockh);
+ LDLM_IBITS, &policy,LCK_CW,&lockh);
}
if (!rc) {
mode = LCK_PR;
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
- LDLM_IBITS, &policy, LCK_PR, &lockh);
+ LDLM_IBITS, &policy,LCK_PR,&lockh);
}
if (rc) {
memcpy(&it->d.lustre.it_lock_handle, &lockh,
if (op_data->fid2.id && (it->it_op != IT_GETATTR)) {
it_set_disposition(it, DISP_ENQ_COMPLETE);
/* Also: did we find the same inode? */
- if (memcmp(&op_data->fid2, &mds_body->fid1, sizeof(op_data->fid2)))
+ if (memcmp(&op_data->fid2, &mds_body->fid1,
+ sizeof(op_data->fid2)))
RETURN(-ESTALE);
}
req->rq_send_state = level;
ptlrpc_req_set_repsize(req, 2, size);
- mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0);
+ mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0);
lustre_msg_add_flags(req->rq_reqmsg, msg_flags);
rc = ptlrpc_queue_wait(req);
if (!req)
GOTO(out, rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size);
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+ MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
/* currently only root inode will call us with FLACL */
if (valid & OBD_MD_FLACL)
if (!req)
GOTO(out, rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size);
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+ MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
LASSERT(strnlen(filename, namelen) == namelen - 1);
memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen),
int flags, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
- struct mds_body *body;
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mds_body) };
+ // int size[3] = {sizeof(struct mds_body)}, bufcnt = 1;
int rc, xattr_namelen = 0, bufcnt = 2, offset;
void *tmp;
ENTRY;
GOTO(out, rc = -ENOMEM);
/* request data */
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- body->flags = flags;
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags);
offset = REQ_REC_OFF + 1;
/* reply buffers */
if (opcode == MDS_GETXATTR) {
+ size[0] = sizeof(struct mds_body);
bufcnt = 2;
} else {
bufcnt = 1;
GOTO(err_out, rc);
if (opcode == MDS_GETXATTR) {
- body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
+ struct mds_body * body = lustre_swab_repbuf(req, REPLY_REC_OFF,
+ sizeof(*body),
lustre_swab_mds_body);
if (body == NULL) {
CERROR ("Can't unpack mds_body\n");
if (!req)
RETURN(rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0);
+ mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0);
ptlrpc_req_set_repsize(req, 2, size);
LASSERT(body != NULL); /* caller prepped reply */
mds_pack_inode2fid(&body->fid1, inode);
+ body->flags = reqbody->flags; /* copy MDS_BFLAG_EXT_FLAGS if present */
mds_pack_inode2body(body, inode);
reply_off++;
rc = 0;
}
reply_off++;
+ } else if (reqbody->valid == OBD_MD_FLFLAGS &&
+ reqbody->flags & MDS_BFLAG_EXT_FLAGS) {
+ int flags;
+
+ /* We only return the full set of flags on ioctl, otherwise we
+ * get enough flags from the inode in mds_pack_inode2body(). */
+ rc = fsfilt_iocontrol(obd, inode, NULL, EXT3_IOC_GETFLAGS,
+ (long)&flags);
+ if (rc == 0)
+ body->flags = flags | MDS_BFLAG_EXT_FLAGS;
}
if (reqbody->valid & OBD_MD_FLMODEASIZE) {
b->blocks = inode->i_blocks;
b->uid = inode->i_uid;
b->gid = inode->i_gid;
- b->flags = inode->i_flags;
+ b->flags = ll_inode_to_ext_flags(b->flags, inode->i_flags);
b->rdev = inode->i_rdev;
/* Return the correct link count for orphan inodes */
b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
if (IS_ERR(mds->mds_osc_obd))
RETURN(PTR_ERR(mds->mds_osc_obd));
- rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
+ rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
if (rc < 0)
RETURN(rc);
rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, lsm);
mds_lov_update_objids(obd, ids);
OBD_FREE(ids, sizeof(*ids) * mds->mds_lov_desc.ld_tgt_count);
}
- if (rc)
+ if (rc) /* coverity[deadcode] */
mds_mfd_unlink(mfd, 1);
+
mds_mfd_put(mfd);
RETURN(rc);
}
* values specified) then delete default striping from dir. */
if (S_ISDIR(inode->i_mode) &&
((lum->lmm_stripe_size == 0 &&
- lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1) &&
+ lum->lmm_stripe_offset ==
+ (typeof(lum->lmm_stripe_offset))(-1) &&
lum->lmm_stripe_count == 0) ||
/* lmm_stripe_size == -1 is deprecated in 1.4.6 */
- lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1))){
+ lum->lmm_stripe_size ==
+ (typeof(lum->lmm_stripe_size))(-1))){
rc = fsfilt_set_md(obd, inode, handle, NULL, 0, "lov");
if (rc)
GOTO(cleanup, rc);
RETURN(-EOPNOTSUPP);
rc = llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
if (rc) {
- CDEBUG(D_IOCTL,
+ CDEBUG(D_IOCTL,
"cannot find log #"LPX64"#"LPX64"#%08x\n",
lir->lid_id.lgl_oid, lir->lid_id.lgl_ogr,
lir->lid_id.lgl_ogen);
spin_unlock(&exp->exp_obd->obd_osfs_lock);
CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: "
- "%lld left: %lld\n", exp->exp_obd->obd_name,
+ LPU64" left: "LPU64"\n", exp->exp_obd->obd_name,
exp->exp_client_uuid.uuid, exp,
data->ocd_grant, want, left);
}
CERROR("Failure to commit OST transaction (%d)?\n", err);
rc = err;
}
- if (obd->obd_replayable && !err)
+ if (obd->obd_replayable && !rc)
LASSERTF(oti->oti_transno <= obd->obd_last_committed,
"oti_transno "LPU64" last_committed "LPU64"\n",
oti->oti_transno, obd->obd_last_committed);
rc = generic_osync_inode(inode, inode->i_mapping,
OSYNC_DATA|OSYNC_METADATA);
*/
- down(&inode->i_sem);
+ LOCK_INODE_MUTEX(inode);
current->flags |= PF_SYNCWRITE;
rc = filemap_fdatawrite(inode->i_mapping);
rc2 = sync_mapping_buffers(inode->i_mapping);
rc = rc2;
rc2 = filemap_fdatawait(inode->i_mapping);
current->flags &= ~PF_SYNCWRITE;
- up(&inode->i_sem);
+ UNLOCK_INODE_MUTEX(inode);
if (rc == 0)
rc = rc2;
if (rc != 0)
fsfilt_check_slow(now, obd_timeout, "direct_io");
err = fsfilt_commit_wait(obd, inode, wait_handle);
- if (err)
+ if (err) {
+ CERROR("Failure to commit OST transaction (%d)?\n", err);
rc = err;
+ }
- if (obd->obd_replayable && !err)
+ if (obd->obd_replayable && !rc)
LASSERTF(oti->oti_transno <= obd->obd_last_committed,
"oti_transno "LPU64" last_committed "LPU64"\n",
oti->oti_transno, obd->obd_last_committed);
opd.opd_policy.l_extent.end = (nb[nrbufs - 1].offset +
nb[nrbufs - 1].len - 1) | ~CFS_PAGE_MASK;
- CDEBUG(D_DLMTRACE, "refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
+ CDEBUG(D_DLMTRACE,"refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
res_id.name[0], res_id.name[1], opd.opd_policy.l_extent.start,
opd.opd_policy.l_extent.end);
ldlm_resource_iterate(exp->exp_obd->obd_namespace, &res_id,
ptlrpc_rs_decref(req->rq_reply_state);
req->rq_reply_state = NULL;
}
- CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+ CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
+ "client will retry\n",
req->rq_export->exp_obd->obd_name,
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
ptlrpc_rs_decref(req->rq_reply_state);
req->rq_reply_state = NULL;
}
- CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+ CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
+ "client will retry\n",
req->rq_export->exp_obd->obd_name,
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
/* Sizes and Offsets */
+ /* Checks for struct obd_uuid */
+ LASSERTF((int)sizeof(struct obd_uuid) == 40, " found %lld\n",
+ (long long)(int)sizeof(struct obd_uuid));
/* Checks for struct lustre_handle */
LASSERTF((int)sizeof(struct lustre_handle) == 8, " found %lld\n",
-#!/bin/sh
+#!/bin/bash
# script which _must_ complete successfully (at minimum) before checkins to
# the CVS HEAD are allowed.
set -vxe
PATH=`dirname $0`/../utils:$PATH
[ "$CONFIGS" ] || CONFIGS="local" #"local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=10
+[ "$MAX_THREADS" ] || MAX_THREADS=20
+RAMKB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
if [ -z "$THREADS" ]; then
- KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
- THREADS=`expr $KB / 16384`
+ THREADS=$((RAMKB / 16384))
[ $THREADS -gt $MAX_THREADS ] && THREADS=$MAX_THREADS
fi
-[ "$SIZE" ] || SIZE=40960
+[ "$SIZE" ] || SIZE=$((RAMKB * 2))
[ "$RSIZE" ] || RSIZE=512
[ "$UID" ] || UID=1000
[ "$MOUNT" ] || MOUNT=/mnt/lustre
if [ "$DBENCH" != "no" ]; then
mount_client $MOUNT
SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
- DB_THREADS=`expr $SPACE / 50000`
+ DB_THREADS=$((SPACE / 50000))
[ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
$DEBUG_OFF
chown $UID $MOUNT && chmod 700 $MOUNT
if [ "$BONNIE" != "no" ]; then
mount_client $MOUNT
+ SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+ [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
$DEBUG_OFF
- bonnie++ -f -r 0 -s $(($SIZE / 1024)) -n 10 -u $UID -d $MOUNT
+ bonnie++ -f -r 0 -s $((SIZE / 1024)) -n 10 -u $UID -d $MOUNT
$DEBUG_ON
$CLEANUP
$SETUP
fi
- IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
- IOZFILE="-f $MOUNT/iozone"
export O_DIRECT
if [ "$IOZONE" != "no" ]; then
mount_client $MOUNT
+ SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+ [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
+ IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
+ IOZFILE="-f $MOUNT/iozone"
$DEBUG_OFF
iozone $IOZONE_OPTS $IOZFILE
$DEBUG_ON
fi
SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
- IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)`
+ IOZ_THREADS=$((SPACE / SIZE * 2 / 3 ))
[ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
- IOZVER=`iozone -v|awk '/Revision:/ {print $3}'|tr -d .`
+ IOZVER=`iozone -v | awk '/Revision:/ {print $3}' | tr -d .`
if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then
$DEBUG_OFF
THREAD=1
IOZFILE="-F "
while [ $THREAD -le $IOZ_THREADS ]; do
IOZFILE="$IOZFILE $MOUNT/iozone.$THREAD"
- THREAD=`expr $THREAD + 1`
+ THREAD=$((THREAD + 1))
done
iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZFILE
$DEBUG_ON
if [ "$FSX" != "no" ]; then
mount | grep $MOUNT || $SETUP
+ SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+ [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
$DEBUG_OFF
./fsx -c 50 -p 1000 -P $TMP -l $SIZE \
-N $(($COUNT * 100)) $MOUNT/fsxfile
zconf_mount `hostname` $MOUNTPATH || return 96
}
+# remount_client <opts> <mountpoint>
+# Remount the local Lustre client at <mountpoint> with "remount,<opts>".
+# MOUNTOPT is a global read by zconf_mount, so it is overridden only for
+# the duration of the call and restored afterwards on success AND on failure.
+# Returns 0 on success, 96 if zconf_mount fails.
+remount_client() {
+    local SAVEMOUNTOPT=$MOUNTOPT
+    local MOUNTPATH=$2
+    local rc=0
+
+    MOUNTOPT="remount,$1"
+    echo "remount '$1' lustre on ${MOUNTPATH}....."
+    # record the failure instead of returning at once, so that the saved
+    # MOUNTOPT is always put back before we leave
+    zconf_mount `hostname` $MOUNTPATH || rc=96
+    MOUNTOPT=$SAVEMOUNTOPT
+    return $rc
+}
+
umount_client() {
local MOUNTPATH=$1
echo "umount lustre on ${MOUNTPATH}....."
fi
echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
- do_facet mds "[ -d $TMPMTPT ] || mkdir -p $TMPMTPT;
- mount -o loop -t ext3 $MDSDEV $TMPMTPT || return \$?;
- chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} || return \$?;
- umount -d $TMPMTPT || return \$?" || return $?
+ do_facet mds "mkdir -p $TMPMTPT &&
+ mount -o loop -t ext3 $MDSDEV $TMPMTPT &&
+ chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} &&
+ umount $TMPMTPT" || return $?
echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre"
setup
umount_client $MOUNT
cleanup_nocli
+# test_20: remount the client read-only and then read-write again, checking
+# file creation in each state, and finally check how often $MOUNT is listed
+# in /etc/mtab. Every failed check returns its own code (43-49).
+test_20() {
+ # first format the ost/mdt
+ start_ost
+ start_mds
+ mount_client $MOUNT
+ check_mount || return 43
+ rm -f $DIR/$tfile
+ remount_client ro $MOUNT || return 44
+ # after the "ro" remount, touch must fail and must not leave the file behind
+ touch $DIR/$tfile && echo "$DIR/$tfile created incorrectly" && return 45
+ [ -e $DIR/$tfile ] && echo "$DIR/$tfile exists incorrectly" && return 46
+ remount_client rw $MOUNT || return 47
+ # after the "rw" remount, the same touch must produce a regular file
+ touch $DIR/$tfile
+ [ ! -f $DIR/$tfile ] && echo "$DIR/$tfile missing" && return 48
+ # count the /etc/mtab lines matching $MOUNT; anything other than 1 is an error
+ MCNT=`grep -c $MOUNT /etc/mtab`
+ [ "$MCNT" -ne 1 ] && echo "$MOUNT in /etc/mtab $MCNT times" && return 49
+ umount_client $MOUNT
+ stop_mds
+ stop_ost
+}
+run_test 20 "remount ro,rw mounts work and doesn't break /etc/mtab"
+
equals_msg "Done"
fi
}
+# verify that lustre actually cleaned up properly
+#
+# Outcomes, checked in this order:
+#   exit 205    dmesg contains "destruct" (case-insensitive)
+#   exit 204    the last dmesg lines report an obd or Portals memory leak
+#   return 202  "lctl dl" still produces output
+#   return 203  lnet or libcfs modules are still listed by lsmod
+#   return 0    none of the above
+# In the two "exit" cases $TMP/debug, if present, is renamed with a
+# timestamp suffix so that it is kept for later inspection.
+cleanup_check() {
+    BUSY=`dmesg | grep -i destruct || true`
+    if [ "$BUSY" ]; then
+        echo "$BUSY" 1>&2
+        [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.`date +%s`
+        exit 205
+    fi
+    LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked" || true`
+    LEAK_PORTALS=`dmesg | tail -n 20 | grep "Portals memory leaked" || true`
+    if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+        echo "$0: $LEAK_LUSTRE" 1>&2
+        echo "$0: $LEAK_PORTALS" 1>&2
+        echo "$0: Memory leak(s) detected..." 1>&2
+        # guard the rename like the "busy" branch above does: if the caller
+        # runs with "set -e", a failing mv on a missing debug file would
+        # end the script with mv's status instead of the 204 below
+        [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-leak.`date +%s`
+        exit 204
+    fi
+
+    # the trailing "|| true" keeps a false test from counting as a failure
+    [ "`lctl dl 2> /dev/null | wc -l`" -gt 0 ] && lctl dl && \
+        echo "$0: lustre didn't clean up..." 1>&2 && return 202 || true
+
+    if [ "`/sbin/lsmod 2>&1 | egrep 'lnet|libcfs'`" ]; then
+        echo "$0: modules still loaded..." 1>&2
+        /sbin/lsmod 1>&2
+        return 203
+    fi
+    return 0
+}
+
wait_for_host() {
HOST=$1
check_network "$HOST" 900
llog_reader
.*.cmd
.*.d
+llverfs
+llverdev
rootsbin_PROGRAMS = mount.lustre
sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
mount_lustre mkfs_lustre mkfs.lustre \
- tunefs_lustre tunefs.lustre l_getgroups # llverfs llverdev
+ tunefs_lustre tunefs.lustre l_getgroups llverfs llverdev
bin_PROGRAMS = lfs llog_reader
lib_LIBRARIES = liblustreapi.a
sbin_SCRIPTS = $(sbin_scripts)
lload_SOURCES = lload.c
llverfs_LDADD := -lext2fs -le2p
+if BLKID
llverdev_LDADD := -lext2fs -lblkid
+else
+llverdev_LDADD := -lext2fs
+endif
liblustreapi_a_SOURCES = liblustreapi.c
run('/sbin/rmmod kiiblnd')
if mod_loaded("kviblnd"):
run('/sbin/rmmod kviblnd')
+ if mod_loaded("kciblnd"):
+ run('/sbin/rmmod kciblnd')
+ if mod_loaded("ko2iblnd"):
+ run('/sbin/rmmod ko2iblnd')
if mod_loaded("kralnd"):
run('/sbin/rmmod kralnd')
if mod_loaded("kptllnd"):
* pattern in bulk.
*/
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#ifndef LUSTRE_UTILS
+#define LUSTRE_UTILS
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
#include <features.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <gnu/stubs.h>
#include <ext2fs/ext2fs.h>
-#include <blkid/blkid.h>
#define ONE_MB (1024 * 1024)
#define ONE_GB (1024 * 1024 * 1024)
return (fd);
}
+#ifdef HAVE_BLKID_BLKID_H
+#include <blkid/blkid.h>
+#endif
/*
* sizeof_dev: Returns size of device in bytes
*/
-static unsigned long long sizeof_dev(int fd)
+static loff_t sizeof_dev(int fd)
{
- blkid_loff_t numbytes = 0;
+ loff_t numbytes;
+#ifdef HAVE_BLKID_BLKID_H
numbytes = blkid_get_dev_size(fd);
if (numbytes <= 0) {
fprintf(stderr, "%s: blkid_get_dev_size(%s) failed",
progname, devname);
return 1;
}
+ goto out;
+#else
+# if defined BLKGETSIZE64 /* in sys/mount.h */
+ if (ioctl(fd, BLKGETSIZE64, &numbytes) >= 0)
+ goto out;
+# endif
+# if defined BLKGETSIZE /* in sys/mount.h */
+ {
+ unsigned long sectors;
+
+ if (ioctl(fd, BLKGETSIZE, &sectors) >= 0) {
+ numbytes = (loff_t)sectors << 9;
+ goto out;
+ }
+ }
+# endif
+ {
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) == 0 && S_ISREG(statbuf.st_mode)) {
+ numbytes = statbuf.st_size;
+ goto out;
+ }
+ }
+ fprintf(stderr, "%s: unable to determine size of %s\n",
+ progname, devname);
+ return 0;
+#endif
+out:
if (verbose)
printf("%s: %s is %llu bytes (%g GB) in size\n",
progname, devname,
* Returns 0 if test offset and timestamp is correct otherwise 1.
*/
int verify_chunk(char *chunk_buf, size_t chunksize,
- loff_t chunk_off, time_t time_st)
+ unsigned long long chunk_off, time_t time_st)
{
struct block_data *bd;
char *chunk_end;
* write_chunk: write the chunk_buf on the device. The number of write
* operations are based on the parameters write_end, offset, and chunksize.
*/
-int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf,
- size_t chunksize, time_t time_st)
+int write_chunks(unsigned long long offset, unsigned long long write_end,
+ char *chunk_buf, size_t chunksize, time_t time_st)
{
unsigned long long stride, count = 0;
* read_chunk: reads the chunk_buf from the device. The number of read
* operations are based on the parameters read_end, offset, and chunksize.
*/
-int read_chunks(loff_t offset, loff_t read_end, char *chunk_buf,
- size_t chunksize, time_t time_st)
+int read_chunks(unsigned long long offset, unsigned long long read_end,
+ char *chunk_buf, size_t chunksize, time_t time_st)
{
unsigned long long stride, count = 0;
* that the data in each file is correct.
*/
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
+#ifndef LUSTRE_UTILS
+#define LUSTRE_UTILS
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
#include <features.h>
#include <stdlib.h>
/* Structure for writing test pattern */
struct block_data {
- loff_t bd_offset;
- time_t bd_time;
- ino_t bd_inode;
+ unsigned long long bd_offset;
+ unsigned long long bd_time;
+ unsigned long long bd_inode;
};
static char *progname; /* name by which this program was run. */
static unsigned verbose = 1; /* prints offset in kB, operation rate */
static unsigned full = 1; /* flag to full check */
static int errno_local; /* local copy of errno */
static unsigned long num_files; /* Total number of files for read/write */
-static loff_t file_size; /* Size of each file */
+static loff_t file_size = 4*ONE_GB; /* Size of each file */
static unsigned files_in_dir = 32; /* number of files in each directioy */
static unsigned num_dirs = 30000; /* total number of directories */
const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
* Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct.
* Returns 0 if test offset and timestamp is correct otherwise 1.
*/
-int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
- time_t time_st, ino_t inode_st, char *file)
+int verify_chunk(char *chunk_buf, size_t chunksize,unsigned long long chunk_off,
+ unsigned long long time_st, unsigned long long inode_st,
+ char *file)
{
struct block_data *bd;
char *chunk_end;
(bd->bd_inode == inode_st))
continue;
fprintf(stderr,"\n%s: verify %s failed offset/timestamp/inode "
- "%llu/%lu/%lu: found %llu/%lu/%lu instead\n", progname,
- file, chunk_off, time_st, inode_st, bd->bd_offset,
- bd->bd_time, bd->bd_inode);
+ "%llu/%llu/%llu: found %llu/%llu/%llu instead\n",
+ progname, file, chunk_off, time_st, inode_st,
+ bd->bd_offset, bd->bd_time, bd->bd_inode);
return 1;
}
return 0;
* write_chunk: write the chunk_buf on the device. The number of write
* operations are based on the parameters write_end, offset, and chunksize.
*/
-int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf,
- size_t chunksize, time_t time_st,
+int write_chunks(int fd, unsigned long long offset,unsigned long long write_end,
+ char *chunk_buf, size_t chunksize, time_t time_st,
ino_t inode_st, const char *file)
{
unsigned long long stride;
* read_chunk: reads the chunk_buf from the device. The number of read
* operations are based on the parameters read_end, offset, and chunksize.
*/
-int read_chunks(int fd, loff_t offset, loff_t read_end, char *chunk_buf,
- size_t chunksize, time_t time_st, ino_t inode_st, char *file)
+int read_chunks(int fd, unsigned long long offset, unsigned long long read_end,
+ char *chunk_buf, size_t chunksize, time_t time_st,
+ ino_t inode_st, char *file)
{
unsigned long long stride;
usage(1);
return -1;
}
- file_size = 4 * ONE_GB;
if (!readoption && !writeoption) {
readoption = 1;
writeoption = 1;
CHECK_CDEFINE(OBD_CONNECT_JOIN);
CHECK_CDEFINE(OBD_CONNECT_ATTRFID);
CHECK_CDEFINE(OBD_CONNECT_NODEVOH);
+ CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
COMMENT("Sizes and Offsets");
BLANK_LINE();
CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+ CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
/* Sizes and Offsets */
+ /* Checks for struct obd_uuid */
+ LASSERTF((int)sizeof(struct obd_uuid) == 40, " found %lld\n",
+ (long long)(int)sizeof(struct obd_uuid));
/* Checks for struct lustre_handle */
LASSERTF((int)sizeof(struct lustre_handle) == 8, " found %lld\n",