-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/super.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h 2006-08-09 17:59:34.000000000 +0400
++++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h 2006-08-22 12:35:55.000000000 +0400
+@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+
+Index: linux-2.6.5-7.201-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400
@@ -39,7 +39,7 @@
static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
struct ext3_super_block * es,
int sync)
{
-Index: linux-2.6.5-7.201/fs/ext3/namei.c
+Index: linux-2.6.5-7.201-full/fs/ext3/namei.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/namei.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/namei.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400
@@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
struct buffer_head * bh)
{
if (pde)
pde->rec_len =
cpu_to_le16(le16_to_cpu(pde->rec_len) +
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2006-06-20 19:42:30.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c 2006-07-14 01:53:23.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c 2006-08-09 17:59:37.000000000 +0400
@@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
{
int error = -EINVAL;
write_lock(&ext3_handler_lock);
if (!ext3_xattr_handlers[name_index-1]) {
ext3_xattr_handlers[name_index-1] = handler;
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.201-full/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/inode.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c 2006-07-14 01:53:22.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/inode.c 2006-08-22 12:35:28.000000000 +0400
@@ -1517,9 +1517,14 @@ out_stop:
if (end > inode->i_size) {
ei->i_disksize = end;
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-08-09 17:56:39.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-08-22 12:36:22.000000000 +0400
+@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+
Index: linux-2.6.9-full/fs/ext3/super.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-06-02 23:37:51.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/super.c 2006-06-02 23:56:29.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400
@@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
unsigned long journal_devnum);
static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
{
Index: linux-2.6.9-full/fs/ext3/namei.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-06-02 23:37:49.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/namei.c 2006-06-02 23:43:31.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400
@@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
struct buffer_head * bh)
{
Index: linux-2.6.9-full/fs/ext3/xattr.c
===================================================================
--- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-06-01 14:58:48.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-06-03 00:02:00.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-08-09 17:56:40.000000000 +0400
@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
{
struct xattr_handler *handler = NULL;
Index: linux-2.6.9-full/fs/ext3/inode.c
===================================================================
--- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-02 23:37:38.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/inode.c 2006-06-03 00:27:41.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c 2006-08-22 12:34:28.000000000 +0400
@@ -1513,9 +1513,14 @@ out_stop:
if (end > inode->i_size) {
ei->i_disksize = end;
+ * Someone more lucky has already allocated it.
+ * The only thing we can do is just take first
+ * found block(s)
-+ */
+ printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++ */
+ ac.ac_b_ex.fe_group = 0;
+ ac.ac_b_ex.fe_start = 0;
+ ac.ac_b_ex.fe_len = 0;
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:37:27.000000000 +0800
+@@ -57,6 +57,14 @@
#define ext3_debug(f, a...) do {} while (0)
#endif
/*
* Special inodes numbers
*/
-@@ -339,6 +347,7 @@ struct ext3_inode {
+@@ -339,6 +347,7 @@
#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
-@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe
+@@ -361,6 +370,14 @@
+ #define ext3_find_first_zero_bit ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit ext2_find_next_zero_bit
+
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif /* __LITTLE_ENDIAN */
++#endif /* !ext2_find_next_le_bit */
++
+ /*
+ * Maximal mount counts between two filesystem checks
+ */
+@@ -700,7 +717,9 @@
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc
+@@ -824,6 +843,17 @@
extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long)
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
+Index: linux-stage/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:37:01.000000000 +0800
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -78,6 +84,43 @@ struct ext3_sb_info {
+@@ -78,6 +84,43 @@
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
};
-+
+
+#define EXT3_GROUP_INFO(sb, group) \
+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
-
++
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.252-full/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400
-@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800
+@@ -389,6 +389,7 @@
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -545,6 +546,7 @@ enum {
+@@ -546,6 +547,7 @@
Opt_err,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
Opt_extents, Opt_noextents, Opt_extdebug,
};
static match_table_t tokens = {
-@@ -591,6 +592,9 @@ static match_table_t tokens = {
+@@ -592,6 +594,9 @@
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL}
};
-@@ -813,6 +815,19 @@ static int parse_options (char * options
+@@ -817,6 +822,19 @@
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
+@@ -1470,6 +1488,7 @@
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
+@@ -2118,7 +2137,13 @@
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2147,6 +2172,7 @@
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
+Index: linux-stage/fs/ext3/extents.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
+@@ -779,7 +779,7 @@
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400
-@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800
+@@ -574,7 +574,7 @@
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -675,7 +675,7 @@ err_out:
+@@ -675,7 +675,7 @@
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@
}
}
}
/**
-@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t
+@@ -2008,7 +2008,7 @@
ext3_journal_test_restart(handle, inode);
}
if (parent_bh) {
/*
-Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
+Index: linux-stage/fs/ext3/balloc.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300
-+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400
-@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:36:59.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:37:01.000000000 +0800
+@@ -78,7 +78,7 @@
*
* Return buffer_head on success or NULL in case of failure.
*/
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
struct ext3_group_desc * desc;
-@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino
+@@ -274,7 +274,7 @@
}
/* Free given blocks, update quota and i_blocks field */
unsigned long block, unsigned long count)
{
struct buffer_head *bitmap_bh = NULL;
-@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super
+@@ -1142,7 +1142,7 @@
* bitmap, and then for any free bit if that fails.
* This function also updates quota and i_blocks field.
*/
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
+Index: linux-stage/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400
-@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:37:01.000000000 +0800
+@@ -1371,7 +1371,7 @@
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
error = -EIO;
goto cleanup;
}
-@@ -1411,7 +1411,7 @@ getblk_failed:
+@@ -1411,7 +1411,7 @@
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
/* ext3_forget() calls bforget() for us, but we
let our caller release old_bh, so we need to
-@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1519,7 +1519,7 @@
mb_cache_entry_free(ce);
ce = NULL;
}
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
-Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
+Index: linux-stage/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:37:34.000000000 +0800
@@ -0,0 +1,2702 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ while (i < max) {
+ fragments++;
+ first = i;
-+ i = find_next_bit(bitmap, max, i);
++ i = ext2_find_next_le_bit(bitmap, max, i);
+ len = i - first;
+ free += len;
+ if (len > 1)
+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800
+@@ -6,7 +6,7 @@
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o \
+ * Someone more lucky has already allocated it.
+ * The only thing we can do is just take first
+ * found block(s)
-+ */
+ printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++ */
+ ac.ac_b_ex.fe_group = 0;
+ ac.ac_b_ex.fe_start = 0;
+ ac.ac_b_ex.fe_len = 0;
Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:29:38.000000000 +0800
+@@ -57,6 +57,14 @@
#define ext3_debug(f, a...) do {} while (0)
#endif
/*
* Special inodes numbers
*/
-@@ -365,6 +373,7 @@ struct ext3_inode {
+@@ -365,6 +373,7 @@
#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+@@ -387,6 +396,14 @@
+ #define ext3_find_first_zero_bit ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit ext2_find_next_zero_bit
+
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif /* __LITTLE_ENDIAN */
++#endif /* !ext2_find_next_le_bit */
++
+ /*
+ * Maximal mount counts between two filesystem checks
+ */
+@@ -726,7 +743,7 @@
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
unsigned long, unsigned long, int *);
extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+@@ -859,6 +876,17 @@
extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-stage/include/linux/ext3_fs_sb.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs_sb.h 2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:27:37.000000000 +0800
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -81,6 +87,43 @@ struct ext3_sb_info {
+@@ -81,6 +87,43 @@
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
};
-+
+
+#define EXT3_GROUP_INFO(sb, group) \
+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
-
++
#endif /* _LINUX_EXT3_FS_SB */
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600
-@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800
+@@ -394,6 +394,7 @@
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,6 +598,7 @@
Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
Opt_extents, Opt_noextents, Opt_extdebug,
};
static match_table_t tokens = {
-@@ -649,6 +651,9 @@ static match_table_t tokens = {
+@@ -649,6 +651,9 @@
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -962,6 +967,19 @@ static int parse_options (char * options
+@@ -962,6 +967,19 @@
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super
+@@ -1651,6 +1669,7 @@
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t
+@@ -2433,7 +2452,13 @@
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2455,6 +2480,7 @@
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
int ext3_prep_san_write(struct inode *inode, long *blocks,
Index: linux-stage/fs/ext3/extents.c
===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
+@@ -779,7 +779,7 @@
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
from, to, ex->ee_block, ex->ee_len);
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800
+@@ -572,7 +572,7 @@
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -673,7 +673,7 @@ err_out:
+@@ -673,7 +673,7 @@
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1831,7 +1831,7 @@
}
}
}
/**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+@@ -2004,7 +2004,7 @@
ext3_journal_test_restart(handle, inode);
}
/*
Index: linux-stage/fs/ext3/balloc.c
===================================================================
---- linux-stage.orig/fs/ext3/balloc.c 2006-05-25 10:36:02.000000000 -0600
-+++ linux-stage/fs/ext3/balloc.c 2006-05-25 10:36:04.000000000 -0600
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:27:37.000000000 +0800
+@@ -79,7 +79,7 @@
*
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *bitmap_bh = NULL;
Index: linux-stage/fs/ext3/xattr.c
===================================================================
---- linux-stage.orig/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600
-@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800
+@@ -1281,7 +1281,7 @@
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
error = -EIO;
goto cleanup;
}
-@@ -1328,7 +1328,7 @@ getblk_failed:
+@@ -1328,7 +1328,7 @@
if (ce)
mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
/* ext3_forget() calls bforget() for us, but we
let our caller release old_bh, so we need to
-@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1427,7 +1427,7 @@
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
if (ce)
mb_cache_entry_free(ce);
} else {
Index: linux-stage/fs/ext3/mballoc.c
===================================================================
---- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
-+++ linux-stage/fs/ext3/mballoc.c 2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:30:11.000000000 +0800
@@ -0,0 +1,2701 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ while (i < max) {
+ fragments++;
+ first = i;
-+ i = find_next_bit(bitmap, max, i);
++ i = ext2_find_next_le_bit(bitmap, max, i);
+ len = i - first;
+ free += len;
+ if (len > 1)
+}
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600
+--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800
@@ -6,7 +6,7 @@
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
special upgrade step is needed. Please read the
user documentation before upgrading any part of a live system.
* WIRE PROTOCOL CHANGE from previous 1.6 beta versions. This
- version will not interoperate with older 1.6 betas.
+ version will not interoperate with 1.6 betas before beta5 (1.5.95).
* WARNING: Lustre configuration and startup changes are required with
this release. See https://mail.clusterfs.com/wikis/lustre/MountConf
for details.
Severity : enhancement
-Bugzilla : 4226
-Description: Permanently set tunables
-Details : All writable /proc/fs/lustre tunables can now be permanently
- set on a per-server basis, at mkfs time or on a live
- system.
-
-Severity : enhancement
-Bugzilla : 10547
-Description: Lustre message v2
-Details : Add lustre message format v2.
-
-Severity : enhancement
Bugzilla : 8007
Description: MountConf
Details : Lustre configuration is now managed via mkfs and mount
to optimize storage space and networking resources.
Severity : enhancement
+Bugzilla : 4226
+Description: Permanently set tunables
+Details : All writable /proc/fs/lustre tunables can now be permanently
+ set on a per-server basis, at mkfs time or on a live
+ system.
+
+Severity : enhancement
+Bugzilla : 10547
+Description: Lustre message v2
+Details : Add lustre message format v2.
+
+Severity : enhancement
Bugzilla : 9866
Description: client OST exclusion list
Details : Clients can be started with a list of OSTs that should be
MDS RPCs in flight for a single client and add /proc controls
to adjust this limit.
+Severity : enhancement
+Bugzilla : 22484
+Description: client read/write statistics
+Details : Add client read/write call usage stats for performance
+ analysis of user processes.
+ /proc/fs/lustre/llite/*/offset_stats shows non-sequential
+ file access. extents_stats shows chunk size distribution.
+ extents_stats_per_process shows chunk size distribution per
+ user process.
+
+Severity : enhancement
+Bugzilla : 22486
+Description: mds statistics
+Details : Add detailed mds operations statistics in
+ /proc/fs/lustre/mds/*/stats.
+
+Severity : minor
+Bugzilla : 10667
+Description: Failure when copying files with Lustre special EAs.
+Details : The client side always returns success for a setxattr call on a
+ Lustre special xattr (currently only "trusted.lov").
------------------------------------------------------------------------------
-tbd Cluster File Systems, Inc. <info@clusterfs.com>
+08-20-2006 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.4.7
* Support for kernels:
- 2.6.9-34.EL (RHEL 4)
- 2.6.5-7.252 (SLES 9)
+ 2.6.9-42.EL (RHEL 4)
+ 2.6.5-7.276 (SLES 9)
+ 2.4.21-40.EL (RHEL 3)
2.6.12.6 vanilla (kernel.org)
* bug fixes
Severity : major
Frequency : rare
-Bugzilla : 5719, 9635, 9792, 9684,
+Bugzilla : 5719, 9635, 9792, 9684
Description: OST (or MDS) trips assertions in (re)connection under heavy load
Details : If a server is under heavy load and cannot reply to new
connection requests before the client resends the (re)connect,
Severity : major
Frequency : only very large systems with liblustre clients
-Bugzilla : 7304
+Bugzilla : 7304
Description: slow eviction of liblustre clients with the "evict_by_nid" RPC
-Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to
+Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to
all OSTs in parallel. This allows the eviction of stale liblustre
- clients to proceed much faster than if they were done in series,
+ clients to proceed much faster than if they were done in series,
and also offers similar improvements for other set_info RPCs.
Severity : minor
Severity : minor
Frequency : occasional, when OST network is overloaded/intermittent
-Bugzilla : 10416
+Bugzilla : 10416
Description: client evicted by OST after bulk IO timeout
Details : If a client sends a bulk IO request (read or write) the OST
may evict the client if it is unresposive to its data GET/PUT
Severity : minor
Frequency : Always when mmapping file with no objects
-Bugzilla : 10438
+Bugzilla : 10438
Description: client crashes when mmapping file with no objects
Details : Check that we actually have objects in a file before doing any
operations on objects in ll_vm_open, ll_vm_close and
Severity : enhancement
Bugzilla : 9292
Description: Getattr by fid
-Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP
- locks, avoids extra getattr rpc requests to MDS, allows '/' to
+Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP
+ locks, avoids extra getattr rpc requests to MDS, allows '/' to
have locks and avoids getattr rpc requests for it on every stat.
Severity : major
assertion in ll_local_open. Now we set the handler right after
recognising of open request
-Severity : minor
-Frequency : very rare
-Bugzilla : 10669
-Description: Deadlock: extent lock cancellation callback vs import invalidation
-Details : If extent lock cancellation callback takes long enough time, and it
- happens that import gets invalidated in process, there is a
- deadlock on page_lock in extent lock cancellation vs ns_lock in
- import invalidation processes. The fix is to not try to match
- locks from inactive OSTs.
-
Severity : trivial
Frequency : very rare
Bugzilla : 10584
to the "stats" file.
Severity : minor
+Frequency : rare
+Bugzilla : 10641
+Description: Client mtime is not the same on different clients after utimes
+Details : In some cases, the client was using the utimes() syscall on
+ a file cached on another node. The clients now validate the
+ ctime from the MDS + OSTs to determine which one is right.
+
+Severity : minor
Frequency : always
Bugzilla : 10611
Description: Inability to activate failout mode
failed mount can wait for the full obd_timeout interval,
possibly several minutes, before reporting an error.
Instead return an error as soon as the status is known.
+Severity : major
+Frequency : quota enabled and large files being deleted
+Bugzilla : 10707
+Description: releasing more than 4GB of quota at once hangs OST
+Details : If a user deletes more than 4GB of files on a single OST it
+ will cause the OST to spin in an infinite loop. Release
+ quota in < 4GB chunks, or use a 64-bit value for 1.4.7.1+.
+
+Severity : trivial
+Frequency : rare
+Bugzilla : 10845
+Description: statfs data retrieved from /proc may be stale or zero
+Details : When reading per-device statfs data from /proc, in the
+ {kbytes,files}_{total,free,avail} files, it may appear
+ as zero or be out of date.
+
+Severity : trivial
+Frequency : systems with MD RAID1 external journal devices
+Bugzilla : 10832
+Description: lconf's call to blkid is confused by RAID1 journal devices
+Details : Use the "blkid -l" flag to locate the MD RAID device instead
+ of returning all block devices that match the journal UUID.
+
+Severity : normal
+Frequency : always, for aggregate stripe size over 4GB
+Bugzilla : 10725
+Description: assertion fails when trying to use 4GB stripe size
+Details : Using "setstripe" to set a stripe size over 4GB makes the kernel
+ fail with "ASSERTION(lsm->lsm_xfersize != 0)".
+
+Severity : normal
+Frequency : always on ppc64
+Bugzilla : 10634
+Description: the first write on an ext3 filesystem with mballoc got stuck
+Details : ext3_mb_generate_buddy() uses find_next_bit() which does not
+ perform endianness conversion.
------------------------------------------------------------------------------
#
AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
[AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
-HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/mm/filemap.c`"
+HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
if test "$HAVE_GCPN_GFP" != 0 ; then
AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
[kernel has grab_cache_page_nowait_gfp()])
m4_define([LUSTRE_MAJOR],[1])
-m4_define([LUSTRE_MINOR],[9])
-m4_define([LUSTRE_PATCH],[0])
+m4_define([LUSTRE_MINOR],[5])
+m4_define([LUSTRE_PATCH],[95])
m4_define([LUSTRE_FIX],[0])
dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after
.BI \--backfstype= fstype
Force a particular format for the backing fs (ext3, ldiskfs)
.TP
+.BI \--comment= comment
+Set user comment about this disk, ignored by Lustre.
+.TP
.BI \--device-size= KB
Set device size for loop devices
.TP
Only start the MGC (and MGS, if co-located) for a target service, and not the actual service.
.TP
.BI exclude= ostlist
-Start a client or MDT with a list of known inactive OSTs
+Start a client or MDT with a (colon-separated) list of known inactive OSTs
.TP
.BI abort_recov
Abort recovery (targets only)
.SH OPTIONS
.TP
+.BI \--comment= comment
+Set user comment about this disk, ignored by Lustre.
+.TP
.BI \--erase-params
Remove all previous parameter info
.TP
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
- */
+ */
#ifndef __LVFS_LINUX_H__
#define __LVFS_LINUX_H__
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#include <linux/namei.h>
+#endif
#include <linux/sched.h>
#include <lvfs.h>
}
}
-#define OBD_SLEEP_ON(wq) interruptible_sleep_on(wq)
+#define OBD_SLEEP_ON(wq, state) wait_event_interruptible(wq, state)
#else /* !__KERNEL__ */
typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
/* liblustreapi.c */
-extern int llapi_file_create(const char *name, long stripe_size,
+extern int llapi_file_create(const char *name, unsigned long stripe_size,
int stripe_offset, int stripe_count,
int stripe_pattern);
extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
struct lu_fid mea_ids[0];
};
-
struct lustre_handle {
__u64 cookie;
};
#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */
/* Connect flags */
-#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */
-#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */
-#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */
-#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */
-#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */
-#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */
-#define OBD_CONNECT_ACL 0x80ULL /* client using access control lists */
-#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/
-#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST do object create-on-write */
-#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* server gets locks for punch b=9528 */
-#define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */
-#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */
-#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */
-#define OBD_CONNECT_REAL 0x4000ULL
-#define OBD_CONNECT_ATTRFID 0x8000ULL /* Server supports GetAttr By Fid */
-#define OBD_CONNECT_NODEVOH 0x10000ULL /* No open handle for special nodes */
-#define OBD_CONNECT_LCL_CLIENT 0x20000ULL /* local 1.6 client */
-#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote client */
-#define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Maximum pages per RPC */
-
-/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
-
-#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
+#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */
+#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */
+#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */
+#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */
+#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */
+#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */
+#define OBD_CONNECT_ACL 0x80ULL /* client uses access control lists */
+#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/
+#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST create objects on write */
+#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* locks on server for punch b=9528 */
+#define OBD_CONNECT_TRANSNO 0x800ULL /* replay sends initial transno */
+#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */
+#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */
+#define OBD_CONNECT_REAL 0x4000ULL
+#define OBD_CONNECT_ATTRFID 0x8000ULL /* Server supports GetAttr By Fid */
+#define OBD_CONNECT_NODEVOH 0x10000ULL /* No open handle for special nodes */
+#define OBD_CONNECT_LCL_CLIENT 0x20000ULL /* local 1.8 client */
+#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote 1.8 client */
+#define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64 0x100000ULL /* 64bit qunit_data.qd_count b=10707*/
+#define OBD_CONNECT_FID_CAPA 0x200000ULL /* fid capability */
+#define OBD_CONNECT_OSS_CAPA 0x400000ULL /* OSS capability */
+/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
+ * and lustre/utils/wirecheck.c */
+
+#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
- OBD_CONNECT_BRW_SIZE)
+ OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION)
-#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
- OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
- OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
- OBD_CONNECT_ATTRFID)
+#define MAX_QUOTA_COUNT32 ((0xffffffffULL >> QUOTABLOCK_BITS) << QUOTABLOCK_BITS)
#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
((patch)<<8) + (fix))
OST_OPEN = 11,
OST_CLOSE = 12,
OST_STATFS = 13,
- OST_SAN_READ = 14,
- OST_SAN_WRITE = 15,
+/* OST_SAN_READ = 14, deprecated */
+/* OST_SAN_WRITE = 15, deprecated */
OST_SYNC = 16,
OST_SET_INFO = 17,
OST_QUOTACHECK = 18,
#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
#define XATTR_NAME_LOV "trusted.lov"
-
#define OBD_MD_FLID (0x00000001ULL) /* object ID */
#define OBD_MD_FLATIME (0x00000002ULL) /* access time */
#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */
#define LLOG_OP_MASK 0xfff00000
typedef enum {
- LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0,
- OST_SZ_REC = LLOG_OP_MAGIC | (OST_SAN_WRITE << 8),
- OST_RAID1_REC = LLOG_OP_MAGIC | ((OST_SAN_WRITE + 1) << 8),
+ LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000,
+ OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00,
+ OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000,
MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK,
MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR,
OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000,
struct lustre_cfg;
extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
-/* quota */
+/* quota. fixed by tianzy for bug10707 */
+#define QUOTA_IS_GRP 0x1UL /* qd_flags bit: 0 is user, 1 is group */
+#define QUOTA_IS_BLOCK 0x2UL /* qd_flags bit: 0 is inode, 1 is block */
+
+/* Quota acquire/release record with a 64-bit count; qunit_data_old keeps
+ * the previous layout with a 32-bit count. */
struct qunit_data {
+ __u32 qd_id; /* ID this applies to (uid, gid) */
+ __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupies one bit;
+ * block quota or file quota occupies one bit */
+ __u64 qd_count; /* acquire/release count (bytes for block quota) */
+};
+
+struct qunit_data_old {
__u32 qd_id; /* ID appiles to (uid, gid) */
__u32 qd_type; /* Quota type (USRQUOTA, GRPQUOTA) */
__u32 qd_count; /* acquire/release count (bytes for block quota) */
__u32 qd_isblk; /* Block quota or file quota */
};
+
extern void lustre_swab_qdata(struct qunit_data *d);
+extern void lustre_swab_qdata_old(struct qunit_data_old *d);
+extern struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d);
+extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d);
typedef enum {
QUOTA_DQACQ = 601,
LCFG_DEL_CONN = 0x00cf00c,
LCFG_LOV_ADD_OBD = 0x00cf00d,
LCFG_LOV_DEL_OBD = 0x00cf00e,
- LCFG_PARAM = 0x00ce00f,
- LCFG_MARKER = 0x00ce010,
+ LCFG_PARAM = 0x00cf00f,
+ LCFG_MARKER = 0x00cf010,
LCFG_LOG_START = 0x00ce011,
LCFG_LOG_END = 0x00ce012,
LCFG_LOV_ADD_INA = 0x00ce013,
char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
__u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
-/*200*/ __u8 ldd_padding[4096 - 200];
+/*200*/ char ldd_userdata[1024 - 200]; /* arbitrary user string */
+/*1024*/__u8 ldd_padding[4096 - 1024];
/*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */
/*8192*/char ldd_params[4096]; /* key=value pairs */
};
extern char *ldlm_typename[];
extern char *ldlm_it2str(int it);
-#define __LDLM_DEBUG(level, lock, format, a...) \
-do { \
- if (lock->l_resource == NULL) { \
- CDEBUG(level, "### " format \
- " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
- "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \
- LPX64" expref: %d pid: %u\n" , ## a, lock, \
- lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
- lock->l_readers, lock->l_writers, \
- ldlm_lockname[lock->l_granted_mode], \
- ldlm_lockname[lock->l_req_mode], \
- lock->l_flags, lock->l_remote_handle.cookie, \
- lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99, \
- lock->l_pid); \
- break; \
- } \
- if (lock->l_resource->lr_type == LDLM_EXTENT) { \
- CDEBUG(level, "### " format \
- " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
- "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 \
- " expref: %d pid: %u\n" , ## a, \
- lock->l_resource->lr_namespace->ns_name, lock, \
- lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
- lock->l_readers, lock->l_writers, \
- ldlm_lockname[lock->l_granted_mode], \
- ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name.name[0], \
- lock->l_resource->lr_name.name[1], \
- atomic_read(&lock->l_resource->lr_refcount), \
- ldlm_typename[lock->l_resource->lr_type], \
- lock->l_policy_data.l_extent.start, \
- lock->l_policy_data.l_extent.end, \
- lock->l_req_extent.start, lock->l_req_extent.end, \
- lock->l_flags, lock->l_remote_handle.cookie, \
- lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99, \
- lock->l_pid); \
- break; \
- } \
- if (lock->l_resource->lr_type == LDLM_FLOCK) { \
- CDEBUG(level, "### " format \
- " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " \
- "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \
- " expref: %d pid: %u\n" , ## a, \
- lock->l_resource->lr_namespace->ns_name, lock, \
- lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
- lock->l_readers, lock->l_writers, \
- ldlm_lockname[lock->l_granted_mode], \
- ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name.name[0], \
- lock->l_resource->lr_name.name[1], \
- atomic_read(&lock->l_resource->lr_refcount), \
- ldlm_typename[lock->l_resource->lr_type], \
- lock->l_policy_data.l_flock.pid, \
- lock->l_policy_data.l_flock.start, \
- lock->l_policy_data.l_flock.end, \
- lock->l_flags, lock->l_remote_handle.cookie, \
- lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99, \
- lock->l_pid); \
- break; \
- } \
- if (lock->l_resource->lr_type == LDLM_IBITS) { \
- CDEBUG(level, "### " format \
- " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " \
- "flags: %x remote: "LPX64" expref: %d " \
- "pid %u\n" , ## a, \
- lock->l_resource->lr_namespace->ns_name, \
- lock, lock->l_handle.h_cookie, \
- atomic_read (&lock->l_refc), \
- lock->l_readers, lock->l_writers, \
- ldlm_lockname[lock->l_granted_mode], \
- ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name.name[0], \
- lock->l_resource->lr_name.name[1], \
- lock->l_policy_data.l_inodebits.bits, \
- atomic_read(&lock->l_resource->lr_refcount), \
- ldlm_typename[lock->l_resource->lr_type], \
- lock->l_flags, lock->l_remote_handle.cookie, \
- lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99, \
- lock->l_pid); \
- break; \
- } \
- { \
- CDEBUG(level, "### " format \
- " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " \
- "remote: "LPX64" expref: %d pid: %u\n" , ## a, \
- lock->l_resource->lr_namespace->ns_name, \
- lock, lock->l_handle.h_cookie, \
- atomic_read (&lock->l_refc), \
- lock->l_readers, lock->l_writers, \
- ldlm_lockname[lock->l_granted_mode], \
- ldlm_lockname[lock->l_req_mode], \
- lock->l_resource->lr_name.name[0], \
- lock->l_resource->lr_name.name[1], \
- atomic_read(&lock->l_resource->lr_refcount), \
- ldlm_typename[lock->l_resource->lr_type], \
- lock->l_flags, lock->l_remote_handle.cookie, \
- lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99, \
- lock->l_pid); \
- } \
+void ldlm_lock_debug(cfs_debug_limit_state_t *cdls,
+ __u32 level, struct ldlm_lock *lock,
+ const char *file, const char *func, const int line,
+ char *fmt, ...);
+
+#define LDLM_DEBUG(lock, fmt, a...) ldlm_lock_debug(NULL, D_DLMTRACE, lock, \
+ __FILE__, __func__, __LINE__, "### " fmt, ## a)
+
+#define LDLM_ERROR(lock, fmt, a...) \
+do { \
+ static cfs_debug_limit_state_t cdls; \
+ ldlm_lock_debug(&cdls, D_ERROR, lock, \
+ __FILE__, __func__, __LINE__, "### " fmt, ## a); \
} while (0)
-#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \
- format, ## a)
-#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a)
-
#define LDLM_DEBUG_NOLOCK(format, a...) \
CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
struct ptlrpc_request;
-void ptlrpc_run_failed_import_upcall(struct obd_import *imp);
-void ptlrpc_run_recovery_over_upcall(struct obd_device *obd);
int ptlrpc_replay(struct obd_import *imp);
int ptlrpc_resend(struct obd_import *imp);
void ptlrpc_free_committed(struct obd_import *imp);
struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
struct llog_cookie *logcookies, int numcookies);
-int llog_cat_initialize(struct obd_device *obd, int count);
+int llog_cat_initialize(struct obd_device *obd, int count,
+ struct obd_uuid *uuid);
int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
- int count, struct llog_catid *logid);
+ int count, struct llog_catid *logid, struct obd_uuid *uuid);
int obd_llog_finish(struct obd_device *obd, int count);
struct timeval rq_arrival_time; /* request arrival time */
struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */
struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/
-#if CRAY_XT3
+#ifdef CRAY_XT3
__u32 rq_uid; /* peer uid, used in MDS only */
#endif
#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s"
-#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) \
-CDEB_TYPE(level, "@@@ " fmt \
- " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \
- REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \
- req->rq_transno, \
- req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, \
- req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : \
- req->rq_export ? (char*)req->rq_export->exp_client_uuid.uuid : "<?>",\
- req->rq_import ? \
- (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : \
- req->rq_export ? \
- (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>", \
- (req->rq_import && req->rq_import->imp_client) ? \
- req->rq_import->imp_client->cli_request_portal : -1, \
- req->rq_reqlen, req->rq_replen, \
- atomic_read(&req->rq_refcount), \
- DEBUG_REQ_FLAGS(req), \
- req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0, \
- req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0, \
- req->rq_status, req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0)
+void debug_req(cfs_debug_limit_state_t *cdls,
+ __u32 level, struct ptlrpc_request *req,
+ const char *file, const char *func, const int line,
+ const char *fmt, ...);
/* for most callers (level is a constant) this is resolved at compile time */
#define DEBUG_REQ(level, req, fmt, args...) \
do { \
- if ((level) & (D_ERROR | D_WARNING)) \
- __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args); \
- else \
- __DEBUG_REQ(CDEBUG, level, req, fmt, ## args); \
+ if ((level) & (D_ERROR | D_WARNING)) { \
+ static cfs_debug_limit_state_t cdls; \
+ debug_req(&cdls, level, req, __FILE__, __func__, __LINE__, \
+ "@@@ "fmt"\n", ## args); \
+ } else \
+ debug_req(NULL, level, req, __FILE__, __func__, __LINE__, \
+ "@@@ "fmt"\n", ## args); \
} while (0)
struct ptlrpc_bulk_page {
#define PARAM_MDC "mdc."
#define PARAM_LLITE "llite."
#define PARAM_LOV "lov."
-/* LOV_STRIPE_* aren't settable in proc. But match the proc names. */
-#define PARAM_LOV_STRIPE_SIZE PARAM_LOV"stripesize="
-#define PARAM_LOV_STRIPE_COUNT PARAM_LOV"stripecount="
-#define PARAM_LOV_STRIPE_OFFSET PARAM_LOV"stripeoffset="
-#define PARAM_LOV_STRIPE_PATTERN PARAM_LOV"stripetype="
#define PARAM_SEC "security."
#define PARAM_SEC_RPC PARAM_SEC"rpc."
#define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt="
#include <lustre_quota.h>
#include <lustre_fld.h>
+#define MAX_OBD_DEVICES 8192
+
/* this is really local to the OSC */
struct loi_oap_pages {
struct list_head lop_pending;
int cl_default_mds_easize;
int cl_max_mds_easize;
int cl_max_mds_cookiesize;
- kdev_t cl_sandev;
/* security configuration */
struct sec_flavor_config cl_sec_conf;
int ph_opc;
};
-#define LUSTRE_FLD_NAME "fld"
-#define LUSTRE_SEQ_NAME "seq"
-
-/* device types (not names--FIXME) */
-/* FIXME all the references to these defines need to be updated */
-#define LUSTRE_MDS_NAME "mds"
-#define LUSTRE_MDT_NAME "mdt"
-
-/* new MDS layers. Prototype */
-#define LUSTRE_CMM_NAME "cmm"
-#define LUSTRE_MDD_NAME "mdd"
-#define LUSTRE_OSD_NAME "osd"
-#define LUSTRE_CMM_MDC_NAME "cmm-mdc"
-
-#define LUSTRE_MDC_NAME "mdc"
-#define LUSTRE_LOV_NAME "lov"
-#define LUSTRE_LMV_NAME "lmv"
-
-/* FIXME just the names need to be changed */
-#define LUSTRE_OSS_NAME "ost" /* FIXME oss */
-#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost */
-#define LUSTRE_OSTSAN_NAME "sanobdfilter"
-
-#define LUSTRE_OSC_NAME "osc"
-#define LUSTRE_FILTER_NAME "filter"
-#define LUSTRE_SANOSC_NAME "sanosc"
-#define LUSTRE_SANOST_NAME "sanost"
-#define LUSTRE_MGS_NAME "mgs"
-#define LUSTRE_MGC_NAME "mgc"
-
+#define LUSTRE_FLD_NAME "fld"
+#define LUSTRE_SEQ_NAME "seq"
+
+#define LUSTRE_CMM_NAME "cmm"
+#define LUSTRE_MDD_NAME "mdd"
+#define LUSTRE_OSD_NAME "osd"
+#define LUSTRE_LMV_NAME "lmv"
+#define LUSTRE_CMM_MDC_NAME "cmm-mdc"
+
+/* obd device type names */
+ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
+#define LUSTRE_MDS_NAME "mds"
+#define LUSTRE_MDT_NAME "mdt"
+#define LUSTRE_MDC_NAME "mdc"
+#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */
+#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */
+#define LUSTRE_OSC_NAME "osc"
+#define LUSTRE_LOV_NAME "lov"
+#define LUSTRE_MGS_NAME "mgs"
+#define LUSTRE_MGC_NAME "mgc"
+
+#define LUSTRE_CACHEOBD_NAME "cobd"
#define LUSTRE_ECHO_NAME "obdecho"
#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
int flags, void *opaque);
int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *,
int join);
- int (*o_san_preprw)(int cmd, struct obd_export *exp,
- struct obdo *oa, int objcount,
- struct obd_ioobj *obj, int niocount,
- struct niobuf_remote *remote);
int (*o_init_export)(struct obd_export *exp);
int (*o_destroy_export)(struct obd_export *exp);
int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
/* llog related obd_methods */
int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd,
- int count, struct llog_catid *logid);
+ int count, struct llog_catid *logid,
+ struct obd_uuid *uuid);
int (*o_llog_finish)(struct obd_device *obd, int count);
/* metadata-only methods */
void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
unsigned long *);
obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int);
+ obd_off (*lsm_stripe_offset_by_offset)(struct lov_stripe_md *, obd_off);
int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off);
int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd);
int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
#endif
/* OBD Device Declarations */
-#define MAX_OBD_DEVICES 2048
extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
extern spinlock_t obd_dev_lock;
RETURN(rc);
}
-static inline int obd_san_preprw(int cmd, struct obd_export *exp,
- struct obdo *oa,
- int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_remote *remote)
-{
- int rc;
-
- EXP_CHECK_DT_OP(exp, preprw);
- OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
-
- rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
- niocount, remote);
- class_export_put(exp);
- return(rc);
-}
-
static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
struct obd_client_handle *handle, int flag)
{
extern int obd_memmax;
extern unsigned int obd_fail_loc;
extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_dump_on_eviction;
extern unsigned int obd_timeout; /* seconds */
#define PING_INTERVAL max(obd_timeout / 4, 1U)
#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
extern unsigned int ldlm_timeout;
extern unsigned int obd_health_check_timeout;
-extern char obd_lustre_upcall[128];
+extern unsigned int obd_sync_filter;
extern cfs_waitq_t obd_race_waitq;
extern int obd_race_state;
#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1210
#define OBD_FAIL_SEC_CTX_FINI_NET 0x1220
+#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
* first thread that calls this with a matching fail_loc is put to
* sleep. The next thread that calls with the same fail_loc wakes up
* the first and continues. */
-#define OBD_RACE(id) \
-do { \
- if (OBD_FAIL_CHECK_ONCE(id)) { \
- CERROR("obd_race id %x sleeping\n", (id)); \
- OBD_SLEEP_ON(&obd_race_waitq); \
- CERROR("obd_fail_race id %x awake\n", (id)); \
- } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \
- ((id) & OBD_FAIL_MASK_LOC)) { \
- cfs_waitq_signal(&obd_race_waitq); \
- } \
+#define OBD_RACE(id)                                                         \
+do {                                                                         \
+        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
+                /* first matching caller: clear the flag, then sleep */      \
+                obd_race_state = 0;                                          \
+                CERROR("obd_race id %x sleeping\n", (id));                   \
+                OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
+                CERROR("obd_fail_race id %x awake\n", (id));                 \
+        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==                     \
+                    ((id) & OBD_FAIL_MASK_LOC)) {                            \
+                CERROR("obd_fail_race id %x waking\n", (id));                \
+                /* set the flag first, then wake the queue: the sleeper */   \
+                /* only re-tests its condition when it is woken up      */   \
+                obd_race_state = 1;                                          \
+                cfs_waitq_signal(&obd_race_waitq);                           \
+        }                                                                    \
} while(0)
#else
/* sigh. an expedient fix until OBD_RACE is fixed up */
--- /dev/null
+Index: linux-2.6.9/include/asm-i386/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-i386/bitops.h 2004-10-19 05:54:37.000000000 +0800
++++ linux-2.6.9/include/asm-i386/bitops.h 2006-09-01 14:04:19.000000000 +0800
+@@ -448,6 +448,8 @@
+ find_first_zero_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+ find_next_zero_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++ find_next_bit((unsigned long*)(addr), (size), (off))
+
+ /* Bitmap functions for the minix filesystem. */
+ #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
+Index: linux-2.6.9/include/asm-x86_64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-x86_64/bitops.h 2004-10-19 05:53:51.000000000 +0800
++++ linux-2.6.9/include/asm-x86_64/bitops.h 2006-09-01 14:04:19.000000000 +0800
+@@ -399,6 +399,8 @@
+ find_first_zero_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+ find_next_zero_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++ find_next_bit((unsigned long*)(addr), (size), (off))
+
+ /* Bitmap functions for the minix filesystem. */
+ #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
+Index: linux-2.6.9/include/asm-ia64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ia64/bitops.h 2004-10-19 05:55:07.000000000 +0800
++++ linux-2.6.9/include/asm-ia64/bitops.h 2006-09-01 14:04:19.000000000 +0800
+@@ -387,6 +387,8 @@
+ #define ext2_test_bit test_bit
+ #define ext2_find_first_zero_bit find_first_zero_bit
+ #define ext2_find_next_zero_bit find_next_zero_bit
++#define ext2_find_next_le_bit(addr, size, off) \
++ __find_next_bit((addr), (size), (off))
+
+ /* Bitmap functions for the minix filesystem. */
+ #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
+Index: linux-2.6.9/include/asm-ppc/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ppc/bitops.h 2004-10-19 05:54:08.000000000 +0800
++++ linux-2.6.9/include/asm-ppc/bitops.h 2006-09-01 14:04:35.000000000 +0800
+@@ -449,6 +449,47 @@
+ return result + ffz(tmp);
+ }
+
++#define ext2_find_next_le_bit(addr, size, off) \
++ generic_find_next_le_bit((addr), (size), (off))
++
++static __inline__ unsigned long generic_find_next_le_bit(const void *addr,
++ unsigned long size, unsigned long offset)
++{
++ unsigned int *p = ((unsigned int*) addr) + (offset >> 5);
++ unsigned int result = offset & ~31UL;
++ unsigned int tmp;
++
++ if (offset >= size)
++ return size;
++ size -= result;
++ offset &= 31UL;
++ if (offset) {
++ tmp = cpu_to_le32p(p++);
++ tmp &= ~0UL << offset;
++ if (size < 32)
++ goto found_first;
++ if (tmp)
++ goto found_middle;
++ size -= 32;
++ result += 32;
++ }
++ while (size >= 32) {
++ if ((tmp = cpu_to_le32p(p++)))
++ goto found_middle;
++ result += 32;
++ size -= 32;
++ }
++ if (!size)
++ return result;
++ tmp = cpu_to_le32p(p);
++found_first:
++ tmp &= ~0U >> (32 - size);
++ if (tmp == 0UL) /* Are any bits set? */
++ return result + size; /* Nope. */
++found_middle:
++ return result + __ffs(tmp);
++}
++
+ /* Bitmap functions for the minix filesystem. */
+ #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
+ #define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr))
+Index: linux-2.6.9/include/asm-ppc64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ppc64/bitops.h 2004-10-19 05:55:43.000000000 +0800
++++ linux-2.6.9/include/asm-ppc64/bitops.h 2006-09-01 14:05:00.000000000 +0800
+@@ -349,6 +349,9 @@
+ find_first_zero_le_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+ find_next_zero_le_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++ generic_find_next_le_bit((unsigned long*)(addr), (size), (off))
++extern unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset);
+
+ #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
+ #define minix_set_bit(nr,addr) set_bit(nr,addr)
+Index: linux-2.6.9/arch/ppc64/kernel/bitops.c
+===================================================================
+--- linux-2.6.9.orig/arch/ppc64/kernel/bitops.c 2004-10-19 05:54:37.000000000 +0800
++++ linux-2.6.9/arch/ppc64/kernel/bitops.c 2006-09-01 14:05:25.000000000 +0800
+@@ -145,3 +145,43 @@
+ }
+
+ EXPORT_SYMBOL(find_next_zero_le_bit);
++
++unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size,
++ unsigned long offset)
++{
++ const unsigned long *p = addr + offset / BITS_PER_LONG;
++ unsigned long result = offset & ~(BITS_PER_LONG - 1);
++ unsigned long tmp;
++
++ if (offset >= size)
++ return size;
++ size -= result;
++ offset %= BITS_PER_LONG;
++ if (offset) {
++ tmp = __swab64p(p++);
++ tmp &= (~0UL << offset);
++ if (size < BITS_PER_LONG)
++ goto found_first;
++ if (tmp)
++ goto found_middle;
++ size -= BITS_PER_LONG;
++ result += BITS_PER_LONG;
++ }
++ while (size & ~(BITS_PER_LONG - 1)) {
++ if ((tmp = __swab64p(p++)))
++ goto found_middle;
++ result += BITS_PER_LONG;
++ size -= BITS_PER_LONG;
++ }
++ if (!size)
++ return result;
++ tmp = __swab64p(p);
++found_first:
++ tmp &= ~0UL >> (BITS_PER_LONG - size);
++ if (tmp == 0UL)
++ return result + size;
++found_middle:
++ return result + __ffs(tmp);
++}
++
++EXPORT_SYMBOL(generic_find_next_le_bit);
-Index: linux-2.6.5-7.201/include/linux/dcache.h
+Index: linux-2.6.5-7.276/include/linux/dcache.h
===================================================================
---- linux-2.6.5-7.201.orig/include/linux/dcache.h 2005-10-11 00:12:48.000000000 +0400
-+++ linux-2.6.5-7.201/include/linux/dcache.h 2005-12-20 23:16:31.000000000 +0300
+--- linux-2.6.5-7.276.orig/include/linux/dcache.h
++++ linux-2.6.5-7.276/include/linux/dcache.h
@@ -38,7 +38,6 @@ struct qstr {
const unsigned char * name;
unsigned int len;
struct dentry * d_parent; /* parent directory */
struct qstr d_name;
struct hlist_node d_hash; /* lookup hash list */
-Index: linux-2.6.5-7.201/fs/dcache.c
+Index: linux-2.6.5-7.276/fs/dcache.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/dcache.c 2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/dcache.c 2005-12-20 23:16:31.000000000 +0300
-@@ -41,6 +41,8 @@ EXPORT_SYMBOL(dcache_lock);
-
- static kmem_cache_t *dentry_cache;
-
-+#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-+
- /*
- * This is the single most critical data structure when it comes
- * to the dcache: the hashtable for lookups. Somebody should try
-@@ -67,7 +69,7 @@ static void d_callback(void *arg)
- struct dentry * dentry = (struct dentry *)arg;
-
- if (dname_external(dentry)) {
-- kfree(dentry->d_qstr);
-+ kfree(dentry->d_name.name);
- }
- kmem_cache_free(dentry_cache, dentry);
- }
-@@ -678,8 +680,6 @@ static int shrink_dcache_memory(int nr,
- return dentry_stat.nr_unused;
- }
-
--#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
--
- /**
- * d_alloc - allocate a dcache entry
- * @parent: parent of entry to allocate
-@@ -694,26 +694,18 @@ struct dentry * d_alloc(struct dentry *
- {
- char * str;
- struct dentry *dentry;
-- struct qstr * qstr;
-
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
- if (!dentry)
- return NULL;
-
- if (name->len > DNAME_INLINE_LEN-1) {
-- qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len),
-- GFP_KERNEL);
-- if (!qstr) {
-+ str = kmalloc(name->len + 1, GFP_KERNEL);
-+ if (!str) {
- kmem_cache_free(dentry_cache, dentry);
- return NULL;
- }
-- qstr->name = qstr->name_str;
-- qstr->len = name->len;
-- qstr->hash = name->hash;
-- dentry->d_qstr = qstr;
-- str = qstr->name_str;
- } else {
-- dentry->d_qstr = &dentry->d_name;
- str = dentry->d_iname;
- }
-
-@@ -1010,7 +1002,7 @@ struct dentry * __d_lookup(struct dentry
- if (dentry->d_parent != parent)
- continue;
-
-- qstr = dentry->d_qstr;
-+ qstr = &dentry->d_name;
- smp_read_barrier_depends();
- if (parent->d_op && parent->d_op->d_compare) {
- if (parent->d_op->d_compare(parent, qstr, name))
-@@ -1163,26 +1155,38 @@ void d_rehash(struct dentry * entry)
- */
- static inline void switch_names(struct dentry * dentry, struct dentry * target)
- {
-- const unsigned char *old_name, *new_name;
-- struct qstr *old_qstr, *new_qstr;
--
-- memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
-- old_qstr = target->d_qstr;
-- old_name = target->d_name.name;
-- new_qstr = dentry->d_qstr;
-- new_name = dentry->d_name.name;
-- if (old_name == target->d_iname) {
-- old_name = dentry->d_iname;
-- old_qstr = &dentry->d_name;
-- }
-- if (new_name == dentry->d_iname) {
-- new_name = target->d_iname;
-- new_qstr = &target->d_name;
-- }
-- target->d_name.name = new_name;
-- dentry->d_name.name = old_name;
-- target->d_qstr = new_qstr;
-- dentry->d_qstr = old_qstr;
-+ if (dname_external(target)) {
-+ if (dname_external(dentry)) {
-+ /*
-+ * Both external: swap the pointers
-+ */
-+ do_switch(target->d_name.name, dentry->d_name.name);
-+ } else {
-+ /*
-+ * dentry:internal, target:external. Steal target's
-+ * storage and make target internal.
-+ */
-+ dentry->d_name.name = target->d_name.name;
-+ target->d_name.name = target->d_iname;
-+ }
-+ } else {
-+ if (dname_external(dentry)) {
-+ /*
-+ * dentry:external, target:internal. Give dentry's
-+ * storage to target and make dentry internal
-+ */
-+ memcpy(dentry->d_iname, target->d_name.name,
-+ target->d_name.len + 1);
-+ target->d_name.name = dentry->d_name.name;
-+ dentry->d_name.name = dentry->d_iname;
-+ } else {
-+ /*
-+ * Both are internal. Just copy target to dentry
-+ */
-+ memcpy(dentry->d_iname, target->d_name.name,
-+ target->d_name.len + 1);
-+ }
-+ }
- }
-
- /*
+--- linux-2.6.5-7.276.orig/fs/dcache.c
++++ linux-2.6.5-7.276/fs/dcache.c
+@@ -775,7 +775,6 @@ struct dentry * d_alloc(struct dentry *
+ dentry->d_parent = NULL;
+ dentry->d_move_count = 0;
+ dentry->d_sb = NULL;
+- dentry->d_qstr = &dentry->d_name;
+ dentry->d_name.name = str;
+ dentry->d_name.len = name->len;
+ dentry->d_name.hash = name->hash;
--- /dev/null
+Index: linux-2.6/fs/super.c
+===================================================================
+--- linux-2.6.orig/fs/super.c 2006-07-20 10:51:39.000000000 +0800
++++ linux-2.6/fs/super.c 2006-07-20 10:51:59.000000000 +0800
+@@ -877,6 +877,8 @@ do_kern_mount(const char *fstype, int fl
+ return mnt;
+ }
+
++EXPORT_SYMBOL_GPL(do_kern_mount);
++
+ struct vfsmount *kern_mount(struct file_system_type *type)
+ {
+ return vfs_kern_mount(type, 0, type->name, NULL);
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/super.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h 2006-08-09 17:59:34.000000000 +0400
++++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h 2006-08-22 12:35:55.000000000 +0400
+@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+
+Index: linux-2.6.5-7.201-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400
@@ -39,7 +39,7 @@
static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
struct ext3_super_block * es,
int sync)
{
-Index: linux-2.6.5-7.201/fs/ext3/namei.c
+Index: linux-2.6.5-7.201-full/fs/ext3/namei.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/namei.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/namei.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400
@@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
struct buffer_head * bh)
{
if (pde)
pde->rec_len =
cpu_to_le16(le16_to_cpu(pde->rec_len) +
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2006-06-20 19:42:30.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c 2006-07-14 01:53:23.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c 2006-08-09 17:59:37.000000000 +0400
@@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
{
int error = -EINVAL;
write_lock(&ext3_handler_lock);
if (!ext3_xattr_handlers[name_index-1]) {
ext3_xattr_handlers[name_index-1] = handler;
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.201-full/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/inode.c 2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c 2006-07-14 01:53:22.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/inode.c 2006-08-22 12:35:28.000000000 +0400
@@ -1517,9 +1517,14 @@ out_stop:
if (end > inode->i_size) {
ei->i_disksize = end;
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-08-09 17:56:39.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-08-22 12:36:22.000000000 +0400
+@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+
Index: linux-2.6.9-full/fs/ext3/super.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-06-02 23:37:51.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/super.c 2006-06-02 23:56:29.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400
@@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
unsigned long journal_devnum);
static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
{
Index: linux-2.6.9-full/fs/ext3/namei.c
===================================================================
---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-06-02 23:37:49.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/namei.c 2006-06-02 23:43:31.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400
@@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
struct buffer_head * bh)
{
Index: linux-2.6.9-full/fs/ext3/xattr.c
===================================================================
--- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-06-01 14:58:48.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-06-03 00:02:00.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-08-09 17:56:40.000000000 +0400
@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
{
struct xattr_handler *handler = NULL;
Index: linux-2.6.9-full/fs/ext3/inode.c
===================================================================
--- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-02 23:37:38.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/inode.c 2006-06-03 00:27:41.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c 2006-08-22 12:34:28.000000000 +0400
@@ -1513,9 +1513,14 @@ out_stop:
if (end > inode->i_size) {
ei->i_disksize = end;
+ * Someone more lucky has already allocated it.
+ * The only thing we can do is just take first
+ * found block(s)
-+ */
+ printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++ */
+ ac.ac_b_ex.fe_group = 0;
+ ac.ac_b_ex.fe_start = 0;
+ ac.ac_b_ex.fe_len = 0;
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:37:27.000000000 +0800
+@@ -57,6 +57,14 @@
#define ext3_debug(f, a...) do {} while (0)
#endif
/*
* Special inodes numbers
*/
-@@ -339,6 +347,7 @@ struct ext3_inode {
+@@ -339,6 +347,7 @@
#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
-@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe
+@@ -361,6 +370,14 @@
+ #define ext3_find_first_zero_bit ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit ext2_find_next_zero_bit
+
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif /* __LITTLE_ENDIAN */
++#endif /* !ext2_find_next_le_bit */
++
+ /*
+ * Maximal mount counts between two filesystem checks
+ */
+@@ -700,7 +717,9 @@
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc
+@@ -824,6 +843,17 @@
extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
#endif /* __KERNEL__ */
#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long)
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
+Index: linux-stage/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:37:01.000000000 +0800
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -78,6 +84,43 @@ struct ext3_sb_info {
+@@ -78,6 +84,43 @@
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
};
-+
+
+#define EXT3_GROUP_INFO(sb, group) \
+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
-
++
#endif /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.252-full/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400
-@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800
+@@ -389,6 +389,7 @@
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -545,6 +546,7 @@ enum {
+@@ -546,6 +547,7 @@
Opt_err,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
Opt_extents, Opt_noextents, Opt_extdebug,
};
static match_table_t tokens = {
-@@ -591,6 +592,9 @@ static match_table_t tokens = {
+@@ -592,6 +594,9 @@
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL}
};
-@@ -813,6 +815,19 @@ static int parse_options (char * options
+@@ -817,6 +822,19 @@
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
+@@ -1470,6 +1488,7 @@
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
+@@ -2118,7 +2137,13 @@
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2147,6 +2172,7 @@
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
}
int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
+Index: linux-stage/fs/ext3/extents.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
+@@ -779,7 +779,7 @@
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
} else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400
-@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800
+@@ -574,7 +574,7 @@
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -675,7 +675,7 @@ err_out:
+@@ -675,7 +675,7 @@
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@
}
}
}
/**
-@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t
+@@ -2008,7 +2008,7 @@
ext3_journal_test_restart(handle, inode);
}
if (parent_bh) {
/*
-Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
+Index: linux-stage/fs/ext3/balloc.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300
-+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400
-@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:36:59.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:37:01.000000000 +0800
+@@ -78,7 +78,7 @@
*
* Return buffer_head on success or NULL in case of failure.
*/
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
struct ext3_group_desc * desc;
-@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino
+@@ -274,7 +274,7 @@
}
/* Free given blocks, update quota and i_blocks field */
unsigned long block, unsigned long count)
{
struct buffer_head *bitmap_bh = NULL;
-@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super
+@@ -1142,7 +1142,7 @@
* bitmap, and then for any free bit if that fails.
* This function also updates quota and i_blocks field.
*/
unsigned long goal, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
+Index: linux-stage/fs/ext3/xattr.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400
-@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:37:01.000000000 +0800
+@@ -1371,7 +1371,7 @@
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
error = -EIO;
goto cleanup;
}
-@@ -1411,7 +1411,7 @@ getblk_failed:
+@@ -1411,7 +1411,7 @@
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
/* ext3_forget() calls bforget() for us, but we
let our caller release old_bh, so we need to
-@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1519,7 +1519,7 @@
mb_cache_entry_free(ce);
ce = NULL;
}
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
-Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
+Index: linux-stage/fs/ext3/mballoc.c
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:37:34.000000000 +0800
@@ -0,0 +1,2702 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ while (i < max) {
+ fragments++;
+ first = i;
-+ i = find_next_bit(bitmap, max, i);
++ i = ext2_find_next_le_bit(bitmap, max, i);
+ len = i - first;
+ free += len;
+ if (len > 1)
+ remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
+ remove_proc_entry(EXT3_ROOT, proc_root_fs);
+}
-Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800
+@@ -6,7 +6,7 @@
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o \
+ * Someone more lucky has already allocated it.
+ * The only thing we can do is just take first
+ * found block(s)
-+ */
+ printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++ */
+ ac.ac_b_ex.fe_group = 0;
+ ac.ac_b_ex.fe_start = 0;
+ ac.ac_b_ex.fe_len = 0;
Index: linux-stage/include/linux/ext3_fs.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:29:38.000000000 +0800
+@@ -57,6 +57,14 @@
#define ext3_debug(f, a...) do {} while (0)
#endif
/*
* Special inodes numbers
*/
-@@ -365,6 +373,7 @@ struct ext3_inode {
+@@ -365,6 +373,7 @@
#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */
#define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */
#define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+@@ -387,6 +396,14 @@
+ #define ext3_find_first_zero_bit ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit ext2_find_next_zero_bit
+
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif /* __LITTLE_ENDIAN */
++#endif /* !ext2_find_next_le_bit */
++
+ /*
+ * Maximal mount counts between two filesystem checks
+ */
+@@ -726,7 +743,7 @@
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
unsigned long, unsigned long, int *);
extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+@@ -859,6 +876,17 @@
extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
Index: linux-stage/include/linux/ext3_fs_sb.h
===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs_sb.h 2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:27:37.000000000 +0800
@@ -23,9 +23,15 @@
#define EXT_INCLUDE
#include <linux/blockgroup_lock.h>
/*
* third extended-fs super-block data in memory
-@@ -81,6 +87,43 @@ struct ext3_sb_info {
+@@ -81,6 +87,43 @@
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
};
-+
+
+#define EXT3_GROUP_INFO(sb, group) \
+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
-
++
#endif /* _LINUX_EXT3_FS_SB */
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600
-@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800
+@@ -394,6 +394,7 @@
struct ext3_super_block *es = sbi->s_es;
int i;
ext3_ext_release(sb);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,6 +598,7 @@
Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
Opt_extents, Opt_noextents, Opt_extdebug,
};
static match_table_t tokens = {
-@@ -649,6 +651,9 @@ static match_table_t tokens = {
+@@ -649,6 +651,9 @@
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_extdebug, "extdebug"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL},
{Opt_resize, "resize"},
-@@ -962,6 +967,19 @@ static int parse_options (char * options
+@@ -962,6 +967,19 @@
case Opt_extdebug:
set_opt (sbi->s_mount_opt, EXTDEBUG);
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super
+@@ -1651,6 +1669,7 @@
ext3_count_dirs(sb));
ext3_ext_init(sb);
return 0;
-@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t
+@@ -2433,7 +2452,13 @@
static int __init init_ext3_fs(void)
{
if (err)
return err;
err = init_inodecache();
-@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2455,6 +2480,7 @@
unregister_filesystem(&ext3_fs_type);
destroy_inodecache();
exit_ext3_xattr();
int ext3_prep_san_write(struct inode *inode, long *blocks,
Index: linux-stage/fs/ext3/extents.c
===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
+@@ -779,7 +779,7 @@
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
}
}
kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
path->p_idx->ei_leaf);
bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
return err;
}
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
int needed = ext3_remove_blocks_credits(tree, ex, from, to);
handle_t *handle = ext3_journal_start(tree->inode, needed);
struct buffer_head *bh;
if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
/* tail removal */
unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
bh = sb_find_get_block(tree->inode->i_sb, start + i);
ext3_forget(handle, 0, tree->inode, bh, start + i);
}
from, to, ex->ee_block, ex->ee_len);
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800
+@@ -572,7 +572,7 @@
ext3_journal_forget(handle, branch[i].bh);
}
for (i = 0; i < keys; i++)
return err;
}
-@@ -673,7 +673,7 @@ err_out:
+@@ -673,7 +673,7 @@
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
return err;
}
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1831,7 +1831,7 @@
}
}
}
/**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+@@ -2004,7 +2004,7 @@
ext3_journal_test_restart(handle, inode);
}
/*
Index: linux-stage/fs/ext3/balloc.c
===================================================================
---- linux-stage.orig/fs/ext3/balloc.c 2006-05-25 10:36:02.000000000 -0600
-+++ linux-stage/fs/ext3/balloc.c 2006-05-25 10:36:04.000000000 -0600
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:27:37.000000000 +0800
+@@ -79,7 +79,7 @@
*
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *bitmap_bh = NULL;
Index: linux-stage/fs/ext3/xattr.c
===================================================================
---- linux-stage.orig/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600
-@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800
+@@ -1281,7 +1281,7 @@
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
error = -EIO;
goto cleanup;
}
-@@ -1328,7 +1328,7 @@ getblk_failed:
+@@ -1328,7 +1328,7 @@
if (ce)
mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
/* ext3_forget() calls bforget() for us, but we
let our caller release old_bh, so we need to
-@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1427,7 +1427,7 @@
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
if (ce)
mb_cache_entry_free(ce);
} else {
Index: linux-stage/fs/ext3/mballoc.c
===================================================================
---- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
-+++ linux-stage/fs/ext3/mballoc.c 2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:30:11.000000000 +0800
@@ -0,0 +1,2701 @@
+/*
+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
+ while (i < max) {
+ fragments++;
+ first = i;
-+ i = find_next_bit(bitmap, max, i);
++ i = ext2_find_next_le_bit(bitmap, max, i);
+ len = i - first;
+ free += len;
+ if (len > 1)
+}
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600
+--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800
@@ -6,7 +6,7 @@
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
--- /dev/null
+Index: linux-2.6.5-7.201/include/linux/jbd.h
+===================================================================
+--- linux-2.6.5-7.201.orig/include/linux/jbd.h 2005-10-11 00:12:48.000000000 +0400
++++ linux-2.6.5-7.201/include/linux/jbd.h 2006-07-28 02:40:14.000000000 +0400
+@@ -411,6 +411,16 @@ struct handle_s
+ };
+
+
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++ unsigned long cs_chp_time;
++ unsigned long cs_forced_to_close;
++ unsigned long cs_written;
++ unsigned long cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism. It
+ * tracks a compound transaction through its various states:
+ *
+@@ -542,6 +552,21 @@ struct transaction_s
+ spinlock_t t_handle_lock;
+
+ /*
++ * Longest time some handle had to wait for running transaction
++ */
++ unsigned long t_max_wait;
++
++ /*
++ * When transaction started
++ */
++ unsigned long t_start;
++
++ /*
++ * Checkpointing stats [j_checkpoint_sem]
++ */
++ struct transaction_chp_stats_s t_chp_stats;
++
++ /*
+ * Number of outstanding updates running on this transaction
+ * [t_handle_lock]
+ */
+@@ -581,6 +606,57 @@ struct transaction_s
+ struct list_head t_jcb;
+ };
+
++struct transaction_run_stats_s {
++ unsigned long rs_wait;
++ unsigned long rs_running;
++ unsigned long rs_locked;
++ unsigned long rs_flushing;
++ unsigned long rs_logging;
++
++ unsigned long rs_handle_count;
++ unsigned long rs_blocks;
++ unsigned long rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++ int ts_type;
++ unsigned long ts_tid;
++ union {
++ struct transaction_run_stats_s run;
++ struct transaction_chp_stats_s chp;
++ } u;
++};
++
++#define JBD_STATS_RUN 1
++#define JBD_STATS_CHECKPOINT 2
++
++#define ts_wait u.run.rs_wait
++#define ts_running u.run.rs_running
++#define ts_locked u.run.rs_locked
++#define ts_flushing u.run.rs_flushing
++#define ts_logging u.run.rs_logging
++#define ts_handle_count u.run.rs_handle_count
++#define ts_blocks u.run.rs_blocks
++#define ts_blocks_logged u.run.rs_blocks_logged
++
++#define ts_chp_time u.chp.cs_chp_time
++#define ts_forced_to_close u.chp.cs_forced_to_close
++#define ts_written u.chp.cs_written
++#define ts_dropped u.chp.cs_dropped
++
++#define CURRENT_MSECS (jiffies_to_msecs(jiffies))
++
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++	/*
++	 * Modular unsigned subtraction gives the elapsed milliseconds even
++	 * if the counter wrapped between @start and @end.
++	 */
++	return end - start;
++}
++
+ /**
+ * struct journal_s - The journal_s type is the concrete type associated with
+ * journal_t.
+@@ -817,6 +893,16 @@ struct journal_s
+ struct jbd_revoke_table_s *j_revoke_table[2];
+
+ /*
++	 * Transaction history ring and cumulative statistics [j_history_lock]
++ */
++ struct transaction_stats_s *j_history;
++ int j_history_max;
++ int j_history_cur;
++ spinlock_t j_history_lock;
++ struct proc_dir_entry *j_proc_entry;
++ struct transaction_stats_s j_stats;
++
++ /*
+ * An opaque pointer to fs-private information. ext3 puts its
+ * superblock pointer here
+ */
+Index: linux-2.6.5-7.201/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/commit.c 2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/commit.c 2006-07-28 02:40:14.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/smp_lock.h>
++#include <linux/jiffies.h>
+
+ /*
+ * Default IO end handler for temporary BJ_IO buffer_heads.
+@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour
+ */
+ void journal_commit_transaction(journal_t *journal)
+ {
++ struct transaction_stats_s stats;
+ transaction_t *commit_transaction;
+ struct journal_head *jh, *new_jh, *descriptor;
+ struct buffer_head *wbuf[64];
+@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_
+ spin_lock(&journal->j_state_lock);
+ commit_transaction->t_state = T_LOCKED;
+
++ stats.ts_wait = commit_transaction->t_max_wait;
++ stats.ts_locked = CURRENT_MSECS;
++ stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++ stats.ts_locked);
++
+ spin_lock(&commit_transaction->t_handle_lock);
+ while (commit_transaction->t_updates) {
+ DEFINE_WAIT(wait);
+@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_
+ */
+ journal_switch_revoke_table(journal);
+
++ stats.ts_flushing = CURRENT_MSECS;
++ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+ commit_transaction->t_state = T_FLUSH;
+ journal->j_committing_transaction = commit_transaction;
+ journal->j_running_transaction = NULL;
+@@ -366,6 +376,11 @@ write_out_data:
+ */
+ commit_transaction->t_state = T_COMMIT;
+
++ stats.ts_logging = CURRENT_MSECS;
++ stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++ stats.ts_blocks = commit_transaction->t_outstanding_credits;
++ stats.ts_blocks_logged = 0;
++
+ descriptor = 0;
+ bufs = 0;
+ while (commit_transaction->t_buffers) {
+@@ -514,6 +529,7 @@ start_journal_io:
+ submit_bh(WRITE, bh);
+ }
+ cond_resched();
++ stats.ts_blocks_logged += bufs;
+
+ /* Force a new descriptor to be generated next
+ time round the loop. */
+@@ -759,6 +775,7 @@ skip_commit: /* The journal should be un
+ cp_transaction = jh->b_cp_transaction;
+ if (cp_transaction) {
+ JBUFFER_TRACE(jh, "remove from old cp transaction");
++ cp_transaction->t_chp_stats.cs_dropped++;
+ __journal_remove_checkpoint(jh);
+ }
+
+@@ -805,6 +822,36 @@ skip_commit: /* The journal should be un
+
+ J_ASSERT(commit_transaction->t_state == T_COMMIT);
+
++ commit_transaction->t_start = CURRENT_MSECS;
++ stats.ts_logging = jbd_time_diff(stats.ts_logging,
++ commit_transaction->t_start);
++
++ /*
++ * File the transaction for history
++ */
++ stats.ts_type = JBD_STATS_RUN;
++ stats.ts_tid = commit_transaction->t_tid;
++ stats.ts_handle_count = commit_transaction->t_handle_count;
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++
++ /*
++ * Calculate overall stats
++ */
++ journal->j_stats.ts_tid++;
++ journal->j_stats.ts_wait += stats.ts_wait;
++ journal->j_stats.ts_running += stats.ts_running;
++ journal->j_stats.ts_locked += stats.ts_locked;
++ journal->j_stats.ts_flushing += stats.ts_flushing;
++ journal->j_stats.ts_logging += stats.ts_logging;
++ journal->j_stats.ts_handle_count += stats.ts_handle_count;
++ journal->j_stats.ts_blocks += stats.ts_blocks;
++ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++ spin_unlock(&journal->j_history_lock);
++
+ /*
+ * This is a bit sleazy. We borrow j_list_lock to protect
+ * journal->j_committing_transaction in __journal_remove_checkpoint.
+Index: linux-2.6.5-7.201/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/checkpoint.c 2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/checkpoint.c 2006-07-28 02:40:14.000000000 +0400
+@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal
+ transaction_t *t = jh->b_transaction;
+ tid_t tid = t->t_tid;
+
++ transaction->t_chp_stats.cs_forced_to_close++;
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ log_start_commit(journal, tid);
+@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct
+ */
+ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+ struct buffer_head **bhs, int *batch_count,
+- int *drop_count)
++ int *drop_count, transaction_t *transaction)
+ {
+ struct buffer_head *bh = jh2bh(jh);
+ int ret = 0;
+@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou
+ set_buffer_jwrite(bh);
+ bhs[*batch_count] = bh;
+ jbd_unlock_bh_state(bh);
++ transaction->t_chp_stats.cs_written++;
+ (*batch_count)++;
+ if (*batch_count == NR_BATCH) {
+ __flush_batch(journal, bhs, batch_count);
+@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal
+ tid_t this_tid;
+
+ transaction = journal->j_checkpoint_transactions;
++ if (transaction->t_chp_stats.cs_chp_time == 0)
++ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+ this_tid = transaction->t_tid;
+ jh = transaction->t_checkpoint_list;
+ last_jh = jh->b_cpprev;
+@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal
+ retry = 1;
+ break;
+ }
+- retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
++ retry = __flush_buffer(journal, jh, bhs, &batch_count,
++ &drop_count, transaction);
+ } while (jh != last_jh && !retry);
+
+ if (batch_count) {
+@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct
+
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++ struct transaction_stats_s stats;
++
+ assert_spin_locked(&journal->j_list_lock);
+ if (transaction->t_cpnext) {
+ transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_
+ J_ASSERT(journal->j_running_transaction != transaction);
+
+ jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++ /*
++ * File the transaction for history
++ */
++ if (transaction->t_chp_stats.cs_written != 0 ||
++ transaction->t_chp_stats.cs_chp_time != 0) {
++ stats.ts_type = JBD_STATS_CHECKPOINT;
++ stats.ts_tid = transaction->t_tid;
++ stats.u.chp = transaction->t_chp_stats;
++ if (stats.ts_chp_time)
++ stats.ts_chp_time =
++ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++ spin_unlock(&journal->j_history_lock);
++ }
++
+ kfree(transaction);
+ }
+Index: linux-2.6.5-7.201/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/transaction.c 2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/transaction.c 2006-07-28 02:40:14.000000000 +0400
+@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran
+
+ J_ASSERT(journal->j_running_transaction == NULL);
+ journal->j_running_transaction = transaction;
++ transaction->t_max_wait = 0;
++ transaction->t_start = CURRENT_MSECS;
+
+ return transaction;
+ }
+@@ -86,6 +88,7 @@ static int start_this_handle(journal_t *
+ int nblocks = handle->h_buffer_credits;
+ transaction_t *new_transaction = NULL;
+ int ret = 0;
++ unsigned long ts = CURRENT_MSECS;
+
+ if (nblocks > journal->j_max_transaction_buffers) {
+ printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -219,6 +222,12 @@ repeat_locked:
+ /* OK, account for the buffers that this operation expects to
+ * use and add the handle to the running transaction. */
+
++ if (time_after(transaction->t_start, ts)) {
++ ts = jbd_time_diff(ts, transaction->t_start);
++ if (ts > transaction->t_max_wait)
++ transaction->t_max_wait= ts;
++ }
++
+ handle->h_transaction = transaction;
+ transaction->t_outstanding_credits += nblocks;
+ transaction->t_updates++;
+Index: linux-2.6.5-7.201/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/journal.c 2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/journal.c 2006-08-02 01:20:09.000000000 +0400
+@@ -35,6 +35,7 @@
+ #include <linux/pagemap.h>
+ #include <asm/uaccess.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+
+ EXPORT_SYMBOL(journal_start);
+ EXPORT_SYMBOL(journal_restart);
+@@ -615,6 +616,337 @@ struct journal_head *journal_get_descrip
+ return journal_add_journal_head(bh);
+ }
+
++struct jbd_stats_proc_session {
++ journal_t *journal;
++ struct transaction_stats_s *stats;
++ int start;
++ int max;
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++ struct transaction_stats_s *ts,
++ int first)
++{
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (!first && ts == s->stats + s->start)
++ return NULL;
++ while (ts->ts_type == 0) {
++ ts++;
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (ts == s->stats + s->start)
++ return NULL;
++ }
++ return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts;
++ int l = *pos;
++
++ if (l == 0)
++ return SEQ_START_TOKEN;
++ ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++ if (!ts)
++ return NULL;
++ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++ return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts = v;
++
++ ++*pos;
++ if (v == SEQ_START_TOKEN)
++ return jbd_history_skip_empty(s, s->stats + s->start, 1);
++ else
++ return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++ struct transaction_stats_s *ts = v;
++ if (v == SEQ_START_TOKEN) {
++ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++ "wait", "run", "lock", "flush", "log", "hndls",
++ "block", "inlog", "ctime", "write", "drop",
++ "close");
++ return 0;
++ }
++ if (ts->ts_type == JBD_STATS_RUN)
++ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++ ts->ts_wait, ts->ts_running, ts->ts_locked,
++ ts->ts_flushing, ts->ts_logging,
++ ts->ts_handle_count, ts->ts_blocks,
++ ts->ts_blocks_logged);
++ else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++ "C", ts->ts_tid, " ", ts->ts_chp_time,
++ ts->ts_written, ts->ts_dropped,
++ ts->ts_forced_to_close);
++ else
++ J_ASSERT(0);
++ return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++ .start = jbd_seq_history_start,
++ .next = jbd_seq_history_next,
++ .stop = jbd_seq_history_stop,
++ .show = jbd_seq_history_show,
++};
++
++/* open(): take a private copy of the history ring under j_history_lock */
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -ENOMEM;
++	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {
++		kfree(s);
++		return -ENOMEM;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, journal->j_history, size);
++	s->max = journal->j_history_max;
++	s->start = journal->j_history_cur % s->max;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_history_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++}
++
++static ssize_t jbd_seq_history_write(struct file *file, const char __user *buf,
++ size_t len, loff_t * ppos)
++{
++ journal_t *journal = PDE(file->f_dentry->d_inode)->data;
++ int size;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ spin_lock(&journal->j_history_lock);
++ size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++ journal->j_history_cur = 0;
++ memset(journal->j_history, 0, size);
++ spin_unlock(&journal->j_history_lock);
++
++ return len;
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_history_open,
++ .read = seq_read,
++ .write = jbd_seq_history_write,
++ .llseek = seq_lseek,
++ .release = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++ return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ if (v != SEQ_START_TOKEN)
++ return 0;
++ seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++ s->stats->ts_tid,
++ s->journal->j_max_transaction_buffers);
++ if (s->stats->ts_tid == 0)
++ return 0;
++ seq_printf(seq, "average: \n %lums waiting for transaction\n",
++ s->stats->ts_wait / s->stats->ts_tid);
++ seq_printf(seq, " %lums running transaction\n",
++ s->stats->ts_running / s->stats->ts_tid);
++ seq_printf(seq, " %lums transaction was being locked\n",
++ s->stats->ts_locked / s->stats->ts_tid);
++ seq_printf(seq, " %lums flushing data (in ordered mode)\n",
++ s->stats->ts_flushing / s->stats->ts_tid);
++ seq_printf(seq, " %lums logging transaction\n",
++ s->stats->ts_logging / s->stats->ts_tid);
++ seq_printf(seq, " %lu handles per transaction\n",
++ s->stats->ts_handle_count / s->stats->ts_tid);
++ seq_printf(seq, " %lu blocks per transaction\n",
++ s->stats->ts_blocks / s->stats->ts_tid);
++ seq_printf(seq, " %lu logged blocks per transaction\n",
++ s->stats->ts_blocks_logged / s->stats->ts_tid);
++ return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++ .start = jbd_seq_info_start,
++ .next = jbd_seq_info_next,
++ .stop = jbd_seq_info_stop,
++ .show = jbd_seq_info_show,
++};
++
++/* open(): take a private copy of the summary stats under j_history_lock */
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -ENOMEM;
++	size = sizeof(struct transaction_stats_s);
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {
++		kfree(s);
++		return -ENOMEM;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, &journal->j_stats, size);
++	s->journal = journal;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_info_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++}
++
++static ssize_t jbd_seq_info_write(struct file *file, const char __user *buf,
++ size_t len, loff_t * ppos)
++{
++ journal_t *journal = PDE(file->f_dentry->d_inode)->data;
++ int size;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ size = sizeof(struct transaction_stats_s);
++ spin_lock(&journal->j_history_lock);
++ memset(&journal->j_stats, 0, size);
++ spin_unlock(&journal->j_history_lock);
++
++ return len;
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_info_open,
++ .read = seq_read,
++ .write = jbd_seq_info_write,
++ .llseek = seq_lseek,
++ .release = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++static void jbd_stats_proc_init(journal_t *journal)
++{
++	char name[64];
++
++	bdevname(journal->j_dev, name);	/* fills name[] with the device name */
++	journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++	if (journal->j_proc_entry) {
++		struct proc_dir_entry *p;
++		p = create_proc_entry("history", S_IRUGO,
++				journal->j_proc_entry);
++		if (p) {
++			p->proc_fops = &jbd_seq_history_fops;
++			p->data = journal;
++			p = create_proc_entry("info", S_IRUGO,
++					journal->j_proc_entry);
++			if (p) {
++				p->proc_fops = &jbd_seq_info_fops;
++				p->data = journal;
++			}
++		}
++	}
++}
++
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++	char name[64];
++
++	bdevname(journal->j_dev, name);	/* fills name[] with the device name */
++	remove_proc_entry("info", journal->j_proc_entry);
++	remove_proc_entry("history", journal->j_proc_entry);
++	remove_proc_entry(name, proc_jbd_stats);
++}
++
++static void journal_init_stats(journal_t *journal)
++{
++ int size;
++
++ if (proc_jbd_stats == NULL)
++ return;
++
++ journal->j_history_max = 1500;
++ size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++ journal->j_history = kmalloc(size, GFP_KERNEL);
++ if (journal->j_history == NULL) {
++ journal->j_history_max = 0;
++ return;
++ }
++ memset(journal->j_history, 0, size);
++ spin_lock_init(&journal->j_history_lock);
++}
++
+ /*
+ * Management for journal control blocks: functions to create and
+ * destroy journal_t structures, and to initialise and read existing
+@@ -657,6 +989,9 @@ static journal_t * journal_init_common (
+ kfree(journal);
+ goto fail;
+ }
++
++ journal_init_stats(journal);
++
+ return journal;
+ fail:
+ return NULL;
+@@ -699,6 +1034,7 @@ journal_t * journal_init_dev(struct bloc
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+ journal->j_blocksize = blocksize;
++ jbd_stats_proc_init(journal);
+
+ bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+ J_ASSERT(bh != NULL);
+@@ -736,6 +1072,7 @@ journal_t * journal_init_inode (struct i
+
+ journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ journal->j_blocksize = inode->i_sb->s_blocksize;
++ jbd_stats_proc_init(journal);
+
+ err = journal_bmap(journal, 0, &blocknr);
+ /* If that failed, give up */
+@@ -1106,6 +1443,8 @@ void journal_destroy(journal_t *journal)
+ brelse(journal->j_sb_buffer);
+ }
+
++ if (journal->j_proc_entry)
++ jbd_stats_proc_exit(journal);
+ if (journal->j_inode)
+ iput(journal->j_inode);
+ if (journal->j_revoke)
+@@ -1861,6 +2200,28 @@ static void __exit remove_jbd_proc_entry
+
+ #endif
+
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++ if (proc_jbd_stats)
++ remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ kmem_cache_t *jbd_handle_cache;
+
+ static int __init journal_init_handle_cache(void)
+@@ -1915,6 +2276,7 @@ static int __init journal_init(void)
+ if (ret != 0)
+ journal_destroy_caches();
+ create_jbd_proc_entry();
++ create_jbd_stats_proc_entry();
+ return ret;
+ }
+
+@@ -1926,6 +2288,7 @@ static void __exit journal_exit(void)
+ printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+ remove_jbd_proc_entry();
++ remove_jbd_stats_proc_entry();
+ journal_destroy_caches();
+ }
+
--- /dev/null
+Index: linux-2.6.9/include/linux/jbd.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/jbd.h 2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/include/linux/jbd.h 2006-07-28 02:32:18.000000000 +0400
+@@ -422,6 +422,16 @@ struct handle_s
+ };
+
+
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++ unsigned long cs_chp_time;
++ unsigned long cs_forced_to_close;
++ unsigned long cs_written;
++ unsigned long cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism. It
+ * tracks a compound transaction through its various states:
+ *
+@@ -553,6 +563,21 @@ struct transaction_s
+ spinlock_t t_handle_lock;
+
+ /*
++ * Longest time some handle had to wait for running transaction
++ */
++ unsigned long t_max_wait;
++
++ /*
++ * When transaction started
++ */
++ unsigned long t_start;
++
++ /*
++ * Checkpointing stats [j_checkpoint_sem]
++ */
++ struct transaction_chp_stats_s t_chp_stats;
++
++ /*
+ * Number of outstanding updates running on this transaction
+ * [t_handle_lock]
+ */
+@@ -592,6 +617,57 @@ struct transaction_s
+ struct list_head t_jcb;
+ };
+
++struct transaction_run_stats_s {
++ unsigned long rs_wait;
++ unsigned long rs_running;
++ unsigned long rs_locked;
++ unsigned long rs_flushing;
++ unsigned long rs_logging;
++
++ unsigned long rs_handle_count;
++ unsigned long rs_blocks;
++ unsigned long rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++ int ts_type;
++ unsigned long ts_tid;
++ union {
++ struct transaction_run_stats_s run;
++ struct transaction_chp_stats_s chp;
++ } u;
++};
++
++#define JBD_STATS_RUN 1
++#define JBD_STATS_CHECKPOINT 2
++
++#define ts_wait u.run.rs_wait
++#define ts_running u.run.rs_running
++#define ts_locked u.run.rs_locked
++#define ts_flushing u.run.rs_flushing
++#define ts_logging u.run.rs_logging
++#define ts_handle_count u.run.rs_handle_count
++#define ts_blocks u.run.rs_blocks
++#define ts_blocks_logged u.run.rs_blocks_logged
++
++#define ts_chp_time u.chp.cs_chp_time
++#define ts_forced_to_close u.chp.cs_forced_to_close
++#define ts_written u.chp.cs_written
++#define ts_dropped u.chp.cs_dropped
++
++#define CURRENT_MSECS (jiffies_to_msecs(jiffies))
++
++/*
++ * Elapsed time between two CURRENT_MSECS samples.  Unsigned subtraction
++ * is modular, so a wrapped counter (start > end) needs no special case.
++ */
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++	return end - start;
++}
++
+ /**
+ * struct journal_s - The journal_s type is the concrete type associated with
+ * journal_t.
+@@ -828,6 +904,16 @@ struct journal_s
+ struct jbd_revoke_table_s *j_revoke_table[2];
+
+ /*
++ * Transaction statistics: history ring and running totals [j_history_lock]
++ */
++ struct transaction_stats_s *j_history;
++ int j_history_max;
++ int j_history_cur;
++ spinlock_t j_history_lock;
++ struct proc_dir_entry *j_proc_entry;
++ struct transaction_stats_s j_stats;
++
++ /*
+ * An opaque pointer to fs-private information. ext3 puts its
+ * superblock pointer here
+ */
+Index: linux-2.6.9/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/commit.c 2006-03-10 18:20:39.000000000 +0300
++++ linux-2.6.9/fs/jbd/commit.c 2006-07-28 02:32:18.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/smp_lock.h>
++#include <linux/jiffies.h>
+
+ /*
+ * Default IO end handler for temporary BJ_IO buffer_heads.
+@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour
+ */
+ void journal_commit_transaction(journal_t *journal)
+ {
++ struct transaction_stats_s stats;
+ transaction_t *commit_transaction;
+ struct journal_head *jh, *new_jh, *descriptor;
+ struct buffer_head *wbuf[64];
+@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_
+ spin_lock(&journal->j_state_lock);
+ commit_transaction->t_state = T_LOCKED;
+
++ stats.ts_wait = commit_transaction->t_max_wait;
++ stats.ts_locked = CURRENT_MSECS;
++ stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++ stats.ts_locked);
++
+ spin_lock(&commit_transaction->t_handle_lock);
+ while (commit_transaction->t_updates) {
+ DEFINE_WAIT(wait);
+@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_
+ */
+ journal_switch_revoke_table(journal);
+
++ stats.ts_flushing = CURRENT_MSECS;
++ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+ commit_transaction->t_state = T_FLUSH;
+ journal->j_committing_transaction = commit_transaction;
+ journal->j_running_transaction = NULL;
+@@ -365,6 +375,11 @@ write_out_data:
+ */
+ commit_transaction->t_state = T_COMMIT;
+
++ stats.ts_logging = CURRENT_MSECS;
++ stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++ stats.ts_blocks = commit_transaction->t_outstanding_credits;
++ stats.ts_blocks_logged = 0;
++
+ descriptor = NULL;
+ bufs = 0;
+ while (commit_transaction->t_buffers) {
+@@ -513,6 +528,7 @@ start_journal_io:
+ submit_bh(WRITE, bh);
+ }
+ cond_resched();
++ stats.ts_blocks_logged += bufs;
+
+ /* Force a new descriptor to be generated next
+ time round the loop. */
+@@ -760,6 +776,7 @@ skip_commit: /* The journal should be un
+ cp_transaction = jh->b_cp_transaction;
+ if (cp_transaction) {
+ JBUFFER_TRACE(jh, "remove from old cp transaction");
++ cp_transaction->t_chp_stats.cs_dropped++;
+ __journal_remove_checkpoint(jh);
+ }
+
+@@ -806,6 +823,36 @@ skip_commit: /* The journal should be un
+
+ J_ASSERT(commit_transaction->t_state == T_COMMIT);
+
++ commit_transaction->t_start = CURRENT_MSECS;
++ stats.ts_logging = jbd_time_diff(stats.ts_logging,
++ commit_transaction->t_start);
++
++ /*
++ * File the transaction for history
++ */
++ stats.ts_type = JBD_STATS_RUN;
++ stats.ts_tid = commit_transaction->t_tid;
++ stats.ts_handle_count = commit_transaction->t_handle_count;
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++
++ /*
++ * Calculate overall stats
++ */
++ journal->j_stats.ts_tid++;
++ journal->j_stats.ts_wait += stats.ts_wait;
++ journal->j_stats.ts_running += stats.ts_running;
++ journal->j_stats.ts_locked += stats.ts_locked;
++ journal->j_stats.ts_flushing += stats.ts_flushing;
++ journal->j_stats.ts_logging += stats.ts_logging;
++ journal->j_stats.ts_handle_count += stats.ts_handle_count;
++ journal->j_stats.ts_blocks += stats.ts_blocks;
++ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++ spin_unlock(&journal->j_history_lock);
++
+ /*
+ * This is a bit sleazy. We borrow j_list_lock to protect
+ * journal->j_committing_transaction in __journal_remove_checkpoint.
+Index: linux-2.6.9/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/checkpoint.c 2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/fs/jbd/checkpoint.c 2006-07-28 02:35:21.000000000 +0400
+@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal
+ transaction_t *t = jh->b_transaction;
+ tid_t tid = t->t_tid;
+
++ transaction->t_chp_stats.cs_forced_to_close++;
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ log_start_commit(journal, tid);
+@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct
+ */
+ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+ struct buffer_head **bhs, int *batch_count,
+- int *drop_count)
++ int *drop_count, transaction_t *transaction)
+ {
+ struct buffer_head *bh = jh2bh(jh);
+ int ret = 0;
+@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou
+ set_buffer_jwrite(bh);
+ bhs[*batch_count] = bh;
+ jbd_unlock_bh_state(bh);
++ transaction->t_chp_stats.cs_written++;
+ (*batch_count)++;
+ if (*batch_count == NR_BATCH) {
+ __flush_batch(journal, bhs, batch_count);
+@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal
+ tid_t this_tid;
+
+ transaction = journal->j_checkpoint_transactions;
++ if (transaction->t_chp_stats.cs_chp_time == 0)
++ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+ this_tid = transaction->t_tid;
+ jh = transaction->t_checkpoint_list;
+ last_jh = jh->b_cpprev;
+@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal
+ retry = 1;
+ break;
+ }
+- retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
++ retry = __flush_buffer(journal, jh, bhs, &batch_count,
++ &drop_count, transaction);
+ } while (jh != last_jh && !retry);
+
+ if (batch_count) {
+@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct
+
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++ struct transaction_stats_s stats;
++
+ assert_spin_locked(&journal->j_list_lock);
+ if (transaction->t_cpnext) {
+ transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_
+ J_ASSERT(journal->j_running_transaction != transaction);
+
+ jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++ /*
++ * File the transaction for history
++ */
++ if (transaction->t_chp_stats.cs_written != 0 ||
++ transaction->t_chp_stats.cs_chp_time != 0) {
++ stats.ts_type = JBD_STATS_CHECKPOINT;
++ stats.ts_tid = transaction->t_tid;
++ stats.u.chp = transaction->t_chp_stats;
++ if (stats.ts_chp_time)
++ stats.ts_chp_time =
++ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++ spin_unlock(&journal->j_history_lock);
++ }
++
+ kfree(transaction);
+ }
+Index: linux-2.6.9/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/transaction.c 2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/fs/jbd/transaction.c 2006-07-28 02:32:18.000000000 +0400
+@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran
+
+ J_ASSERT(journal->j_running_transaction == NULL);
+ journal->j_running_transaction = transaction;
++ transaction->t_max_wait = 0;
++ transaction->t_start = CURRENT_MSECS;
+
+ return transaction;
+ }
+@@ -86,6 +88,7 @@ static int start_this_handle(journal_t *
+ int nblocks = handle->h_buffer_credits;
+ transaction_t *new_transaction = NULL;
+ int ret = 0;
++ unsigned long ts = CURRENT_MSECS;
+
+ if (nblocks > journal->j_max_transaction_buffers) {
+ printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -219,6 +222,12 @@ repeat_locked:
+ /* OK, account for the buffers that this operation expects to
+ * use and add the handle to the running transaction. */
+
++ if (time_after(transaction->t_start, ts)) {
++ ts = jbd_time_diff(ts, transaction->t_start);
++ if (ts > transaction->t_max_wait)
++ transaction->t_max_wait= ts;
++ }
++
+ handle->h_transaction = transaction;
+ transaction->t_outstanding_credits += nblocks;
+ transaction->t_updates++;
+Index: linux-2.6.9/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/journal.c 2006-06-19 21:31:57.000000000 +0400
++++ linux-2.6.9/fs/jbd/journal.c 2006-07-28 02:32:18.000000000 +0400
+@@ -36,6 +36,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+
+ EXPORT_SYMBOL(journal_start);
+ EXPORT_SYMBOL(journal_restart);
+@@ -649,6 +650,300 @@ struct journal_head *journal_get_descrip
+ return journal_add_journal_head(bh);
+ }
+
++struct jbd_stats_proc_session {
++ journal_t *journal; /* set only by the "info" open path */
++ struct transaction_stats_s *stats; /* private snapshot taken at open time */
++ int start; /* index of the first history slot to show */
++ int max; /* number of slots in the history snapshot */
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++ struct transaction_stats_s *ts,
++ int first)
++{
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (!first && ts == s->stats + s->start)
++ return NULL;
++ while (ts->ts_type == 0) {
++ ts++;
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (ts == s->stats + s->start)
++ return NULL;
++ }
++ return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts;
++ int l = *pos;
++
++ if (l == 0)
++ return SEQ_START_TOKEN;
++ ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++ if (!ts)
++ return NULL;
++ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++ return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts = v;
++
++ ++*pos;
++ if (v == SEQ_START_TOKEN)
++ return jbd_history_skip_empty(s, s->stats + s->start, 1);
++ else
++ return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++ struct transaction_stats_s *ts = v;
++ if (v == SEQ_START_TOKEN) {
++ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++ "wait", "run", "lock", "flush", "log", "hndls",
++ "block", "inlog", "ctime", "write", "drop",
++ "close");
++ return 0;
++ }
++ if (ts->ts_type == JBD_STATS_RUN)
++ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++ ts->ts_wait, ts->ts_running, ts->ts_locked,
++ ts->ts_flushing, ts->ts_logging,
++ ts->ts_handle_count, ts->ts_blocks,
++ ts->ts_blocks_logged);
++ else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++ "C", ts->ts_tid, " ", ts->ts_chp_time,
++ ts->ts_written, ts->ts_dropped,
++ ts->ts_forced_to_close);
++ else
++ J_ASSERT(0);
++ return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++ .start = jbd_seq_history_start,
++ .next = jbd_seq_history_next,
++ .stop = jbd_seq_history_stop,
++ .show = jbd_seq_history_show,
++};
++
++/* Open "history": snapshot the journal's history ring for this reader. */
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -EIO;
++	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {	/* check the buffer, not s again */
++		kfree(s);
++		return -EIO;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, journal->j_history, size);
++	s->max = journal->j_history_max;
++	s->start = journal->j_history_cur % s->max;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_history_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_history_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++ return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ if (v != SEQ_START_TOKEN)
++ return 0;
++ seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++ s->stats->ts_tid,
++ s->journal->j_max_transaction_buffers);
++ if (s->stats->ts_tid == 0)
++ return 0;
++ seq_printf(seq, "average: \n %lums waiting for transaction\n",
++ s->stats->ts_wait / s->stats->ts_tid);
++ seq_printf(seq, " %lums running transaction\n",
++ s->stats->ts_running / s->stats->ts_tid);
++ seq_printf(seq, " %lums transaction was being locked\n",
++ s->stats->ts_locked / s->stats->ts_tid);
++ seq_printf(seq, " %lums flushing data (in ordered mode)\n",
++ s->stats->ts_flushing / s->stats->ts_tid);
++ seq_printf(seq, " %lums logging transaction\n",
++ s->stats->ts_logging / s->stats->ts_tid);
++ seq_printf(seq, " %lu handles per transaction\n",
++ s->stats->ts_handle_count / s->stats->ts_tid);
++ seq_printf(seq, " %lu blocks per transaction\n",
++ s->stats->ts_blocks / s->stats->ts_tid);
++ seq_printf(seq, " %lu logged blocks per transaction\n",
++ s->stats->ts_blocks_logged / s->stats->ts_tid);
++ return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++ .start = jbd_seq_info_start,
++ .next = jbd_seq_info_next,
++ .stop = jbd_seq_info_stop,
++ .show = jbd_seq_info_show,
++};
++
++/* Open "info": hand the reader a private copy of the journal's totals. */
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -EIO;
++	size = sizeof(struct transaction_stats_s);
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {	/* check the buffer, not s again */
++		kfree(s);
++		return -EIO;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, &journal->j_stats, size);
++	s->journal = journal;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_info_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_info_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++/* Export <bdev>/{history,info} under proc_jbd_stats for this journal. */
++static void jbd_stats_proc_init(journal_t *journal)
++{
++	char name[BDEVNAME_SIZE];
++	struct proc_dir_entry *p;
++	if (proc_jbd_stats == NULL || journal->j_history == NULL)
++		return;	/* no stats root or no history buffer to show */
++	bdevname(journal->j_dev, name);	/* fills name; no copy needed */
++	journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++	if (journal->j_proc_entry == NULL)
++		return;
++	p = create_proc_entry("history", S_IRUGO, journal->j_proc_entry);
++	if (p == NULL)
++		return;
++	p->proc_fops = &jbd_seq_history_fops;
++	p->data = journal;
++	p = create_proc_entry("info", S_IRUGO, journal->j_proc_entry);
++	if (p == NULL)
++		return;
++	p->proc_fops = &jbd_seq_info_fops;
++	p->data = journal;
++}
++
++/* Remove the per-journal proc entries created by jbd_stats_proc_init(). */
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++	char name[BDEVNAME_SIZE];
++
++	remove_proc_entry("info", journal->j_proc_entry);
++	remove_proc_entry("history", journal->j_proc_entry);
++	remove_proc_entry(bdevname(journal->j_dev, name), proc_jbd_stats);
++}
++
++/* Set up the per-journal history ring; commit/checkpoint use it always. */
++static void journal_init_stats(journal_t *journal)
++{
++	int size;
++
++	/* Taken unconditionally when stats are filed, so init it first. */
++	spin_lock_init(&journal->j_history_lock);
++
++	journal->j_history_max = 100;
++	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++	journal->j_history = kmalloc(size, GFP_KERNEL);
++	if (journal->j_history == NULL) {
++		journal->j_history_max = 0;
++		return;	/* NOTE(review): filing code still derefs j_history */
++	}
++	memset(journal->j_history, 0, size);
++}
++
+ /*
+ * Management for journal control blocks: functions to create and
+ * destroy journal_t structures, and to initialise and read existing
+@@ -691,6 +986,9 @@ static journal_t * journal_init_common (
+ kfree(journal);
+ goto fail;
+ }
++
++ journal_init_stats(journal);
++
+ return journal;
+ fail:
+ return NULL;
+@@ -733,6 +1031,7 @@ journal_t * journal_init_dev(struct bloc
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+ journal->j_blocksize = blocksize;
++ jbd_stats_proc_init(journal);
+
+ bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+ J_ASSERT(bh != NULL);
+@@ -770,6 +1069,7 @@ journal_t * journal_init_inode (struct i
+
+ journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ journal->j_blocksize = inode->i_sb->s_blocksize;
++ jbd_stats_proc_init(journal);
+
+ err = journal_bmap(journal, 0, &blocknr);
+ /* If that failed, give up */
+@@ -1140,6 +1440,8 @@ void journal_destroy(journal_t *journal)
+ brelse(journal->j_sb_buffer);
+ }
+
++ if (journal->j_proc_entry)
++ jbd_stats_proc_exit(journal);
+ if (journal->j_inode)
+ iput(journal->j_inode);
+ if (journal->j_revoke)
+@@ -1895,6 +2197,28 @@ static void __exit remove_jbd_proc_entry
+
+ #endif
+
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++ if (proc_jbd_stats)
++ remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ kmem_cache_t *jbd_handle_cache;
+
+ static int __init journal_init_handle_cache(void)
+@@ -1949,6 +2273,7 @@ static int __init journal_init(void)
+ if (ret != 0)
+ journal_destroy_caches();
+ create_jbd_proc_entry();
++ create_jbd_stats_proc_entry();
+ return ret;
+ }
+
+@@ -1960,6 +2285,7 @@ static void __exit journal_exit(void)
+ printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+ remove_jbd_proc_entry();
++ remove_jbd_stats_proc_entry();
+ journal_destroy_caches();
+ }
+
--- /dev/null
+Index: linux-2.6.7/fs/block_dev.c
+===================================================================
+--- linux-2.6.7.orig/fs/block_dev.c 2004-06-16 13:20:26.000000000 +0800
++++ linux-2.6.7/fs/block_dev.c 2004-08-30 17:36:57.000000000 +0800
+@@ -832,6 +832,7 @@
+ if (!path || !*path)
+ return ERR_PTR(-EINVAL);
+
++ intent_init(&nd.intent.open, IT_LOOKUP);
+ error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+ if (error)
+ return ERR_PTR(error);
+++ /dev/null
-Index: linux-2.6/fs/cifs/dir.c
-===================================================================
---- linux-2.6.orig/fs/cifs/dir.c 2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/cifs/dir.c 2006-07-15 21:04:47.000000000 +0800
-@@ -146,7 +146,7 @@ cifs_create(struct inode *inode, struct
- }
-
- if(nd && (nd->flags & LOOKUP_OPEN)) {
-- int oflags = nd->intent.open.flags;
-+ int oflags = nd->intent.flags;
-
- desiredAccess = 0;
- if (oflags & FMODE_READ)
-Index: linux-2.6/fs/nfs/dir.c
-===================================================================
---- linux-2.6.orig/fs/nfs/dir.c 2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/nfs/dir.c 2006-07-15 21:04:47.000000000 +0800
-@@ -867,7 +867,7 @@ int nfs_is_exclusive_create(struct inode
- return 0;
- if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
- return 0;
-- return (nd->intent.open.flags & O_EXCL) != 0;
-+ return (nd->intent.it_flags & O_EXCL) != 0;
- }
-
- static inline int nfs_reval_fsid(struct inode *dir,
-@@ -955,7 +955,7 @@ static int is_atomic_open(struct inode *
- if (nd->flags & LOOKUP_DIRECTORY)
- return 0;
- /* Are we trying to write to a read only partition? */
-- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
-+ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
- return 0;
- return 1;
- }
-@@ -979,7 +979,7 @@ static struct dentry *nfs_atomic_lookup(
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
-
- /* Let vfs_create() deal with O_EXCL */
-- if (nd->intent.open.flags & O_EXCL) {
-+ if (nd->intent.it_flags & O_EXCL) {
- d_add(dentry, NULL);
- goto out;
- }
-@@ -994,7 +994,7 @@ static struct dentry *nfs_atomic_lookup(
- goto out;
- }
-
-- if (nd->intent.open.flags & O_CREAT) {
-+ if (nd->intent.it_flags & O_CREAT) {
- nfs_begin_data_update(dir);
- res = nfs4_atomic_open(dir, dentry, nd);
- nfs_end_data_update(dir);
-@@ -1013,7 +1013,7 @@ static struct dentry *nfs_atomic_lookup(
- case -ENOTDIR:
- goto no_open;
- case -ELOOP:
-- if (!(nd->intent.open.flags & O_NOFOLLOW))
-+ if (!(nd->intent.it_flags & O_NOFOLLOW))
- goto no_open;
- /* case -EINVAL: */
- default:
-@@ -1049,7 +1049,7 @@ static int nfs_open_revalidate(struct de
- /* NFS only supports OPEN on regular files */
- if (!S_ISREG(inode->i_mode))
- goto no_open;
-- openflags = nd->intent.open.flags;
-+ openflags = nd->intent.it_flags;
- /* We cannot do exclusive creation on a positive dentry */
- if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
- goto no_open;
-@@ -1182,7 +1182,7 @@ static int nfs_create(struct inode *dir,
- attr.ia_valid = ATTR_MODE;
-
- if (nd && (nd->flags & LOOKUP_CREATE))
-- open_flags = nd->intent.open.flags;
-+ open_flags = nd->intent.it_flags;
-
- lock_kernel();
- nfs_begin_data_update(dir);
-Index: linux-2.6/fs/nfs/nfs4proc.c
-===================================================================
---- linux-2.6.orig/fs/nfs/nfs4proc.c 2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/nfs/nfs4proc.c 2006-07-15 21:09:29.000000000 +0800
-@@ -1246,7 +1246,7 @@ static int nfs4_intent_set_file(struct n
- ctx->state = state;
- return 0;
- }
-- nfs4_close_state(state, nd->intent.open.flags);
-+ nfs4_close_state(state, nd->intent.flags);
- return PTR_ERR(filp);
- }
-
-@@ -1259,22 +1259,22 @@ nfs4_atomic_open(struct inode *dir, stru
- struct dentry *res;
-
- if (nd->flags & LOOKUP_CREATE) {
-- attr.ia_mode = nd->intent.open.create_mode;
-+ attr.ia_mode = nd->intent.create_mode;
- attr.ia_valid = ATTR_MODE;
- if (!IS_POSIXACL(dir))
- attr.ia_mode &= ~current->fs->umask;
- } else {
- attr.ia_valid = 0;
-- BUG_ON(nd->intent.open.flags & O_CREAT);
-+ BUG_ON(nd->intent.flags & O_CREAT);
- }
-
- cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
- if (IS_ERR(cred))
- return (struct dentry *)cred;
-- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
-+ state = nfs4_do_open(dir, dentry, nd->intent.flags, &attr, cred);
- put_rpccred(cred);
- if (IS_ERR(state)) {
-- if (PTR_ERR(state) == -ENOENT)
-+ ose_statef (PTR_ERR(state) == -ENOENT);
- d_add(dentry, NULL);
- return (struct dentry *)state;
- }
#define FMODE_READ 1
#define FMODE_WRITE 2
-+#define FMODE_EXEC 4
++#define FMODE_EXEC 16
/* Internal kernel extensions */
#define FMODE_LSEEK 4
Index: linux-2.6/fs/inode.c
===================================================================
---- linux-2.6.orig/fs/inode.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/inode.c 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/inode.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/inode.c 2006-08-31 11:23:48.000000000 +0800
@@ -234,6 +234,7 @@ void __iget(struct inode * inode)
inodes_stat.nr_unused--;
}
* @inode: inode to clear
Index: linux-2.6/fs/open.c
===================================================================
---- linux-2.6.orig/fs/open.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/open.c 2006-07-15 21:04:08.000000000 +0800
-@@ -225,12 +225,12 @@ static long do_sys_truncate(const char _
- struct nameidata nd;
+--- linux-2.6.orig/fs/open.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/open.c 2006-08-31 11:59:09.000000000 +0800
+@@ -226,11 +226,12 @@ static long do_sys_truncate(const char _
struct inode * inode;
int error;
--
-+ intent_init(&nd.intent, IT_GETATTR);
+
++ intent_init(&nd.intent.open, IT_GETATTR);
error = -EINVAL;
if (length < 0) /* sorry, but loff_t says... */
goto out;
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -495,6 +495,7 @@ asmlinkage long sys_faccessat(int dfd, c
+@@ -495,6 +496,7 @@ asmlinkage long sys_faccessat(int dfd, c
int old_fsuid, old_fsgid;
kernel_cap_t old_cap;
int res;
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
-@@ -519,7 +520,7 @@ asmlinkage long sys_faccessat(int dfd, c
+@@ -519,7 +521,7 @@ asmlinkage long sys_faccessat(int dfd, c
else
current->cap_effective = current->cap_permitted;
if (!res) {
res = vfs_permission(&nd, mode);
/* SuS v2 requires we report a read only fs too */
-@@ -545,8 +546,9 @@ asmlinkage long sys_chdir(const char __u
+@@ -545,8 +547,9 @@ asmlinkage long sys_chdir(const char __u
{
struct nameidata nd;
int error;
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
if (error)
goto out;
-@@ -596,8 +598,9 @@ asmlinkage long sys_chroot(const char __
+@@ -596,8 +599,9 @@ asmlinkage long sys_chroot(const char __
{
struct nameidata nd;
int error;
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
if (error)
goto out;
-@@ -823,6 +826,7 @@ static struct file *__dentry_open(struct
+@@ -823,6 +827,7 @@ static struct file *__dentry_open(struct
error = open(inode, f);
if (error)
goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-@@ -849,6 +853,7 @@ cleanup_all:
+@@ -849,6 +854,7 @@ cleanup_all:
f->f_dentry = NULL;
f->f_vfsmnt = NULL;
cleanup_file:
put_filp(f);
dput(dentry);
mntput(mnt);
-@@ -874,6 +879,7 @@ static struct file *do_filp_open(int dfd
+@@ -874,6 +880,7 @@ static struct file *do_filp_open(int dfd
{
int namei_flags, error;
struct nameidata nd;
-+ intent_init(&nd.intent, IT_OPEN);
++ intent_init(&nd.intent.open, IT_OPEN);
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
-@@ -914,19 +920,19 @@ EXPORT_SYMBOL(filp_open);
- struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
- int (*open)(struct inode *, struct file *))
- {
-- if (IS_ERR(nd->intent.open.file))
-+ if (IS_ERR(nd->intent.file))
- goto out;
- if (IS_ERR(dentry))
- goto out_err;
-- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
-- nd->intent.open.flags - 1,
-- nd->intent.open.file,
-+ nd->intent.file = __dentry_open(dget(dentry), mntget(nd->mnt),
-+ nd->intent.flags - 1,
-+ nd->intent.file,
- open);
- out:
-- return nd->intent.open.file;
-+ return nd->intent.file;
- out_err:
- release_open_intent(nd);
-- nd->intent.open.file = (struct file *)dentry;
-+ nd->intent.file = (struct file *)dentry;
- goto out;
- }
- EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
-@@ -943,7 +949,8 @@ struct file *nameidata_to_filp(struct na
- struct file *filp;
+@@ -944,6 +951,7 @@ struct file *nameidata_to_filp(struct na
/* Pick up the filp from the open intent */
-- filp = nd->intent.open.file;
-+ filp = nd->intent.file;
-+ filp->f_it = &nd->intent;
+ filp = nd->intent.open.file;
++ filp->f_it = &nd->intent.open;
/* Has the filesystem initialised the file for us? */
if (filp->f_dentry == NULL)
filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
Index: linux-2.6/fs/nfsctl.c
===================================================================
---- linux-2.6.orig/fs/nfsctl.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/nfsctl.c 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/nfsctl.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/nfsctl.c 2006-08-31 11:23:48.000000000 +0800
@@ -25,6 +25,7 @@ static struct file *do_open(char *name,
struct nameidata nd;
int error;
-+ intent_init(&nd.intent, IT_OPEN);
++ intent_init(&nd.intent.open, IT_OPEN);
nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
if (IS_ERR(nd.mnt))
Index: linux-2.6/fs/namei.c
===================================================================
---- linux-2.6.orig/fs/namei.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/namei.c 2006-07-15 21:04:36.000000000 +0800
-@@ -337,8 +337,19 @@ int deny_write_access(struct file * file
+--- linux-2.6.orig/fs/namei.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/namei.c 2006-08-31 11:59:09.000000000 +0800
+@@ -344,8 +344,19 @@ int deny_write_access(struct file * file
return 0;
}
dput(nd->dentry);
mntput(nd->mnt);
}
-@@ -359,10 +370,10 @@ void path_release_on_umount(struct namei
- */
- void release_open_intent(struct nameidata *nd)
- {
-- if (nd->intent.open.file->f_dentry == NULL)
-- put_filp(nd->intent.open.file);
-+ if (nd->intent.file->f_dentry == NULL)
-+ put_filp(nd->intent.file);
- else
-- fput(nd->intent.open.file);
-+ fput(nd->intent.file);
- }
-
- /*
-@@ -440,8 +451,12 @@ static struct dentry * real_lookup(struc
+@@ -447,8 +458,12 @@ static struct dentry * real_lookup(struc
{
struct dentry * result;
struct inode *dir = parent->d_inode;
/*
* First re-do the cached lookup just in case it was created
* while we waited for the directory semaphore..
-@@ -475,13 +490,16 @@ static struct dentry * real_lookup(struc
+@@ -482,13 +497,16 @@ static struct dentry * real_lookup(struc
* Uhhuh! Nasty case: the cache was re-populated while
* we waited on the semaphore. Need to revalidate.
*/
return result;
}
-@@ -509,7 +527,9 @@ walk_init_root(const char *name, struct
+@@ -516,7 +534,9 @@ walk_init_root(const char *name, struct
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
int res = 0;
-+ struct lookup_intent it = nd->intent;
++ struct lookup_intent it = nd->intent.open;
char *name;
+
if (IS_ERR(link))
goto fail;
-@@ -519,6 +539,10 @@ static __always_inline int __vfs_follow_
+@@ -526,6 +546,10 @@ static __always_inline int __vfs_follow_
/* weird __emul_prefix() stuff did it */
goto out;
}
-+ intent_init(&nd->intent, it.it_op);
-+ nd->intent.it_flags = it.it_flags;
-+ nd->intent.it_create_mode = it.it_create_mode;
-+ nd->intent.file = it.file;
++ intent_init(&nd->intent.open, it.it_op);
++ nd->intent.open.it_flags = it.it_flags;
++ nd->intent.open.it_create_mode = it.it_create_mode;
++ nd->intent.open.file = it.file;
res = link_path_walk(link, nd);
out:
if (nd->depth || res || nd->last_type!=LAST_NORM)
-@@ -771,6 +795,33 @@ fail:
+@@ -778,6 +802,33 @@ fail:
return PTR_ERR(dentry);
}
/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
-@@ -867,7 +918,11 @@ static fastcall int __link_path_walk(con
+@@ -874,7 +925,11 @@ static fastcall int __link_path_walk(con
goto out_dput;
if (inode->i_op->follow_link) {
if (err)
goto return_err;
err = -ENOENT;
-@@ -902,6 +957,23 @@ last_component:
+@@ -909,6 +964,23 @@ last_component:
inode = nd->dentry->d_inode;
/* fallthrough */
case 1:
goto return_reval;
}
if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
-@@ -909,7 +981,9 @@ last_component:
+@@ -916,7 +988,9 @@ last_component:
if (err < 0)
break;
}
if (err)
break;
inode = next.dentry->d_inode;
-@@ -1152,13 +1226,13 @@ static int __path_lookup_intent_open(int
-
- if (filp == NULL)
- return -ENFILE;
-- nd->intent.open.file = filp;
-- nd->intent.open.flags = open_flags;
-- nd->intent.open.create_mode = create_mode;
-+ nd->intent.file = filp;
-+ nd->intent.flags = open_flags;
-+ nd->intent.create_mode = create_mode;
- err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
-- if (IS_ERR(nd->intent.open.file)) {
-+ if (IS_ERR(nd->intent.file)) {
- if (err == 0) {
-- err = PTR_ERR(nd->intent.open.file);
-+ err = PTR_ERR(nd->intent.file);
- path_release(nd);
- }
- } else if (err != 0)
-@@ -1261,7 +1335,7 @@ static struct dentry *lookup_hash(struct
+@@ -1268,7 +1342,7 @@ static struct dentry *lookup_hash(struct
}
/* SMP-safe */
{
unsigned long hash;
struct qstr this;
-@@ -1281,11 +1355,17 @@ struct dentry * lookup_one_len(const cha
+@@ -1288,11 +1362,17 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
/*
* namei()
*
-@@ -1297,8 +1377,9 @@ access:
+@@ -1304,8 +1384,9 @@ access:
* that namei follows links, while lnamei does not.
* SMP-safe
*/
{
char *tmp = getname(name);
int err = PTR_ERR(tmp);
-@@ -1310,9 +1391,22 @@ int fastcall __user_walk_fd(int dfd, con
+@@ -1317,9 +1398,22 @@ int fastcall __user_walk_fd(int dfd, con
return err;
}
+int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
+ struct nameidata *nd)
+{
-+ intent_init(&nd->intent, IT_LOOKUP);
++ intent_init(&nd->intent.open, IT_LOOKUP);
+ return __user_walk_fd_it(dfd, name, flags, nd);
+}
+
int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
{
- return __user_walk_fd(AT_FDCWD, name, flags, nd);
-+ intent_init(&nd->intent, IT_LOOKUP);
++ intent_init(&nd->intent.open, IT_LOOKUP);
+ return __user_walk_it(name, flags, nd);
}
/*
-@@ -1593,6 +1687,8 @@ int open_namei(int dfd, const char *path
+@@ -1600,6 +1694,8 @@ int open_namei(int dfd, const char *path
if (flag & O_APPEND)
acc_mode |= MAY_APPEND;
-+ nd->intent.it_flags = flag;
-+ nd->intent.it_create_mode = mode;
++ nd->intent.open.it_flags = flag;
++ nd->intent.open.it_create_mode = mode;
/*
* The simplest case - just a plain lookup.
*/
-@@ -1607,6 +1703,7 @@ int open_namei(int dfd, const char *path
+@@ -1614,6 +1710,7 @@ int open_namei(int dfd, const char *path
/*
* Create - we need to know the parent.
*/
-+ nd->intent.it_op |= IT_CREAT;
++ nd->intent.open.it_op |= IT_CREAT;
error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
if (error)
return error;
-@@ -1623,7 +1720,9 @@ int open_namei(int dfd, const char *path
+@@ -1630,7 +1727,9 @@ int open_namei(int dfd, const char *path
dir = nd->dentry;
nd->flags &= ~LOOKUP_PARENT;
mutex_lock(&dir->d_inode->i_mutex);
path.mnt = nd->mnt;
do_last:
-@@ -1633,9 +1732,9 @@ do_last:
- goto exit;
- }
-
-- if (IS_ERR(nd->intent.open.file)) {
-+ if (IS_ERR(nd->intent.file)) {
- mutex_unlock(&dir->d_inode->i_mutex);
-- error = PTR_ERR(nd->intent.open.file);
-+ error = PTR_ERR(nd->intent.file);
- goto exit_dput;
- }
-
-@@ -1688,7 +1787,7 @@ ok:
- exit_dput:
- dput_path(&path, nd);
- exit:
-- if (!IS_ERR(nd->intent.open.file))
-+ if (!IS_ERR(nd->intent.file))
- release_open_intent(nd);
- path_release(nd);
- return error;
-@@ -1731,7 +1830,9 @@ do_link:
+@@ -1746,7 +1845,9 @@ do_link:
}
dir = nd->dentry;
mutex_lock(&dir->d_inode->i_mutex);
path.mnt = nd->mnt;
__putname(nd->last.name);
goto do_last;
-@@ -2243,6 +2344,9 @@ asmlinkage long sys_linkat(int olddfd, c
+@@ -2260,6 +2361,9 @@ asmlinkage long sys_linkat(int olddfd, c
int error;
char * to;
-+ intent_init(&nd.intent, IT_LOOKUP);
-+ intent_init(&old_nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent.open, IT_LOOKUP);
++ intent_init(&old_nd.intent.open, IT_LOOKUP);
+
if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
return -EINVAL;
-@@ -2250,7 +2354,7 @@ asmlinkage long sys_linkat(int olddfd, c
+@@ -2267,7 +2371,7 @@ asmlinkage long sys_linkat(int olddfd, c
if (IS_ERR(to))
return PTR_ERR(to);
if (error)
Index: linux-2.6/fs/stat.c
===================================================================
---- linux-2.6.orig/fs/stat.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/stat.c 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/stat.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/stat.c 2006-08-31 11:23:48.000000000 +0800
@@ -37,7 +37,7 @@ void generic_fillattr(struct inode *inod
EXPORT_SYMBOL(generic_fillattr);
int error;
- error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
+ error = __user_walk_fd_it(dfd, name, LOOKUP_FOLLOW, &nd);
if (!error) {
- error = vfs_getattr(nd.mnt, nd.dentry, stat);
-+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat);
path_release(&nd);
}
return error;
int error;
- error = __user_walk_fd(dfd, name, 0, &nd);
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
+ error = __user_walk_fd_it(dfd, name, 0, &nd);
if (!error) {
- error = vfs_getattr(nd.mnt, nd.dentry, stat);
-+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat);
path_release(&nd);
}
return error;
struct file *f = fget(fd);
int error = -EBADF;
+ struct nameidata nd;
-+ intent_init(&nd.intent, IT_GETATTR);
++ intent_init(&nd.intent.open, IT_GETATTR);
if (f) {
- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
-+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
-+ intent_release(&nd.intent);
++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent.open, stat);
++ intent_release(&nd.intent.open);
fput(f);
}
return error;
Index: linux-2.6/fs/namespace.c
===================================================================
---- linux-2.6.orig/fs/namespace.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/namespace.c 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/namespace.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/namespace.c 2006-08-31 11:59:07.000000000 +0800
@@ -73,6 +73,7 @@ struct vfsmount *alloc_vfsmnt(const char
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
return err;
if (!old_name || !*old_name)
return -EINVAL;
-+ intent_init(&old_nd.intent, IT_LOOKUP);
++ intent_init(&old_nd.intent.open, IT_LOOKUP);
err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
if (err)
return err;
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
-+ intent_init(&old_nd.intent, IT_LOOKUP);
++ intent_init(&old_nd.intent.open, IT_LOOKUP);
err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
if (err)
return err;
int retval = 0;
int mnt_flags = 0;
-+ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent.open, IT_LOOKUP);
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
Index: linux-2.6/fs/exec.c
===================================================================
---- linux-2.6.orig/fs/exec.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/exec.c 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/exec.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/exec.c 2006-08-31 11:59:09.000000000 +0800
@@ -127,6 +127,7 @@ asmlinkage long sys_uselib(const char __
struct nameidata nd;
int error;
-+ intent_init(&nd.intent, IT_OPEN);
++ intent_init(&nd.intent.open, IT_OPEN);
error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
if (error)
goto out;
int err;
struct file *file;
-+ intent_init(&nd.intent, IT_OPEN);
++ intent_init(&nd.intent.open, IT_OPEN);
err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
file = ERR_PTR(err);
Index: linux-2.6/include/linux/dcache.h
===================================================================
---- linux-2.6.orig/include/linux/dcache.h 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/dcache.h 2006-07-15 21:04:08.000000000 +0800
-@@ -4,6 +4,7 @@
- #ifdef __KERNEL__
-
- #include <asm/atomic.h>
-+#include <linux/string.h>
- #include <linux/list.h>
- #include <linux/spinlock.h>
- #include <linux/cache.h>
-@@ -36,6 +37,8 @@ struct qstr {
+--- linux-2.6.orig/include/linux/dcache.h 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/dcache.h 2006-08-31 12:00:07.000000000 +0800
+@@ -36,6 +36,9 @@ struct qstr {
const unsigned char *name;
};
++struct inode;
+#include <linux/namei.h>
+
struct dentry_stat_t {
int nr_unused;
Index: linux-2.6/include/linux/fs.h
===================================================================
---- linux-2.6.orig/include/linux/fs.h 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/fs.h 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/fs.h 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/fs.h 2006-08-31 11:59:09.000000000 +0800
@@ -280,6 +280,8 @@ typedef void (dio_iodone_t)(struct kiocb
#define ATTR_KILL_SUID 2048
#define ATTR_KILL_SGID 4096
__u32 i_generation;
-@@ -699,6 +702,7 @@ struct file {
+@@ -700,6 +703,7 @@ struct file {
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
-@@ -1099,7 +1103,9 @@ struct inode_operations {
+@@ -1100,7 +1104,9 @@ struct inode_operations {
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, struct nameidata *);
int (*setattr) (struct dentry *, struct iattr *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
-@@ -1140,6 +1146,7 @@ struct super_operations {
+@@ -1141,6 +1147,7 @@ struct super_operations {
int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *);
void (*umount_begin) (struct vfsmount *, int);
int (*show_options)(struct seq_file *, struct vfsmount *);
int (*show_stats)(struct seq_file *, struct vfsmount *);
-@@ -1362,6 +1369,7 @@ extern int may_umount_tree(struct vfsmou
+@@ -1363,6 +1370,7 @@ extern int may_umount_tree(struct vfsmou
extern int may_umount(struct vfsmount *);
extern void umount_tree(struct vfsmount *, int, struct list_head *);
extern void release_mounts(struct list_head *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
-@@ -1423,6 +1431,7 @@ extern long do_sys_open(int fdf, const c
+@@ -1424,6 +1432,7 @@ extern long do_sys_open(int fdf, const c
int mode);
extern struct file *filp_open(const char *, int, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
Index: linux-2.6/include/linux/namei.h
===================================================================
---- linux-2.6.orig/include/linux/namei.h 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/namei.h 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/namei.h 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/namei.h 2006-08-31 11:23:48.000000000 +0800
@@ -5,10 +5,39 @@
struct vfsmount;
+ } d;
};
- enum { MAX_NESTED_LINKS = 5 };
-@@ -22,12 +51,16 @@ struct nameidata {
- unsigned depth;
- char *saved_names[MAX_NESTED_LINKS + 1];
-
-- /* Intent data */
-- union {
-- struct open_intent open;
-- } intent;
-+ struct lookup_intent intent;
+ enum { MAX_NESTED_LINKS = 8 };
+@@ -28,6 +57,13 @@ struct nameidata {
+ } intent;
};
+static inline void intent_init(struct lookup_intent *it, int op)
/*
* Type of the last component on LOOKUP_PARENT
*/
-@@ -48,6 +81,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+@@ -48,6 +84,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
#define LOOKUP_PARENT 16
#define LOOKUP_NOALT 32
#define LOOKUP_REVAL 64
/*
* Intent data
*/
-@@ -57,10 +92,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+@@ -57,10 +95,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
Index: linux-2.6/include/linux/mount.h
===================================================================
---- linux-2.6.orig/include/linux/mount.h 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/mount.h 2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/mount.h 2006-08-31 11:17:40.000000000 +0800
++++ linux-2.6/include/linux/mount.h 2006-08-31 11:23:48.000000000 +0800
@@ -53,6 +53,8 @@ struct vfsmount {
struct list_head mnt_slave; /* slave list entry */
struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */
Index: linux-2.6/fs/dcache.c
===================================================================
---- linux-2.6.orig/fs/dcache.c 2006-07-15 16:08:36.000000000 +0800
-+++ linux-2.6/fs/dcache.c 2006-07-15 16:10:41.000000000 +0800
+--- linux-2.6.orig/fs/dcache.c 2006-08-31 11:59:09.000000000 +0800
++++ linux-2.6/fs/dcache.c 2006-09-06 14:01:37.000000000 +0800
@@ -226,6 +226,13 @@ int d_invalidate(struct dentry * dentry)
spin_unlock(&dcache_lock);
return 0;
x = y; y = __tmp; } while (0)
Index: linux-2.6/include/linux/dcache.h
===================================================================
---- linux-2.6.orig/include/linux/dcache.h 2006-07-15 16:10:33.000000000 +0800
-+++ linux-2.6/include/linux/dcache.h 2006-07-15 16:10:41.000000000 +0800
-@@ -176,6 +176,8 @@ d_iput: no no no yes
+--- linux-2.6.orig/include/linux/dcache.h 2006-08-31 12:00:23.000000000 +0800
++++ linux-2.6/include/linux/dcache.h 2006-09-06 14:02:36.000000000 +0800
+@@ -176,6 +176,7 @@ d_iput: no no no yes
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
-+#define DCACHE_LUSTRE_INVALID 0x0020 /* Lustre invalidated */
-+
++#define DCACHE_LUSTRE_INVALID 0x0040 /* Lustre invalidated */
#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */
uml-exprt-clearuser.patch
fsprivate-2.6.patch
linux-2.6.9-ext3-sub-second-timestamp.patch
+bitops_ext2_find_next_le_bit-2.6.patch
raid5-optimize-memcpy.patch
raid5-merge-ios.patch
raid5-serialize-ovelapping-reqs.patch
+bitops_ext2_find_next_le_bit-2.6.patch
raid5-stripe-by-stripe-handling.patch
raid5-merge-ios.patch
raid5-serialize-ovelapping-reqs.patch
+jbd-stats-2.6.9.patch
+bitops_ext2_find_next_le_bit-2.6.patch
lustre_version.patch
vfs_intent-2.6-sles10.patch
vfs_nointent-2.6-sles10.patch
-vfs_races-2.6.12.patch
+vfs_races-2.6.18-vanilla.patch
ext3-wantedi-misc-2.6-suse.patch
jbd-2.6.10-jcberr.patch
nfs-cifs-intent-2.6-fc5.patch
dev_read_only-2.6-fc5.patch
export-2.6-fc5.patch
lookup_bdev_init_intent.patch
-8kstack-2.6.12.patch
remove-suid-2.6-suse.patch
export-show_task-2.6-fc5.patch
sd_iostats-2.6-rhel4.patch
dcache-qstr-api-fix-2.6-suse.patch
iallocsem_consistency.patch
tcp-zero-copy-2.6.5-7.244.patch
+jbd-stats-2.6.5.patch
+bitops_ext2_find_next_le_bit-2.6.patch
vfs_races-2.6.18-vanilla.patch
ext3-wantedi-misc-2.6.18-vanilla.patch
jbd-jcberr-2.6.18-vanilla.patch
-nfs-cifs-intent-2.6.18-vanilla.patch
iopen-misc-2.6.18-vanilla.patch
export-truncate-2.6.18-vanilla.patch
export_symbols-2.6.18-vanilla.patch
dev_read_only-2.6.18-vanilla.patch
export-2.6.18-vanilla.patch
-lookup_bdev_init_intent.patch
+lookup_bdev_init_intent-2.6.18-vanilla.patch
8kstack-2.6.12.patch
remove-suid-2.6-suse.patch
export-show_task-2.6.18-vanilla.patch
sd_iostats-2.6-rhel4.patch
export_symbol_numa-2.6-fc5.patch
tcp-zero-copy-2.6.18-vanilla.patch
-vfs_intent-2.6-fc5-fix.patch
+export-do_kern_mount.patch
lnxmaj="2.6.5"
-lnxrel="7.267"
+lnxrel="7.276"
KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
# they include our patches
/* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
- cli->cl_sandev = to_kdev_t(0);
if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
target_cancel_recovery_timer(obd);
spin_unlock_bh(&obd->obd_processing_task_lock);
- CERROR("%s: recovery period over; disconnecting unfinished clients.\n",
- obd->obd_name);
+ LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished "
+ "clients.\n", obd->obd_name);
class_disconnect_stale_exports(obd);
abort_recovery_queue(obd);
target_finish_recovery(obd);
-
- ptlrpc_run_recovery_over_upcall(obd);
+ CDEBUG(D_HA, "%s: recovery complete\n", obd_uuid2str(&obd->obd_uuid));
EXIT;
}
spin_unlock_bh(&obd->obd_processing_task_lock);
target_finish_recovery(obd);
- ptlrpc_run_recovery_over_upcall(obd);
+ CDEBUG(D_HA, "%s: recovery complete\n",
+ obd_uuid2str(&obd->obd_uuid));
} else {
CWARN("%s: %d recoverable clients remain\n",
obd->obd_name, obd->obd_recoverable_clients);
struct obd_device *obd = req->rq_export->exp_obd;
struct obd_device *master_obd;
struct lustre_quota_ctxt *qctxt;
- struct qunit_data *qdata, *rep;
+ struct qunit_data *qdata;
+ void* rep;
+ struct qunit_data_old *qdata_old;
int rc = 0;
int repsize[2] = { sizeof(struct ptlrpc_body),
sizeof(struct qunit_data) };
CERROR("packing reply failed!: rc = %d\n", rc);
RETURN(rc);
}
- rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*rep));
- LASSERT(rep);
-
- qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata),
- lustre_swab_qdata);
+ LASSERT(req->rq_export);
+
+ /* fixed for bug10707 */
+ if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+ CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+ rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
+ sizeof(struct qunit_data));
+ LASSERT(rep);
+ qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata),
+ lustre_swab_qdata);
+ } else {
+ CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+ rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
+ sizeof(struct qunit_data_old));
+ LASSERT(rep);
+ qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old),
+ lustre_swab_qdata_old);
+ qdata = lustre_quota_old_to_new(qdata_old);
+ }
+
if (qdata == NULL) {
CERROR("Can't unpack qunit_data\n");
RETURN(-EPROTO);
"dqacq failed! (rc:%d)\n", rc);
/* the qd_count might be changed in lqc_handler */
- memcpy(rep, qdata, sizeof(*rep));
+ if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+ memcpy(rep,qdata,sizeof(*qdata));
+ } else {
+ qdata_old = lustre_quota_new_to_old(qdata);
+ memcpy(rep,qdata_old,sizeof(*qdata_old));
+ }
req->rq_status = rc;
rc = ptlrpc_reply(req);
LDLM_LOCK_PUT(lock);
}
+/*
+ * Prototypes for the two logging helpers that ldlm_lock_debug() calls.
+ * Both take the same rate-limit state, mask and source-location arguments;
+ * cdebug_va() receives the format arguments as a va_list, cdebug() receives
+ * them as ordinary variadic arguments.
+ */
+void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask,
+ const char *file, const char *func, const int line,
+ const char *fmt, va_list args);
+void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask,
+ const char *file, const char *func, const int line,
+ const char *fmt, ...);
+/*
+ * Log a message about @lock, followed by a description of the lock itself.
+ *
+ * Two records are emitted, both with the caller-supplied @cdls, @level,
+ * @file, @func and @line:
+ *   1. the caller's printf-style @fmt with its variadic arguments, passed
+ *      to cdebug_va();
+ *   2. a record describing the lock, passed to cdebug().
+ *
+ * The second record always carries the lock pointer and handle cookie, the
+ * reference and reader/writer counts, the granted and requested modes, the
+ * flags, the remote handle cookie, the export reference count and the pid.
+ * What else it carries depends on the resource:
+ *   - l_resource == NULL: namespace, resource name, resource refcount and
+ *     type are printed as question-mark placeholders, then the function
+ *     returns;
+ *   - LDLM_EXTENT: adds the granted extent and the requested extent;
+ *   - LDLM_FLOCK:  adds the flock pid and the flock range;
+ *   - LDLM_IBITS:  adds the inodebits mask;
+ *   - any other type: only the common fields plus resource name, resource
+ *     refcount and type name.
+ *
+ * The export reference count is printed as -99 when the lock has no export.
+ * The placeholders are written with escaped question marks in the format
+ * strings (presumably to avoid trigraph interpretation -- confirm).
+ */
+void
+ldlm_lock_debug(cfs_debug_limit_state_t *cdls,
+ __u32 level, struct ldlm_lock *lock,
+ const char *file, const char *func, const int line,
+ char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ cdebug_va(cdls, level, file, func, line, fmt, args); /* record 1: the caller's own message */
+ va_end(args);
+
+ if (lock->l_resource == NULL) { /* no resource attached: resource fields become placeholders */
+ cdebug(cdls, level, file, func, line,
+ " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "
+ LPX64" expref: %d pid: %u\n", lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_flags, lock->l_remote_handle.cookie,
+ lock->l_export ?
+ atomic_read(&lock->l_export->exp_refcount) : -99,
+ lock->l_pid);
+ return;
+ }
+
+ switch (lock->l_resource->lr_type) { /* record 2 layout is chosen by resource type */
+ case LDLM_EXTENT: /* common fields + granted extent + requested extent */
+ cdebug(cdls, level, file, func, line,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64
+ "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64
+ " expref: %d pid: %u\n",
+ lock->l_resource->lr_namespace->ns_name, lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_resource->lr_name.name[0],
+ lock->l_resource->lr_name.name[1],
+ atomic_read(&lock->l_resource->lr_refcount),
+ ldlm_typename[lock->l_resource->lr_type],
+ lock->l_policy_data.l_extent.start,
+ lock->l_policy_data.l_extent.end,
+ lock->l_req_extent.start, lock->l_req_extent.end,
+ lock->l_flags, lock->l_remote_handle.cookie,
+ lock->l_export ?
+ atomic_read(&lock->l_export->exp_refcount) : -99,
+ lock->l_pid);
+ break;
+
+ case LDLM_FLOCK: /* common fields + flock pid + flock range */
+ cdebug(cdls, level, file, func, line,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "
+ "["LPU64"->"LPU64"] flags: %x remote: "LPX64
+ " expref: %d pid: %u\n",
+ lock->l_resource->lr_namespace->ns_name, lock,
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_resource->lr_name.name[0],
+ lock->l_resource->lr_name.name[1],
+ atomic_read(&lock->l_resource->lr_refcount),
+ ldlm_typename[lock->l_resource->lr_type],
+ lock->l_policy_data.l_flock.pid,
+ lock->l_policy_data.l_flock.start,
+ lock->l_policy_data.l_flock.end,
+ lock->l_flags, lock->l_remote_handle.cookie,
+ lock->l_export ?
+ atomic_read(&lock->l_export->exp_refcount) : -99,
+ lock->l_pid);
+ break;
+
+ case LDLM_IBITS: /* common fields + inodebits mask (printed before rrc) */
+ cdebug(cdls, level, file, func, line,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "
+ "flags: %x remote: "LPX64" expref: %d "
+ "pid %u\n",
+ lock->l_resource->lr_namespace->ns_name,
+ lock, lock->l_handle.h_cookie,
+ atomic_read (&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_resource->lr_name.name[0],
+ lock->l_resource->lr_name.name[1],
+ lock->l_policy_data.l_inodebits.bits,
+ atomic_read(&lock->l_resource->lr_refcount),
+ ldlm_typename[lock->l_resource->lr_type],
+ lock->l_flags, lock->l_remote_handle.cookie,
+ lock->l_export ?
+ atomic_read(&lock->l_export->exp_refcount) : -99,
+ lock->l_pid);
+ break;
+
+ default: /* every other resource type: common fields only */
+ cdebug(cdls, level, file, func, line,
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+ "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "
+ "remote: "LPX64" expref: %d pid: %u\n",
+ lock->l_resource->lr_namespace->ns_name,
+ lock, lock->l_handle.h_cookie,
+ atomic_read (&lock->l_refc),
+ lock->l_readers, lock->l_writers,
+ ldlm_lockname[lock->l_granted_mode],
+ ldlm_lockname[lock->l_req_mode],
+ lock->l_resource->lr_name.name[0],
+ lock->l_resource->lr_name.name[1],
+ atomic_read(&lock->l_resource->lr_refcount),
+ ldlm_typename[lock->l_resource->lr_type],
+ lock->l_flags, lock->l_remote_handle.cookie,
+ lock->l_export ?
+ atomic_read(&lock->l_export->exp_refcount) : -99,
+ lock->l_pid);
+ break;
+ }
+}
+EXPORT_SYMBOL(ldlm_lock_debug);
{
struct list_head *expired = &expired_lock_thread.elt_expired_locks;
struct l_wait_info lwi = { 0 };
+ int do_dump;
ENTRY;
cfs_daemonize("ldlm_elt");
expired_lock_thread.elt_dump = 0;
}
+ do_dump = 0;
+
while (!list_empty(expired)) {
struct obd_export *export;
struct ldlm_lock *lock;
export = class_export_get(lock->l_export);
spin_unlock_bh(&waiting_locks_spinlock);
+ do_dump++;
class_fail_export(export);
class_export_put(export);
spin_lock_bh(&waiting_locks_spinlock);
}
spin_unlock_bh(&waiting_locks_spinlock);
+ if (do_dump && obd_dump_on_eviction) {
+ CERROR("dump the log upon eviction\n");
+ libcfs_debug_dumplog();
+ }
+
if (expired_lock_thread.elt_state == ELT_TERMINATE)
break;
}
}
}
+ unlink(path);
t_touch(path);
fd = t_open(path);
while (np <= _npages) {
printf("%3d per xfer(total %d)...\t", np, _npages);
fflush(stdout);
- pages_io(np, offset);
+ if (pages_io(np, offset) != 0)
+ return 1;
np += np;
}
LEAVE();
if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
#else
if (!d_unhashed(dentry)) {
- struct inode *inode = dentry->d_inode;
#endif
CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
"inode %p refc %d\n", dentry->d_name.len,
* sys_getcwd() could return -ENOENT -bzzz */
#ifdef LUSTRE_KERNEL_VERSION
dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-#else
- if (!inode || !S_ISDIR(inode->i_mode))
- __d_drop(dentry);
-#endif
-
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
__d_drop(dentry);
- if (inode) {
+ if (dentry->d_inode) {
/* Put positive dentries to orphan list */
- hlist_add_head(&dentry->d_hash,
- &ll_i2sbi(inode)->ll_orphan_dentry_list);
+ list_add(&dentry->d_hash,
+ &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list);
}
#endif
+#else
+ if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
+ __d_drop(dentry);
+#endif
+
}
unlock_dentry(dentry);
return 0;
ldlm_policy_data_t tmpex;
unsigned long start, end, count, skip, i, j;
struct page *page;
- int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA;
+ int rc, rc2, l_flags, discard = lock->l_flags & LDLM_FL_DISCARD_DATA;
struct lustre_handle lockh;
ENTRY;
* batching writeback under the lock explicitly. */
for (i = start, j = start % count; i <= end;
j++, i++, tmpex.l_extent.start += PAGE_CACHE_SIZE) {
- int l_flags;
if (j == count) {
CDEBUG(D_PAGE, "skip index %lu to %lu\n", i, i + skip);
i += skip;
lock_page(page);
}
- l_flags = LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING |
- LDLM_FL_TEST_LOCK;
-
tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1;
- /* check to see if another DLM lock covers this page */
+ l_flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
+ /* check to see if another DLM lock covers this page b=2765 */
rc2 = obd_match(ll_s2dtexp(inode->i_sb), lsm, LDLM_EXTENT,
&tmpex, LCK_PR | LCK_PW, &l_flags, inode,
&lockh);
- /* rc2 < 0 means some error occured, e.g. export was down.
- * rc2 == 0 means nothing was matched */
- if (rc2 <= 0 && page->mapping != NULL) {
+ if (rc2 == 0 && page->mapping != NULL) {
struct ll_async_page *llap = llap_cast_private(page);
// checking again to account for writeback's lock_page()
LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
/* BUG: 5972 */
file_accessed(file);
retval = generic_file_read(file, buf, chunk, ppos);
+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 0);
ll_tree_unlock(&tree);
CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
inode->i_ino, chunk, *ppos);
retval = generic_file_write(file, buf, chunk, ppos);
+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 1);
out:
ll_tree_unlock(&tree);
#endif
#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-#if !defined(LUSTRE_KERNEL_VERSION) || (LUSTRE_KERNEL_VERSION < 46)
#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#else
-#if (LUSTRE_KERNEL_VERSION < 46)
-#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#else
-#define LUSTRE_FPRIVATE(file) ((file)->fs_private)
-#endif
-#endif
#ifdef LUSTRE_KERNEL_VERSION
static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
unsigned long ra_stats[_NR_RA_STAT];
};
+/* LL_HIST_MAX=32 causes an overflow */
+#define LL_HIST_MAX 28
+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
+#define LL_PROCESS_HIST_MAX 10
+struct per_process_info { /* one pid and two histograms; element type of ll_rw_extents_info.pp_extents[] */
+ pid_t pid;
+ struct obd_histogram pp_r_hist; /* presumably the read-side histogram -- TODO confirm against ll_rw_stats_tally() */
+ struct obd_histogram pp_w_hist; /* presumably the write-side histogram -- TODO confirm against ll_rw_stats_tally() */
+};
+
+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info.
+ * The array therefore has LL_PROCESS_HIST_MAX + 1 entries; indices
+ * 0 .. LL_PROCESS_HIST_MAX - 1 are presumably the per-process entries
+ * (TODO confirm against the code that fills them). Code that walks or
+ * initializes every entry must include index LL_PROCESS_HIST_MAX. */
+struct ll_rw_extents_info {
+ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
+};
+
+#define LL_OFFSET_HIST_MAX 100 /* number of entries in ll_sb_info.ll_rw_offset_info[] */
+struct ll_rw_process_info { /* element type of both ll_sb_info.ll_rw_process_info[] and ll_sb_info.ll_rw_offset_info[] */
+ pid_t rw_pid;
+ int rw_op; /* NOTE(review): possibly the read/write selector passed as rw to ll_rw_stats_tally() -- confirm */
+ loff_t rw_range_start;
+ loff_t rw_range_end;
+ loff_t rw_last_file_pos;
+ loff_t rw_offset;
+ size_t rw_smallest_extent;
+ size_t rw_largest_extent;
+ struct file *rw_last_file;
+};
+
/* flags for sbi->ll_flags */
#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
struct list_head ll_conn_chain; /* per-conn chain of SBs */
struct lustre_client_ocd ll_lco;
- struct hlist_head ll_orphan_dentry_list; /*please don't ask -p*/
+ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
struct ll_close_queue *ll_lcq;
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
/* =0 - hold lock over whole read/write
* >0 - max. chunk to be read/written w/o lock re-acquiring */
unsigned long ll_max_rw_chunk;
+ struct ll_rw_extents_info ll_rw_extents_info;
+ int ll_extent_process_count;
+ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
+ unsigned int ll_offset_process_count;
+ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
+ unsigned int ll_rw_offset_entry_count;
};
#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
int ll_md_close(struct obd_export *md_exp, struct inode *inode,
struct file *file);
int ll_md_real_close(struct inode *inode, int flags);
+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
+ *file, size_t count, int rw);
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
struct lookup_intent *it, struct kstat *stat);
{
struct ll_sb_info *sbi = NULL;
class_uuid_t uuid;
+ int i;
ENTRY;
OBD_ALLOC(sbi, sizeof(*sbi));
spin_lock_init(&sbi->ll_lock);
spin_lock_init(&sbi->ll_lco.lco_lock);
INIT_LIST_HEAD(&sbi->ll_pglist);
- sbi->ll_pglist_gen = 0;
if (num_physpages >> (20 - PAGE_SHIFT) < 512)
sbi->ll_async_page_max = num_physpages / 2;
else
SBI_DEFAULT_READAHEAD_WHOLE_MAX;
INIT_LIST_HEAD(&sbi->ll_conn_chain);
- INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
class_generate_random_uuid(uuid);
class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
INIT_LIST_HEAD(&sbi->ll_deathrow);
spin_lock_init(&sbi->ll_deathrow_lock);
+ for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
+ }
+
RETURN(sbi);
}
RETURN(rc);
}
-int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
+static int client_common_fill_super(struct super_block *sb,
+ char *md, char *dt)
{
struct inode *root = 0;
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct lu_fid rootfid;
struct obd_statfs osfs;
struct ptlrpc_request *request = NULL;
- struct lustre_handle osc_conn = {0, };
+ struct lustre_handle dt_conn = {0, };
struct lustre_handle md_conn = {0, };
struct obd_connect_data *data = NULL;
- struct lustre_md md;
+ struct lustre_md lmd;
int err;
ENTRY;
- obd = class_name2obd(mdc);
+ obd = class_name2obd(md);
if (!obd) {
- CERROR("MDC %s: not setup or attached\n", mdc);
+ CERROR("MD %s: not setup or attached\n", md);
RETURN(-EINVAL);
}
if (proc_lustre_fs_root) {
err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
- osc, mdc);
+ dt, md);
if (err < 0)
CERROR("could not register mount in /proc/lustre");
}
err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data);
if (err == -EBUSY) {
- CERROR("An MDT (mdc %s) is performing recovery, of which this"
- " client is not a part. Please wait for recovery to "
- "complete, abort, or time out.\n", mdc);
+ LCONSOLE_ERROR("An MDT (md %s) is performing recovery, of "
+ "which this client is not a part. Please wait "
+ "for recovery to complete, abort, or "
+ "time out.\n", md);
GOTO(out, err);
} else if (err) {
- CERROR("cannot connect to %s: rc = %d\n", mdc, err);
+ CERROR("cannot connect to %s: rc = %d\n", md, err);
GOTO(out, err);
}
sbi->ll_md_exp = class_conn2export(&md_conn);
err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ);
if (err)
- GOTO(out_mdc, err);
+ GOTO(out_md, err);
LASSERT(osfs.os_bsize);
sb->s_blocksize = osfs.os_bsize;
* on all clients. */
/* s_dev is also used in lt_compare() to compare two fs, but that is
* only a node-local comparison. */
+
+ /* XXX: this will not work with LMV */
sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
#endif
err = ll_fid_md_init(sbi);
if (err) {
CERROR("can't init FIDs framework, rc %d\n", err);
- GOTO(out_mdc, err);
+ GOTO(out_md, err);
}
- obd = class_name2obd(osc);
+ obd = class_name2obd(dt);
if (!obd) {
- CERROR("OSC %s: not setup or attached\n", osc);
+ CERROR("DT %s: not setup or attached\n", dt);
GOTO(out_md_fid, err = -ENODEV);
}
obd->obd_upcall.onu_upcall = ll_ocd_update;
data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << PAGE_SHIFT;
-
- err = obd_connect(NULL, &osc_conn, obd, &sbi->ll_sb_uuid, data);
+ err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data);
if (err == -EBUSY) {
- CERROR("An OST (osc %s) is performing recovery, of which this"
- " client is not a part. Please wait for recovery to "
- "complete, abort, or time out.\n", osc);
+ LCONSOLE_ERROR("An OST (dt %s) is performing recovery, of which this"
+ " client is not a part. Please wait for recovery to "
+ "complete, abort, or time out.\n", dt);
GOTO(out, err);
} else if (err) {
- CERROR("cannot connect to %s: rc = %d\n", osc, err);
- GOTO(out_mdc, err);
+ CERROR("cannot connect to %s: rc = %d\n", dt, err);
+ GOTO(out_md, err);
}
- sbi->ll_dt_exp = class_conn2export(&osc_conn);
+ sbi->ll_dt_exp = class_conn2export(&dt_conn);
spin_lock(&sbi->ll_lco.lco_lock);
sbi->ll_lco.lco_flags = data->ocd_connect_flags;
LCONSOLE_ERROR("There are no OST's in this filesystem. "
"There must be at least one active OST for "
"a client to start.\n");
- GOTO(out_osc, err);
+ GOTO(out_dt, err);
}
if (!ll_async_page_slab) {
ll_async_page_slab_size,
0, 0, NULL, NULL);
if (!ll_async_page_slab)
- GOTO(out_osc, -ENOMEM);
+ GOTO(out_dt, -ENOMEM);
}
/* init FIDs framework */
err = ll_fid_dt_init(sbi);
if (err) {
CERROR("can't init FIDs framework, rc %d\n", err);
- GOTO(out_osc, err);
+ GOTO(out_dt, err);
}
err = md_getstatus(sbi->ll_md_exp, &rootfid);
0, &request);
if (err) {
CERROR("md_getattr failed for root: rc = %d\n", err);
- GOTO(out_osc, err);
+ GOTO(out_dt, err);
}
err = md_get_lustre_md(sbi->ll_md_exp, request,
REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp,
- &md);
+ &lmd);
if (err) {
CERROR("failed to understand root inode md: rc = %d\n", err);
ptlrpc_req_finished (request);
- GOTO(out_osc, err);
+ GOTO(out_dt, err);
}
LASSERT(fid_is_sane(&sbi->ll_root_fid));
- root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &md);
+ root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd);
ptlrpc_req_finished(request);
if (root == NULL || is_bad_inode(root)) {
- md_free_lustre_md(sbi->ll_dt_exp, &md);
+ md_free_lustre_md(sbi->ll_dt_exp, &lmd);
CERROR("lustre_lite: bad iget4 for root\n");
GOTO(out_root, err = -EBADF);
}
iput(root);
out_dt_fid:
obd_fid_fini(sbi->ll_dt_exp);
-out_osc:
+out_dt:
obd_disconnect(sbi->ll_dt_exp);
sbi->ll_dt_exp = NULL;
out_md_fid:
obd_fid_fini(sbi->ll_md_exp);
-out_mdc:
+out_md:
obd_disconnect(sbi->ll_md_exp);
sbi->ll_md_exp = NULL;
out:
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
void lustre_throw_orphan_dentries(struct super_block *sb)
{
- struct hlist_node *tmp, *next;
+ struct dentry *dentry, *next;
struct ll_sb_info *sbi = ll_s2sbi(sb);
/* Do this to get rid of orphaned dentries. That is not really trw. */
- hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
- struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
+ list_for_each_entry_safe(dentry, next, &sbi->ll_orphan_dentry_list,
+ d_hash) {
CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping "
"before and after shrink_dcache_parent\n",
dentry->d_name.len, dentry->d_name.name, dentry, next);
CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+ cfs_module_get();
+
/* client additional sb info */
lsi->lsi_llsbi = sbi = ll_init_sbi();
- if (!sbi)
+ if (!sbi) {
+ cfs_module_put();
RETURN(-ENOMEM);
+ }
err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
if (err)
lustre_common_put_super(sb);
LCONSOLE_WARN("client %s umount complete\n", ll_instance);
+
+ cfs_module_put();
+
EXIT;
} /* client_put_super */
inode = igrab(lock->l_ast_data);
} else {
inode = lock->l_ast_data;
- if (inode->i_state & I_FREEING)
- __LDLM_DEBUG(D_INFO, lock,
- "l_ast_data %p is bogus: magic %08x",
- lock->l_ast_data, lli->lli_inode_magic);
- else
- __LDLM_DEBUG(D_WARNING, lock,
- "l_ast_data %p is bogus: magic %08x",
- lock->l_ast_data, lli->lli_inode_magic);
-
+ ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
+ D_INFO : D_WARNING,
+ lock, __FILE__, __func__, __LINE__,
+ "l_ast_data %p is bogus: magic %08x",
+ lock->l_ast_data, lli->lli_inode_magic);
inode = NULL;
}
}
/* /proc/lustre/llite mount point registration */
struct file_operations llite_dump_pgcache_fops;
struct file_operations ll_ra_stats_fops;
+struct file_operations ll_rw_extents_stats_fops;
+struct file_operations ll_rw_extents_stats_pp_fops;
+struct file_operations ll_rw_offset_stats_fops;
static int ll_rd_blksize(char *page, char **start, off_t off, int count,
int *eof, void *data)
entry->proc_fops = &llite_dump_pgcache_fops;
entry->data = sbi;
- entry = create_proc_entry("read_ahead_stats", 0444, sbi->ll_proc_root);
+ entry = create_proc_entry("read_ahead_stats", 0644, sbi->ll_proc_root);
if (entry == NULL)
GOTO(out, err = -ENOMEM);
entry->proc_fops = &ll_ra_stats_fops;
entry->data = sbi;
+ entry = create_proc_entry("extents_stats", 0644, sbi->ll_proc_root);
+ if (entry == NULL)
+ GOTO(out, err = -ENOMEM);
+ entry->proc_fops = &ll_rw_extents_stats_fops;
+ entry->data = sbi;
+
+ entry = create_proc_entry("extents_stats_per_process", 0644,
+ sbi->ll_proc_root);
+ if (entry == NULL)
+ GOTO(out, err = -ENOMEM);
+ entry->proc_fops = &ll_rw_extents_stats_pp_fops;
+ entry->data = sbi;
+
+ entry = create_proc_entry("offset_stats", 0644, sbi->ll_proc_root);
+ if (entry == NULL)
+ GOTO(out, err = -ENOMEM);
+ entry->proc_fops = &ll_rw_offset_stats_fops;
+ entry->data = sbi;
+
svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES);
if (svc_stats == NULL) {
err = -ENOMEM;
return dummy_llap;
}
-static void llite_dump_pgcache_seq_stop(struct seq_file *seq, void *v)
+static void null_stop(struct seq_file *seq, void *v)
{
}
struct seq_operations llite_dump_pgcache_seq_sops = {
.start = llite_dump_pgcache_seq_start,
- .stop = llite_dump_pgcache_seq_stop,
+ .stop = null_stop,
.next = llite_dump_pgcache_seq_next,
.show = llite_dump_pgcache_seq_show,
};
return 0;
}
-static void *ll_ra_stats_seq_start(struct seq_file *p, loff_t *pos)
+static void *ll_stats_seq_start(struct seq_file *p, loff_t *pos)
{
if (*pos == 0)
return (void *)1;
return NULL;
}
-static void *ll_ra_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *ll_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
{
++*pos;
return NULL;
}
-static void ll_ra_stats_seq_stop(struct seq_file *p, void *v)
-{
-}
+
struct seq_operations ll_ra_stats_seq_sops = {
- .start = ll_ra_stats_seq_start,
- .stop = ll_ra_stats_seq_stop,
- .next = ll_ra_stats_seq_next,
+ .start = ll_stats_seq_start,
+ .stop = null_stop,
+ .next = ll_stats_seq_next,
.show = ll_ra_stats_seq_show,
};
.release = seq_release,
};
+/* Integer percentage of a relative to b; evaluates to 0 when b is 0.
+ * Both arguments are parenthesized so that expression arguments expand with
+ * the intended precedence.  Note that b is evaluated twice. */
+#define pct(a,b) ((b) ? (a) * 100 / (b) : 0)
+
+/*
+ * Print the read and write extent-size histograms of one pp_extents[] slot.
+ *
+ * io_extents: histogram table to read from
+ * seq:        seq_file that receives the output
+ * which:      index into io_extents->pp_extents[]; callers in this file pass
+ *             either a per-process slot or LL_PROCESS_HIST_MAX
+ *
+ * One line is printed per bucket with the size range, the number of calls,
+ * the percentage of all calls and the cumulative percentage, for reads and
+ * for writes.  Output stops after the bucket at which both cumulative counts
+ * have reached their totals.
+ */
+static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
+                                    struct seq_file *seq, int which)
+{
+        struct per_process_info *pp = &io_extents->pp_extents[which];
+        unsigned long rd_total = 0, wr_total = 0;
+        unsigned long rd_running = 0, wr_running = 0;
+        unsigned long lo = 0, hi, rd, wr;
+        char *unit = "KMGTPEZY";
+        int shift = 10;
+        int bucket;
+
+        /* first pass: totals, needed for the percentage columns */
+        for (bucket = 0; bucket < LL_HIST_MAX; bucket++) {
+                rd_total += pp->pp_r_hist.oh_buckets[bucket];
+                wr_total += pp->pp_w_hist.oh_buckets[bucket];
+        }
+
+        /* second pass: one output line per bucket */
+        for (bucket = 0; bucket < LL_HIST_MAX; bucket++) {
+                rd = pp->pp_r_hist.oh_buckets[bucket];
+                wr = pp->pp_w_hist.oh_buckets[bucket];
+                rd_running += rd;
+                wr_running += wr;
+                hi = 1 << (bucket + LL_HIST_START - shift);
+                seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | "
+                           "%14lu %4lu %4lu\n", lo, *unit, hi, *unit,
+                           (bucket == LL_HIST_MAX - 1) ? '+' : ' ',
+                           rd, pct(rd, rd_total), pct(rd_running, rd_total),
+                           wr, pct(wr, wr_total), pct(wr_running, wr_total));
+                lo = hi;
+                /* after 1024 of the current unit, move to the next letter */
+                if (lo == (1 << 10)) {
+                        lo = 1;
+                        shift += 10;
+                        unit++;
+                }
+                if (rd_running == rd_total && wr_running == wr_total)
+                        break;
+        }
+}
+
+/*
+ * seq_file show handler for the per-process extent statistics.  Prints a
+ * timestamp and the column headers, then one histogram for every slot of
+ * pp_extents[0 .. LL_PROCESS_HIST_MAX - 1] whose pid is non-zero.  The slots
+ * are walked with sbi->ll_lock held.  Always returns 0.
+ */
+static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
+{
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *extents = &sbi->ll_rw_extents_info;
+        struct timeval stamp;
+        int slot;
+
+        do_gettimeofday(&stamp);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   stamp.tv_sec, stamp.tv_usec);
+        seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+
+        spin_lock(&sbi->ll_lock);
+        for (slot = 0; slot < LL_PROCESS_HIST_MAX; slot++) {
+                /* pid 0 marks a slot that is not in use */
+                if (extents->pp_extents[slot].pid == 0)
+                        continue;
+                seq_printf(seq, "\nPID: %d\n",
+                           extents->pp_extents[slot].pid);
+                ll_display_extents_info(extents, seq, slot);
+        }
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+/* seq_file iterator for "extents_stats_per_process": shared single-record
+ * start/next/stop helpers plus the per-process show routine */
+struct seq_operations ll_rw_extents_stats_pp_seq_sops = {
+        .start = ll_stats_seq_start,
+        .next  = ll_stats_seq_next,
+        .stop  = null_stop,
+        .show  = ll_rw_extents_stats_pp_seq_show,
+};
+
+/*
+ * open() handler: attach the per-process seq_operations to the file and make
+ * the proc entry's data pointer available to the show routine through
+ * seq->private.  Returns 0 on success or the error from seq_open().
+ */
+static int ll_rw_extents_stats_pp_seq_open(struct inode *inode,
+                                           struct file *file)
+{
+        struct proc_dir_entry *entry = PDE(inode);
+        int rc = seq_open(file, &ll_rw_extents_stats_pp_seq_sops);
+
+        if (rc == 0) {
+                struct seq_file *m = file->private_data;
+
+                m->private = entry->data;
+        }
+        return rc;
+}
+
+/*
+ * write() handler: any write resets the per-process extent statistics.  The
+ * written data itself is ignored.  Every slot below LL_PROCESS_HIST_MAX gets
+ * its pid zeroed and both histograms cleared, under sbi->ll_lock.
+ * Returns len, i.e. the whole write is reported as consumed.
+ */
+static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
+                                                const char *buf, size_t len,
+                                                loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *extents = &sbi->ll_rw_extents_info;
+        int slot = 0;
+
+        spin_lock(&sbi->ll_lock);
+        while (slot < LL_PROCESS_HIST_MAX) {
+                extents->pp_extents[slot].pid = 0;
+                lprocfs_oh_clear(&extents->pp_extents[slot].pp_r_hist);
+                lprocfs_oh_clear(&extents->pp_extents[slot].pp_w_hist);
+                slot++;
+        }
+        spin_unlock(&sbi->ll_lock);
+        return len;
+}
+
+/* File operations for "extents_stats_per_process": reads go through
+ * seq_file, a write resets the statistics */
+struct file_operations ll_rw_extents_stats_pp_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_extents_stats_pp_seq_open,
+        .release = seq_release,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .write   = ll_rw_extents_stats_pp_seq_write,
+};
+
+/*
+ * seq_file show handler for the aggregate extent statistics.  Prints a
+ * timestamp and the column headers, then the histogram kept in slot
+ * LL_PROCESS_HIST_MAX of pp_extents[], with sbi->ll_lock held while the
+ * histogram is read.  Always returns 0.
+ */
+static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct ll_sb_info *sbi = seq->private;
+        struct timeval stamp;
+
+        do_gettimeofday(&stamp);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   stamp.tv_sec, stamp.tv_usec);
+
+        seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+
+        spin_lock(&sbi->ll_lock);
+        ll_display_extents_info(&sbi->ll_rw_extents_info, seq,
+                                LL_PROCESS_HIST_MAX);
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+/* seq_file iterator for "extents_stats": shared single-record
+ * start/next/stop helpers plus the aggregate show routine */
+struct seq_operations ll_rw_extents_stats_seq_sops = {
+        .start = ll_stats_seq_start,
+        .next  = ll_stats_seq_next,
+        .stop  = null_stop,
+        .show  = ll_rw_extents_stats_seq_show,
+};
+
+/*
+ * open() handler for "extents_stats": set up the seq_file and hand the proc
+ * entry's data pointer to the show routine via seq->private.
+ * Returns 0 on success or the error from seq_open().
+ */
+static int ll_rw_extents_stats_seq_open(struct inode *inode, struct file *file)
+{
+        struct proc_dir_entry *entry = PDE(inode);
+        struct seq_file *m;
+        int err;
+
+        err = seq_open(file, &ll_rw_extents_stats_seq_sops);
+        if (err != 0)
+                return err;
+
+        m = file->private_data;
+        m->private = entry->data;
+        return 0;
+}
+
+/*
+ * write() handler for "extents_stats": any write clears the aggregate read
+ * and write histograms kept in slot LL_PROCESS_HIST_MAX.  The written data
+ * itself is ignored.
+ *
+ * The clear is done under sbi->ll_lock, the same lock that the show routine,
+ * ll_rw_stats_tally() and the per-process clear hold while touching these
+ * buckets, so a reset cannot interleave with a concurrent update or dump.
+ *
+ * Returns len, i.e. the whole write is reported as consumed.
+ */
+static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf,
+                                             size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        spin_lock(&sbi->ll_lock);
+        lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist);
+        lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist);
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+/* File operations for "extents_stats": reads go through seq_file, a write
+ * resets the aggregate histograms */
+struct file_operations ll_rw_extents_stats_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_extents_stats_seq_open,
+        .release = seq_release,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .write   = ll_rw_extents_stats_seq_write,
+};
+
+/*
+ * Record one read (rw == 0) or write (rw != 0) of count bytes issued by
+ * process pid on file in the per-mount I/O statistics kept in sbi.
+ *
+ * Two sets of statistics are updated, both with sbi->ll_lock held:
+ *  - extent statistics: the size-histogram bucket matching count is
+ *    incremented for the slot tracking pid and for the aggregate slot at
+ *    index LL_PROCESS_HIST_MAX;
+ *  - offset statistics: the range currently tracked for pid is extended, or,
+ *    when file->f_pos does not continue the previous access, the finished
+ *    range is copied to ll_rw_offset_info[] and a new range is started.
+ *
+ * Slots in all three tables are reused round-robin once the tables are full.
+ */
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+                       struct file *file, size_t count, int rw)
+{
+        int i, cur = -1;
+        struct ll_rw_process_info *process;
+        struct ll_rw_process_info *offset;
+        /* both counters are declared unsigned int in struct ll_sb_info */
+        unsigned int *off_count = &sbi->ll_rw_offset_entry_count;
+        unsigned int *process_count = &sbi->ll_offset_process_count;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        process = sbi->ll_rw_process_info;
+        offset = sbi->ll_rw_offset_info;
+
+        spin_lock(&sbi->ll_lock);
+        /* Extent statistics */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (io_extents->pp_extents[i].pid == pid) {
+                        cur = i;
+                        break;
+                }
+        }
+
+        if (cur == -1) {
+                /* new process: take over the next slot, round-robin */
+                sbi->ll_extent_process_count =
+                        (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
+                cur = sbi->ll_extent_process_count;
+                io_extents->pp_extents[cur].pid = pid;
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
+        }
+
+        /* Find the histogram bucket for this size.  The index bound is tested
+         * before the threshold is computed, and the threshold is computed in
+         * 64 bits, so the shift cannot overflow an int for large indices. */
+        for (i = 0; i < (LL_HIST_MAX - 1) &&
+                    count >= (1ULL << (LL_HIST_START + i)); i++)
+                ;
+        if (rw == 0) {
+                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+        } else {
+                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+        }
+
+        /* Offset statistics */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid != pid)
+                        continue;
+
+                if (process[i].rw_last_file != file) {
+                        /* same process, different file: restart tracking */
+                        process[i].rw_range_start = file->f_pos;
+                        process[i].rw_last_file_pos = file->f_pos + count;
+                        process[i].rw_smallest_extent = count;
+                        process[i].rw_largest_extent = count;
+                        process[i].rw_offset = 0;
+                        process[i].rw_last_file = file;
+                        spin_unlock(&sbi->ll_lock);
+                        return;
+                }
+                if (process[i].rw_last_file_pos != file->f_pos) {
+                        /* discontiguous access: save the finished range in
+                         * the offset table and start a new one */
+                        *off_count = (*off_count + 1) % LL_OFFSET_HIST_MAX;
+                        offset[*off_count].rw_op = process[i].rw_op;
+                        offset[*off_count].rw_pid = pid;
+                        offset[*off_count].rw_range_start =
+                                process[i].rw_range_start;
+                        offset[*off_count].rw_range_end =
+                                process[i].rw_last_file_pos;
+                        offset[*off_count].rw_smallest_extent =
+                                process[i].rw_smallest_extent;
+                        offset[*off_count].rw_largest_extent =
+                                process[i].rw_largest_extent;
+                        offset[*off_count].rw_offset = process[i].rw_offset;
+                        process[i].rw_op = rw;
+                        process[i].rw_range_start = file->f_pos;
+                        process[i].rw_smallest_extent = count;
+                        process[i].rw_largest_extent = count;
+                        process[i].rw_offset = file->f_pos -
+                                process[i].rw_last_file_pos;
+                }
+                if (process[i].rw_smallest_extent > count)
+                        process[i].rw_smallest_extent = count;
+                if (process[i].rw_largest_extent < count)
+                        process[i].rw_largest_extent = count;
+                process[i].rw_last_file_pos = file->f_pos + count;
+                spin_unlock(&sbi->ll_lock);
+                return;
+        }
+
+        /* pid is not tracked yet: take over the next slot, round-robin */
+        *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
+        process[*process_count].rw_pid = pid;
+        process[*process_count].rw_op = rw;
+        process[*process_count].rw_range_start = file->f_pos;
+        process[*process_count].rw_last_file_pos = file->f_pos + count;
+        process[*process_count].rw_smallest_extent = count;
+        process[*process_count].rw_largest_extent = count;
+        process[*process_count].rw_offset = 0;
+        process[*process_count].rw_last_file = file;
+        spin_unlock(&sbi->ll_lock);
+}
+
+char lpszt[] = LPSZ; /* copy of the LPSZ format string; ll_rw_offset_stats_seq_show() uses lpszt+1 to skip its first character */
+
+/*
+ * seq_file show handler for "offset_stats".  Prints a timestamp and a header
+ * line, then every entry of ll_rw_offset_info[] with a non-zero pid (the
+ * saved discontiguous ranges), then every entry of ll_rw_process_info[] with
+ * a non-zero pid (the range each process is currently in).  The tables are
+ * read with sbi->ll_lock held.  Always returns 0.
+ *
+ * The row format is assembled at run time because the two extent columns
+ * take the conversion from LPSZ with a field width inserted.  lpszt+1 is
+ * LPSZ without its leading '%'.
+ */
+static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
+        struct ll_rw_process_info *process = sbi->ll_rw_process_info;
+        char format[50];
+        int i;
+
+        do_gettimeofday(&now);
+
+        spin_lock(&sbi->ll_lock);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
+                   "R/W", "PID", "RANGE START", "RANGE END",
+                   "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
+        /* bounded write: never run past the end of format[] */
+        snprintf(format, sizeof(format), "%s%s%s%s%s\n",
+                 "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1,
+                 " %14Ld");
+        /* We stored the discontiguous offsets here; print them first */
+        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
+                if (offset[i].rw_pid != 0) {
+                        seq_printf(seq, format,
+                                   offset[i].rw_op ? 'W' : 'R',
+                                   offset[i].rw_pid,
+                                   offset[i].rw_range_start,
+                                   offset[i].rw_range_end,
+                                   offset[i].rw_smallest_extent,
+                                   offset[i].rw_largest_extent,
+                                   offset[i].rw_offset);
+                }
+        }
+        /* Then print the current offsets for each process */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid != 0) {
+                        seq_printf(seq, format,
+                                   process[i].rw_op ? 'W' : 'R',
+                                   process[i].rw_pid,
+                                   process[i].rw_range_start,
+                                   process[i].rw_last_file_pos,
+                                   process[i].rw_smallest_extent,
+                                   process[i].rw_largest_extent,
+                                   process[i].rw_offset);
+                }
+        }
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+/* seq_file iterator for "offset_stats": shared single-record start/next/stop
+ * helpers plus the offset show routine */
+struct seq_operations ll_rw_offset_stats_seq_sops = {
+        .start = ll_stats_seq_start,
+        .next  = ll_stats_seq_next,
+        .stop  = null_stop,
+        .show  = ll_rw_offset_stats_seq_show,
+};
+
+/*
+ * open() handler for "offset_stats": set up the seq_file and pass the proc
+ * entry's data pointer to the show routine via seq->private.
+ * Returns 0 on success or the error from seq_open().
+ */
+static int ll_rw_offset_stats_seq_open(struct inode *inode, struct file *file)
+{
+        struct proc_dir_entry *entry = PDE(inode);
+        int rc = seq_open(file, &ll_rw_offset_stats_seq_sops);
+
+        if (rc == 0) {
+                struct seq_file *m = file->private_data;
+
+                m->private = entry->data;
+        }
+        return rc;
+}
+
+/*
+ * write() handler for "offset_stats": any write resets the offset
+ * statistics.  The written data itself is ignored.  Both round-robin
+ * counters are set to 0 and the first LL_PROCESS_HIST_MAX process entries
+ * and LL_OFFSET_HIST_MAX offset entries are zeroed, under sbi->ll_lock.
+ * Returns len, i.e. the whole write is reported as consumed.
+ */
+static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf,
+                                            size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *per_process = sbi->ll_rw_process_info;
+        struct ll_rw_process_info *saved = sbi->ll_rw_offset_info;
+        size_t entry_size = sizeof(struct ll_rw_process_info);
+
+        spin_lock(&sbi->ll_lock);
+        sbi->ll_offset_process_count = 0;
+        sbi->ll_rw_offset_entry_count = 0;
+        memset(per_process, 0, entry_size * LL_PROCESS_HIST_MAX);
+        memset(saved, 0, entry_size * LL_OFFSET_HIST_MAX);
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+/* File operations for "offset_stats": reads go through seq_file, a write
+ * resets the offset statistics */
+struct file_operations ll_rw_offset_stats_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_offset_stats_seq_open,
+        .release = seq_release,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .write   = ll_rw_offset_stats_seq_write,
+};
+
LPROCFS_INIT_VARS(llite, NULL, lprocfs_obd_vars)
#endif /* LPROCFS */
static void __exit exit_lustre_lite(void)
{
int rc;
- int rc;
lustre_register_client_fill_super(NULL);
lustre_register_client_process_config(NULL);
-
+
ll_unregister_cache(&ll_cache_definition);
rc = kmem_cache_destroy(ll_file_data_slab);
rc = vfs_follow_link(nd, symname);
ptlrpc_req_finished(request);
+ EXIT;
out:
- RETURN(rc);
+ return rc;
}
struct inode_operations ll_fast_symlink_inode_operations = {
if (rc)
RETURN(rc);
+ /* b10667: ignore lustre special xattr for now */
+ if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0)
+ RETURN(0);
+
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
name, value, size, 0, flags, &req);
if (rc) {
RETURN(-EINVAL);
}
- rc = obd_llog_init(obd, mdc_obd, 0, NULL);
+ rc = obd_llog_init(obd, mdc_obd, 0, NULL, tgt_uuid);
if (rc) {
lmv_init_unlock(lmv);
CERROR("lmv failed to setup llogging subsystems\n");
}
static int lmv_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid, struct obd_uuid *uuid)
{
struct llog_ctxt *ctxt;
int rc;
}
if (lmm->lmm_stripe_size == 0 ||
- (__u64)le32_to_cpu(lmm->lmm_stripe_size) * stripe_count > ~0UL) {
+ (__u64)le32_to_cpu(lmm->lmm_stripe_size)*stripe_count > 0xffffffff){
CERROR("bad stripe size %u\n",
le32_to_cpu(lmm->lmm_stripe_size));
lov_dump_lmm_v1(D_WARNING, lmm);
obd_off *lov_off, unsigned long *swidth)
{
if (swidth)
- *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+ *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
}
static void
obd_off *lov_off, unsigned long *swidth)
{
if (swidth)
- *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+ *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
}
static obd_off
return 0;
}
+/* lsm_stripe_offset_by_offset method for the plain layout: always 0,
+ * whatever lov_off is */
+static obd_off lsm_stripe_offset_by_offset_plain(struct lov_stripe_md *lsm,
+                                                 obd_off lov_off)
+{
+        return 0;
+}
+
static int
lsm_stripe_index_by_offset_plain(struct lov_stripe_md *lsm,
obd_off lov_off)
.lsm_stripe_by_index = lsm_stripe_by_index_plain,
.lsm_stripe_by_offset = lsm_stripe_by_offset_plain,
.lsm_revalidate = lsm_revalidate_plain,
- .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
- .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
+ .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
+ .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
+ .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
.lsm_lmm_verify = lsm_lmm_verify_plain,
.lsm_unpackmd = lsm_unpackmd_plain,
};
*stripeno -= le->le_loi_idx;
if (swidth)
- *swidth = lsm->lsm_stripe_size * le->le_stripe_count;
+ *swidth = (ulong)lsm->lsm_stripe_size * le->le_stripe_count;
if (lov_off) {
struct lov_extent *lov_le = lovea_off2le(lsm, *lov_off);
*stripeno -= le->le_loi_idx;
if (swidth)
- *swidth = lsm->lsm_stripe_size * le->le_stripe_count;
+ *swidth = (ulong)lsm->lsm_stripe_size * le->le_stripe_count;
}
static obd_off
return le ? le->le_start : 0;
}
+/* lsm_stripe_offset_by_offset method for the join layout: le_start of the
+ * extent that lovea_off2le() finds for lov_off, or 0 if it finds none */
+static obd_off lsm_stripe_offset_by_offset_join(struct lov_stripe_md *lsm,
+                                                obd_off lov_off)
+{
+        struct lov_extent *extent = lovea_off2le(lsm, lov_off);
+
+        if (extent == NULL)
+                return 0;
+
+        return extent->le_start;
+}
+
static int
lsm_stripe_index_by_offset_join(struct lov_stripe_md *lsm,
obd_off lov_off)
.lsm_stripe_by_index = lsm_stripe_by_index_join,
.lsm_stripe_by_offset = lsm_stripe_by_offset_join,
.lsm_revalidate = lsm_revalidate_join,
- .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join,
- .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join,
+ .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join,
+ .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_join,
+ .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join,
.lsm_lmm_verify = lsm_lmm_verify_join,
.lsm_unpackmd = lsm_unpackmd_join,
};
int lov_fini_statfs_set(struct lov_request_set *set);
/* lov_obd.c */
+void lov_fix_desc(struct lov_desc *desc);
int lov_get_stripecnt(struct lov_obd *lov, int stripe_count);
void lov_getref(struct obd_device *obd);
void lov_putref(struct obd_device *obd);
/* lov_log.c */
int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid);
+ int count, struct llog_catid *logid, struct obd_uuid *uuid);
int lov_llog_finish(struct obd_device *obd, int count);
/* lov_pack.c */
{
struct obd_device *obd = ctxt->loc_obd;
struct lov_obd *lov = &obd->u.lov;
- int i, rc = 0;
+ int i, rc = 0, err = 0;
ENTRY;
- /* We might have added an osc and not told the mds yet */
- if (count != lov->desc.ld_tgt_count)
- CERROR("Origin connect mds cnt %d != lov cnt %d\n", count,
- lov->desc.ld_tgt_count);
-
lov_getref(obd);
for (i = 0; i < count; i++) {
struct obd_device *child;
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
continue;
- child = lov->lov_tgts[i]->ltd_exp->exp_obd;
- cctxt = llog_get_context(child, ctxt->loc_idx);
if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid))
continue;
-
+ CDEBUG(D_CONFIG, "connect %d/%d\n", i, count);
+ child = lov->lov_tgts[i]->ltd_exp->exp_obd;
+ cctxt = llog_get_context(child, ctxt->loc_idx);
rc = llog_connect(cctxt, 1, logid, gen, uuid);
if (rc) {
CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc);
- break;
+ if (!err)
+ err = rc;
}
}
lov_putref(obd);
- RETURN(rc);
+ RETURN(err);
}
/* the replicators commit callback */
};
int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid, struct obd_uuid *uuid)
{
struct lov_obd *lov = &obd->u.lov;
struct obd_device *child;
if (rc)
RETURN(rc);
- CDEBUG(D_CONFIG, "llog init with %d/%d targets\n",
- count, lov->desc.ld_tgt_count);
- /* count may not match ld_tgt_count during dynamic ost add */
-
lov_getref(obd);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ /* count may not match lov->desc.ld_tgt_count during dynamic ost add */
+ for (i = 0; i < count; i++) {
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
continue;
+ if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid))
+ continue;
+ CDEBUG(D_CONFIG, "init %d/%d\n", i, count);
LASSERT(lov->lov_tgts[i]->ltd_exp);
child = lov->lov_tgts[i]->ltd_exp->exp_obd;
- if (lov->lov_tgts[i]->ltd_exp->exp_imp_reverse) {
- CERROR("NZR: idx %d import state %s\n", i,
- ptlrpc_import_state_name(lov->lov_tgts[i]->ltd_exp->exp_imp_reverse->imp_state));
- } else {
- CERROR("NZR: idx %d no import\n", i);
- }
- rc = obd_llog_init(child, tgt, 1, logid + i);
+ rc = obd_llog_init(child, tgt, 1, logid + i, uuid);
if (rc) {
CERROR("error osc_llog_init idx %d osc '%s' tgt '%s' "
"(rc=%d)\n", i, child->obd_name, tgt->obd_name,
}
}
-static void lov_fix_desc(struct lov_desc *desc)
+void lov_fix_desc(struct lov_desc *desc)
{
if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
- CWARN("Increasing default_stripe_size "LPU64" to %u\n",
- desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
+ LCONSOLE_WARN("Increasing default stripe size to min %u\n",
+ PTLRPC_MAX_BRW_SIZE);
desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
} else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
- CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n",
- desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
- CWARN("changing to "LPU64"\n", desc->ld_default_stripe_size);
- }
+ LCONSOLE_WARN("Changing default stripe size to "LPU64" (a "
+ "multiple of %u)\n",
+ desc->ld_default_stripe_size,LOV_MIN_STRIPE_SIZE);
+ }
if (desc->ld_default_stripe_count == 0)
desc->ld_default_stripe_count = 1;
/* from lov_setstripe */
if ((desc->ld_pattern != 0) &&
(desc->ld_pattern != LOV_PATTERN_RAID0)) {
- CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
- desc->ld_pattern);
+ LCONSOLE_WARN("Unknown stripe pattern: %#x\n",desc->ld_pattern);
desc->ld_pattern = 0;
}
}
* of 4GB or larger on 32-bit CPUs. */
count = desc->ld_default_stripe_count;
if ((count > 0 ? count : desc->ld_tgt_count) *
- desc->ld_default_stripe_size > ~0UL) {
- CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
- desc->ld_default_stripe_size, count, ~0UL);
+ desc->ld_default_stripe_size > 0xffffffff) {
+ CERROR("LOV: stripe width "LPU64"x%u > 4294967295 bytes\n",
+ desc->ld_default_stripe_size, count);
RETURN(-EINVAL);
}
}
case LCFG_PARAM: {
struct lprocfs_static_vars lvars;
- struct lov_obd *lov = &obd->u.lov;
- struct lov_desc *desc = &(lov->desc);
- int i;
+ struct lov_desc *desc = &(obd->u.lov.desc);
if (!desc)
GOTO(out, rc = -EINVAL);
lprocfs_init_vars(lov, &lvars);
- /* setparam 0:lov_mdsA 1:default_stripe_size=1048576
- 2:default_stripe_pattern=0 3:default_stripe_offset=0 */
- for (i = 1; i < lcfg->lcfg_bufcount; i++) {
- char *key, *sval;
- long val;
- key = lustre_cfg_buf(lcfg, i);
- sval = strchr(key, '=');
- if (!sval || (*(sval + 1) == 0)) {
- CERROR("Can't parse param %s\n", key);
- rc = -EINVAL;
- /* continue parsing other params */
- continue;
- }
- val = simple_strtol(sval + 1, NULL, 0);
- rc = 0;
- /* LOV_STRIPE_* aren't settable in proc */
- if (class_match_param(key,
- PARAM_LOV_STRIPE_SIZE,0) == 0)
- desc->ld_default_stripe_size = val;
- else if (class_match_param(key,
- PARAM_LOV_STRIPE_COUNT, 0) == 0)
- desc->ld_default_stripe_count = val;
- else if (class_match_param(key,
- PARAM_LOV_STRIPE_OFFSET, 0) == 0)
- desc->ld_default_stripe_offset = val;
- else if (class_match_param(key,
- PARAM_LOV_STRIPE_PATTERN, 0) == 0)
- desc->ld_pattern = val;
- else
- rc = class_process_proc_param(PARAM_LOV,
- lvars.obd_vars,
- lcfg, obd);
- if (rc >= 0) {
- LCONSOLE_INFO("set %s to %ld\n", key, val);
- rc = 0;
- }
- }
- lov_fix_desc(desc);
+ rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars,
+ lcfg, obd);
GOTO(out, rc);
}
default: {
{
unsigned long ssize = lsm->lsm_stripe_size;
unsigned long swidth, stripe_off, this_stripe;
+ uint64_t l_off, s_off;
int magic = lsm->lsm_magic;
int ret = 0;
}
LASSERT(lsm_op_find(magic) != NULL);
+ /* Check whether lov_off and stripeno fall in the same extent:
+ * 1) lov_off extent < stripeno extent: ret = -1, *obd_off = 0
+ * 2) lov_off extent > stripeno extent: ret = 1,
+ *    *obd_off = start offset of the lov_off extent */
+ l_off = lsm_op_find(magic)->lsm_stripe_offset_by_index(lsm, stripeno);
+ s_off = lsm_op_find(magic)->lsm_stripe_offset_by_offset(lsm, lov_off);
+ if (s_off < l_off) {
+ ret = -1;
+ *obd_off = 0;
+ return ret;
+ } else if (s_off > l_off) {
+ ret = 1;
+ *obd_off = s_off;
+ return ret;
+ }
+ /*If they are in the same extent, original logic*/
lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off,
&swidth);
loi->loi_id = req->rq_oi.oi_oa->o_id;
loi->loi_gr = req->rq_oi.oi_oa->o_gr;
loi->loi_ost_idx = req->rq_idx;
- CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPX64" at idx %d\n",
+ CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n",
lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
loi_init(loi);
do_div(osfs->os_ffree, expected_stripes);
spin_lock(&obd->obd_osfs_lock);
- memcpy(&obd->obd_osfs, osfs, sizeof(osfs));
- obd->obd_osfs_age = cfs_time_current_64();
+ memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
+ obd->obd_osfs_age = get_jiffies_64();
spin_unlock(&obd->obd_osfs_lock);
RETURN(0);
}
struct obd_statfs *lov_sfs, int success)
{
spin_lock(&obd->obd_osfs_lock);
- memcpy(&obd->obd_osfs, lov_sfs, sizeof(osfs));
- obd->obd_osfs_age = cfs_time_current_64();
+ memcpy(&obd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
+ obd->obd_osfs_age = get_jiffies_64();
spin_unlock(&obd->obd_osfs_lock);
if (success == 0) {
#include <lprocfs_status.h>
#include <obd_class.h>
#include <linux/seq_file.h>
+#include "lov_internal.h"
#ifdef LPROCFS
static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
}
+/*
+ * proc write handler: parse a 64-bit value from buffer, store it as the
+ * default stripe size of the LOV descriptor, then run lov_fix_desc() on the
+ * descriptor.  Returns count on success, or the non-zero value returned by
+ * lprocfs_write_u64_helper().
+ */
+static int lov_wr_stripesize(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *dev = data;
+        __u64 stripe_size;
+        int rc;
+
+        LASSERT(dev != NULL);
+
+        rc = lprocfs_write_u64_helper(buffer, count, &stripe_size);
+        if (rc != 0)
+                return rc;
+
+        dev->u.lov.desc.ld_default_stripe_size = stripe_size;
+        lov_fix_desc(&dev->u.lov.desc);
+        return count;
+}
+
static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
}
+/*
+ * Write handler behind the "stripeoffset" proc entry: parses a 64-bit value
+ * from @buffer, stores it as the LOV default stripe offset and passes the
+ * descriptor to lov_fix_desc(). Returns @count on success, otherwise the
+ * nonzero result of lprocfs_write_u64_helper().
+ */
+static int lov_wr_stripeoffset(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct obd_device *obd = data;
+ __u64 stripe_offset;
+ int err;
+
+ LASSERT(obd != NULL);
+ err = lprocfs_write_u64_helper(buffer, count, &stripe_offset);
+ if (err != 0)
+ return err;
+
+ obd->u.lov.desc.ld_default_stripe_offset = stripe_offset;
+ lov_fix_desc(&obd->u.lov.desc);
+ return count;
+}
+
static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
return snprintf(page, count, "%u\n", desc->ld_pattern);
}
+/*
+ * Write handler behind the "stripetype" proc entry: parses an int from
+ * @buffer, stores it as the LOV pattern and passes the descriptor to
+ * lov_fix_desc(). Returns @count on success, otherwise the nonzero result
+ * of lprocfs_write_helper().
+ */
+static int lov_wr_stripetype(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct obd_device *obd = data;
+ int pattern;
+ int err;
+
+ LASSERT(obd != NULL);
+ err = lprocfs_write_helper(buffer, count, &pattern);
+ if (err != 0)
+ return err;
+
+ obd->u.lov.desc.ld_pattern = pattern;
+ lov_fix_desc(&obd->u.lov.desc);
+ return count;
+}
+
static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
}
+/*
+ * Write handler behind the "stripecount" proc entry: parses an int from
+ * @buffer, stores it as the LOV default stripe count and passes the
+ * descriptor to lov_fix_desc(). Returns @count on success, otherwise the
+ * nonzero result of lprocfs_write_helper().
+ */
+static int lov_wr_stripecount(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct obd_device *obd = data;
+ int stripe_count;
+ int err;
+
+ LASSERT(obd != NULL);
+ err = lprocfs_write_helper(buffer, count, &stripe_count);
+ if (err != 0)
+ return err;
+
+ obd->u.lov.desc.ld_default_stripe_count = stripe_count;
+ lov_fix_desc(&obd->u.lov.desc);
+ return count;
+}
+
static int lov_rd_numobd(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
struct lprocfs_vars lprocfs_obd_vars[] = {
{ "uuid", lprocfs_rd_uuid, 0, 0 },
- /* If you change the stripe* names,
- make sure lustre_param.h is updated */
- { "stripesize", lov_rd_stripesize, 0, 0 },
- { "stripeoffset", lov_rd_stripeoffset, 0, 0 },
- { "stripecount", lov_rd_stripecount, 0, 0 },
- { "stripetype", lov_rd_stripetype, 0, 0 },
+ { "stripesize", lov_rd_stripesize, lov_wr_stripesize, 0 },
+ { "stripeoffset", lov_rd_stripeoffset, lov_wr_stripeoffset, 0 },
+ { "stripecount", lov_rd_stripecount, lov_wr_stripecount, 0 },
+ { "stripetype", lov_rd_stripetype, lov_wr_stripetype, 0 },
{ "numobd", lov_rd_numobd, 0, 0 },
{ "activeobd", lov_rd_activeobd, 0, 0 },
{ "filestotal", lprocfs_rd_filestotal, 0, 0 },
{ "filesfree", lprocfs_rd_filesfree, 0, 0 },
- /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/
+ /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/
{ "blocksize", lprocfs_rd_blksize, 0, 0 },
{ "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
{ "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
{ "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 },
{ "desc_uuid", lov_rd_desc_uuid, 0, 0 },
- { "qos_prio_free", lov_rd_qos_priofree, lov_wr_qos_priofree, 0 },
- { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 },
+ { "qos_prio_free",lov_rd_qos_priofree, lov_wr_qos_priofree, 0 },
+ { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 },
{ 0 }
};
__u32 dqb_valid; /* flag for above fields */
};
-static inline unsigned int const
-chkquot_hash(qid_t id, int type)
+static inline unsigned int chkquot_hash(qid_t id, int type)
+ __attribute__((__const__));
+
+static inline unsigned int chkquot_hash(qid_t id, int type)
{
return (id * (MAXQUOTAS - type)) % NR_DQHASH;
}
lprocfs_init_vars(mdc, &lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
- rc = obd_llog_init(obd, obd, 0, NULL);
+ rc = obd_llog_init(obd, obd, 0, NULL, NULL);
if (rc) {
mdc_cleanup(obd);
CERROR("failed to setup llogging subsystems\n");
static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid,
+ struct obd_uuid *uuid)
{
struct llog_ctxt *ctxt;
int rc;
int rc;
struct lprocfs_static_vars lvars;
lprocfs_init_vars(mdc, &lvars);
-
+
+ request_module("lquota");
quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface);
init_obd_quota_ops(quota_interface, &mdc_obd_ops);
{
struct obd_device *obd = exp->exp_obd;
if (data != NULL) {
- data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED;
+ data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED;
data->ocd_ibits_known &= MDS_INODELOCK_FULL;
/* If no known bits (which should not happen, probably,
int rc = 0;
ENTRY;
+ OBD_COUNTER_INCREMENT(obd, getattr);
+
body = lustre_swab_reqbuf(req, offset, sizeof(*body),
lustre_swab_mds_body);
if (body == NULL)
/* This will trigger a watchdog timeout */
OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
(MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1);
+ OBD_COUNTER_INCREMENT(obd, statfs);
rc = lustre_pack_reply(req, 2, size, NULL);
if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) {
GOTO(err_qctxt, rc);
lprocfs_init_vars(mds, &lvars);
- lprocfs_obd_setup(obd, lvars.obd_vars);
+ if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 &&
+ lprocfs_alloc_obd_stats(obd, LPROC_MDS_LAST) == 0) {
+ /* Init private stats here */
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_OPEN,
+ /*LPROCFS_CNTR_AVGMINMAX*/0,
+ "open", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_CLOSE,
+ 0, "close", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKNOD,
+ 0, "mknod", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_LINK,
+ 0, "link", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_UNLINK,
+ 0, "unlink", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKDIR,
+ 0, "mkdir", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RMDIR,
+ 0, "rmdir", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RENAME,
+ 0, "rename", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_GETXATTR,
+ 0, "getxattr", "reqs");
+ lprocfs_counter_init(obd->obd_stats, LPROC_MDS_SETXATTR,
+ 0, "setxattr", "reqs");
+ }
uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb);
if (uuid_ptr != NULL) {
we just need to drop our ref */
class_export_put(mds->mds_osc_exp);
+ lprocfs_free_obd_stats(obd);
lprocfs_obd_cleanup(obd);
lquota_cleanup(quota_interface, obd);
switch ((long)it->opc) {
case IT_OPEN:
case IT_CREAT|IT_OPEN:
+ lprocfs_counter_incr(req->rq_export->exp_obd->obd_stats,
+ LPROC_MDS_OPEN);
fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock, NULL,
&lockh);
/* XXX swab here to assert that an mds_open reint
getattr_part = MDS_INODELOCK_LOOKUP;
case IT_GETATTR:
getattr_part |= MDS_INODELOCK_LOOKUP;
+ OBD_COUNTER_INCREMENT(req->rq_export->exp_obd, getattr);
case IT_READDIR:
fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock,
&new_lock, &lockh);
int rc;
struct lprocfs_static_vars lvars;
+ request_module("lquota");
quota_interface = PORTAL_SYMBOL_GET(mds_quota_interface);
rc = lquota_init(quota_interface);
if (rc) {
/* mds/mds_log.c */
int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count,
- struct llog_catid *logid);
+ struct llog_catid *logid, struct obd_uuid *uuid);
int mds_llog_finish(struct obd_device *obd, int count);
/* mds/mds_lov.c */
return sprintf(str, "%llx:%08x", (unsigned long long)id, generation);
}
+/* mds/lproc_mds.c: indices of the MDS-private counters kept in obd_stats */
+enum {
+ LPROC_MDS_OPEN = 0, /* registered as "open" */
+ LPROC_MDS_CLOSE, /* registered as "close" */
+ LPROC_MDS_MKNOD, /* registered as "mknod"; also bumped on regular-file create and symlink */
+ LPROC_MDS_LINK, /* registered as "link" */
+ LPROC_MDS_UNLINK, /* registered as "unlink" */
+ LPROC_MDS_MKDIR, /* registered as "mkdir" */
+ LPROC_MDS_RMDIR, /* registered as "rmdir" */
+ LPROC_MDS_RENAME, /* registered as "rename" */
+ LPROC_MDS_GETXATTR, /* registered as "getxattr" */
+ LPROC_MDS_SETXATTR, /* registered as "setxattr" */
+ LPROC_MDS_LAST, /* not a counter: passed to lprocfs_alloc_obd_stats(), keep it last */
+};
+
#endif /* _MDS_INTERNAL_H */
LASSERT(body != NULL); /* previously verified & swabbed by caller */
-#if CRAY_XT3
+#ifdef CRAY_XT3
if (req->rq_uid != LNET_UID_ANY) {
/* Non-root local cluster client */
LASSERT (req->rq_uid != 0);
return rc;
}
-#if CRAY_XT3
+#ifdef CRAY_XT3
if (ucred->luc_uce)
ucred->luc_fsgid = ucred->luc_uce->ue_primary;
#endif
};
int mds_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid, struct obd_uuid *uuid)
{
struct obd_device *lov_obd = obd->u.mds.mds_osc_obd;
int rc;
if (rc)
RETURN(rc);
- rc = obd_llog_init(lov_obd, tgt, count, logid);
+ rc = obd_llog_init(lov_obd, tgt, count, logid, uuid);
if (rc)
- CERROR("error lov_llog_init\n");
+ CERROR("lov_llog_init err %d\n", rc);
RETURN(rc);
}
LASSERT(!mds->mds_lov_objids_size);
LASSERT(!mds->mds_lov_objids_dirty);
- /* Read everything in the file, even if our current lov desc
- has fewer targets. Old targets not in the lov descriptor
+ /* Read everything in the file, even if our current lov desc
+ has fewer targets. Old targets not in the lov descriptor
during mds setup may still have valid objids. */
size = mds->mds_lov_objid_filp->f_dentry->d_inode->i_size;
if (size == 0)
CERROR("Error reading objids %d\n", rc);
RETURN(rc);
}
-
- mds->mds_lov_objids_in_file = size / sizeof(*ids);
-
+
+ mds->mds_lov_objids_in_file = size / sizeof(*ids);
+
for (i = 0; i < mds->mds_lov_objids_in_file; i++) {
CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n",
mds->mds_lov_objids[i], i);
{
struct mds_obd *mds = &obd->u.mds;
loff_t off = 0;
- int i, rc, tgts;
+ int i, rc, tgts;
ENTRY;
if (!mds->mds_lov_objids_dirty)
KEY_NEXT_ID,
mds->mds_lov_desc.ld_tgt_count,
mds->mds_lov_objids, NULL);
-
- if (rc)
- CERROR ("%s: mds_lov_set_nextid failed (%d)\n",
+
+ if (rc)
+ CERROR ("%s: mds_lov_set_nextid failed (%d)\n",
obd->obd_name, rc);
RETURN(rc);
static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
{
struct mds_obd *mds = &obd->u.mds;
- struct lov_desc *ld;
+ struct lov_desc *ld;
__u32 size, stripes, valsize = sizeof(mds->mds_lov_desc);
int rc = 0;
ENTRY;
/* The size of the LOV target table may have increased. */
size = ld->ld_tgt_count * sizeof(obd_id);
- if ((mds->mds_lov_objids_size == 0) ||
+ if ((mds->mds_lov_objids_size == 0) ||
(size > mds->mds_lov_objids_size)) {
obd_id *ids;
-
+
/* add room by powers of 2 */
size = 1;
- while (size < ld->ld_tgt_count)
+ while (size < ld->ld_tgt_count)
size = size << 1;
size = size * sizeof(obd_id);
memset(ids, 0, size);
if (mds->mds_lov_objids_size) {
obd_id *old_ids = mds->mds_lov_objids;
- memcpy(ids, mds->mds_lov_objids,
+ memcpy(ids, mds->mds_lov_objids,
mds->mds_lov_objids_size);
mds->mds_lov_objids = ids;
OBD_FREE(old_ids, mds->mds_lov_objids_size);
CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n",
mds->mds_lov_desc.ld_tgt_count);
- stripes = min((__u32)LOV_MAX_STRIPE_COUNT,
- max(mds->mds_lov_desc.ld_tgt_count,
- mds->mds_lov_objids_in_file));
+ stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
+ max(mds->mds_lov_desc.ld_tgt_count,
+ mds->mds_lov_objids_in_file));
mds->mds_max_mdsize = lov_mds_md_size(stripes);
mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize: %d/%d\n",
#define MDSLOV_NO_INDEX -1
/* Inform MDS about new/updated target */
-static int mds_lov_update_mds(struct obd_device *obd,
- struct obd_device *watched,
- __u32 idx)
+static int mds_lov_update_mds(struct obd_device *obd,
+ struct obd_device *watched,
+ __u32 idx, struct obd_uuid *uuid)
{
struct mds_obd *mds = &obd->u.mds;
int old_count;
RETURN(rc);
CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
- idx, obd->obd_recovering, obd->obd_async_recov, old_count,
+ idx, obd->obd_recovering, obd->obd_async_recov, old_count,
mds->mds_lov_desc.ld_tgt_count);
/* idx is set as data from lov_notify. */
if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) {
if (idx >= mds->mds_lov_desc.ld_tgt_count) {
- CERROR("index %d > count %d!\n", idx,
+ CERROR("index %d > count %d!\n", idx,
mds->mds_lov_desc.ld_tgt_count);
RETURN(-EINVAL);
}
-
+
if (idx >= mds->mds_lov_objids_in_file) {
/* We never read this lastid; ask the osc */
obd_id lastid;
__u32 size = sizeof(lastid);
rc = obd_get_info(watched->obd_self_export,
- strlen("last_id"),
+ strlen("last_id"),
"last_id", &size, &lastid);
if (rc)
RETURN(rc);
mds_lov_write_objids(obd);
} else {
/* We have read this lastid from disk; tell the osc.
- Don't call this during recovery. */
+ Don't call this during recovery. */
rc = mds_lov_set_nextid(obd);
}
-
+
CDEBUG(D_CONFIG, "last object "LPU64" from OST %d\n",
mds->mds_lov_objids[idx], idx);
}
/* We only _need_ to do this at first add (idx), or the first time
after recovery. However, it should now be safe to call anytime. */
CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx);
- llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
+ mutex_down(&obd->obd_dev_sem);
+ llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, uuid);
+ mutex_up(&obd->obd_dev_sem);
RETURN(rc);
}
if (data == NULL)
RETURN(-ENOMEM);
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX |
- OBD_CONNECT_REQPORTAL;
+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64;
data->ocd_version = LUSTRE_VERSION_CODE;
data->ocd_group = mds->mds_id + FILTER_GROUP_MDS0;
/* NB: lov_connect() needs to fill in .ocd_index for each OST */
GOTO(err_reg, rc);
/* tgt_count may be 0! */
- rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
+ rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
if (rc) {
CERROR("failed to initialize catalog %d\n", rc);
GOTO(err_reg, rc);
push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
rc = llog_ioctl(ctxt, cmd, data);
pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
- llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
-
+ llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
group = FILTER_GROUP_MDS0 + mds->mds_id;
rc2 = obd_set_info_async(mds->mds_osc_exp,
strlen(KEY_MDS_CONN), KEY_MDS_CONN,
};
/* We only sync one osc at a time, so that we don't have to hold
- any kind of lock on the whole mds_lov_desc, which may change
+ any kind of lock on the whole mds_lov_desc, which may change
(grow) as a result of mds_lov_add_ost. This also avoids any
- kind of mismatch between the lov_desc and the mds_lov_desc,
+ kind of mismatch between the lov_desc and the mds_lov_desc,
which are not in lock-step during lov_add_obd */
static int __mds_lov_synchronize(void *data)
{
uuid = &watched->u.cli.cl_target_uuid;
LASSERT(uuid);
- rc = mds_lov_update_mds(obd, watched, idx);
+ rc = mds_lov_update_mds(obd, watched, idx, uuid);
if (rc != 0)
GOTO(out, rc);
group = FILTER_GROUP_MDS0 + mds->mds_id;
rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
mds->mds_lov_desc.ld_tgt_count,
NULL, NULL, uuid);
-
+
if (rc != 0) {
CERROR("%s: failed at llog_origin_connect: %d\n",
obd->obd_name, rc);
char name[20];
if (mlsi->mlsi_index == MDSLOV_NO_INDEX)
- /* There is still a watched target,
+ /* There is still a watched target,
but we don't know its index */
sprintf(name, "ll_sync_tgt");
else
RETURN(__mds_lov_synchronize(data));
}
-int mds_lov_start_synchronize(struct obd_device *obd,
+int mds_lov_start_synchronize(struct obd_device *obd,
struct obd_device *watched,
void *data, int nonblock)
{
mlsi->mlsi_obd = obd;
mlsi->mlsi_watched = watched;
- if (data)
+ if (data)
mlsi->mlsi_index = *(__u32 *)data;
else
mlsi->mlsi_index = MDSLOV_NO_INDEX;
if (obd->obd_recovering) {
CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
- obd->obd_name,
+ obd->obd_name,
obd_uuid2str(&watched->u.cli.cl_target_uuid));
- /* We still have to fix the lov descriptor for ost's added
+ /* We still have to fix the lov descriptor for ost's added
after the mdt in the config log. They didn't make it into
mds_lov_connect. */
rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp);
}
LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
- rc = mds_lov_start_synchronize(obd, watched, data,
+ rc = mds_lov_start_synchronize(obd, watched, data,
!(ev == OBD_NOTIFY_SYNC));
-
+
lquota_recovery(quota_interface, obd);
-
+
RETURN(rc);
}
int rc, err;
ENTRY;
- if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC ||
- le32_to_cpu(lmm->lmm_magic == LOV_MAGIC_JOIN))
+ /* Nothing to convert for either magic. Byte-swap the on-disk magic
+ * before each comparison; the old code swapped the comparison result
+ * instead, which compares the raw value on big-endian hosts. */
+ if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC ||
+ le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_JOIN)
RETURN(0);
CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n",
inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic),
LOV_MAGIC);
-
+
rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size);
if (rc < 0)
GOTO(conv_end, rc);
int lock_flags = 0;
ENTRY;
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_OPEN);
OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE,
(obd_timeout + 1) / 4);
else
ptlrpc_save_lock(req, &parent_lockh, parent_mode);
}
+ /* trigger dqacq on the owner of child and parent */
+ lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
+
/* If we have not taken the "open" lock, we may not return 0 here,
because caller expects 0 to mean "lock is taken", and it needs
nonzero return here for caller to return EDLM_LOCK_ABORTED to
if ((cleanup_phase != 3) && !rc)
rc = ENOLCK;
- /* trigger dqacq on the owner of child and parent */
- lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
RETURN(rc);
}
CDEBUG(D_HA, "close req->rep_len %d mdsize %d cookiesize %d\n",
req->rq_replen,
obd->u.mds.mds_max_mdsize, obd->u.mds.mds_max_cookiesize);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_CLOSE);
body = lustre_swab_reqbuf(req, offset, sizeof(*body),
lustre_swab_mds_body);
DEBUG_REQ(D_INODE, req, "setattr "LPU64"/%u %x", rec->ur_fid1->id,
rec->ur_fid1->generation, rec->ur_iattr.ia_valid);
+ OBD_COUNTER_INCREMENT(obd, setattr);
MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req));
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = ll_vfs_create(dir, dchild, rec->ur_mode, NULL);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
EXIT;
break;
}
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_mkdir(dir, dchild, rec->ur_mode);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKDIR);
EXIT;
break;
}
rc = -EINVAL; /* -EPROTO? */
else
rc = ll_vfs_symlink(dir, dchild, rec->ur_tgt, S_IALLUGO);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
EXIT;
break;
}
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
EXIT;
break;
}
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_rmdir(dparent->d_inode, dchild);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RMDIR);
break;
case S_IFREG: {
struct lov_mds_md *lmm = lustre_msg_buf(req->rq_repmsg,
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_unlink(dparent->d_inode, dchild);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK);
break;
}
case S_IFLNK:
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_unlink(dparent->d_inode, dchild);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK);
break;
default:
CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
DEBUG_REQ(D_INODE, req, "original "LPU64"/%u to "LPU64"/%u %s",
rec->ur_fid1->id, rec->ur_fid1->generation,
rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_name);
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_LINK);
MDS_CHECK_RESENT(req, mds_reconstruct_generic(req));
DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u %s to "LPU64"/%u %s",
rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name,
rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_tgt);
-
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RENAME);
+
MDS_CHECK_RESENT(req, mds_reconstruct_generic(req));
rc = mds_get_parents_children_locked(obd, mds, rec->ur_fid1, &de_srcdir,
int rc;
ENTRY;
-#if CRAY_XT3
+#ifdef CRAY_XT3
if (req->rq_uid != LNET_UID_ANY) {
/* non-root local cluster client
* NB root's creds are believed... */
/* checked by unpacker */
LASSERT(rec->ur_opcode < REINT_MAX && reinters[rec->ur_opcode] != NULL);
-#if CRAY_XT3
+#ifdef CRAY_XT3
if (rec->ur_uc.luc_uce)
rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary;
#endif
int rc = 0;
ENTRY;
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_GETXATTR);
+
body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
lustre_swab_mds_body);
if (body == NULL)
int rc;
ENTRY;
+ lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_SETXATTR);
+
body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
lustre_swab_mds_body);
if (body == NULL)
if (rc)
GOTO(err_decref, rc);
- rc = obd_llog_init(obd, obd, 0, NULL);
+ rc = obd_llog_init(obd, obd, 0, NULL, NULL);
if (rc) {
CERROR("failed to setup llogging subsystems\n");
GOTO(err_cleanup, rc);
}
static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid,
+ struct obd_uuid *uuid)
{
struct llog_ctxt *ctxt;
int rc;
if (rc)
GOTO(err_decref, rc);
- rc = obd_llog_init(obd, obd, 0, NULL);
+ rc = obd_llog_init(obd, obd, 0, NULL, NULL);
if (rc) {
CERROR("failed to setup llogging subsystems\n");
GOTO(err_cleanup, rc);
}
static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid,
+ struct obd_uuid *uuid)
{
struct llog_ctxt *ctxt;
int rc;
mti->mti_flags |= LDD_F_UPDATE;
}
-
if (mti->mti_flags & LDD_F_UPDATE) {
CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
mti->mti_stripe_index);
/* We also have to update the other logs where this osc is part of
the lov */
- /* Append ost info to mdt log */
if (mti->mti_flags & LDD_F_UPGRADE14)
/* If we're upgrading, the old mdt log already has our
entry. Let's do a fake one for fun. */
flags = CM_SKIP | CM_UPGRADE146;
+
+ if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
+ /* If the update flag isn't set, don't update client/mdt
+ logs. */
+ flags |= CM_SKIP;
+ LCONSOLE_WARN("Client log for %s was not updated; writeconf "
+ "the MDT first to regenerate it.\n",
+ mti->mti_svname);
+ }
// for_all_existing_mdt
for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
mti->mti_stripe_index, mti->mti_svname);
/* FIXME mark old log sections as invalid,
inc config ver #, add new log sections.
- Make sure to update client and mds logs too
+ Make sure to update client and mdt logs too
if needed */
- /* in the mean time, assume all logs were lost
- (writeconf), and recreate this one */
+ /* In the meantime, if we found the index in the
+ client log, we can't add it again. So recreate
+ the target log, but do _not_ update the client/mdt
+ logs. For "full" writeconf, the client log won't
+ have an entry for this target, so we won't get
+ here. */
+ mti->mti_flags &= ~LDD_F_UPDATE;
}
}
RETURN(rc);
}
- /* Delete the fs db */
down(&mgs->mgs_sem);
+
+ /* Delete the fs db */
fsdb = mgs_find_fsdb(obd, fsname);
if (fsdb)
mgs_free_fsdb(fsdb);
- up(&mgs->mgs_sem);
list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
list_del(&dirent->lld_list);
OBD_FREE(dirent, sizeof(*dirent));
}
+ up(&mgs->mgs_sem);
+
RETURN(rc);
}
/* The following are visible and mutable through /proc/sys/lustre/. */
unsigned int obd_fail_loc;
unsigned int obd_dump_on_timeout;
+unsigned int obd_dump_on_eviction;
unsigned int obd_timeout = 100; /* seconds */
unsigned int ldlm_timeout = 20; /* seconds */
unsigned int obd_health_check_timeout = 120; /* seconds */
-char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall */
cfs_waitq_t obd_race_waitq;
int obd_race_state;
EXPORT_SYMBOL(obd_race_waitq);
EXPORT_SYMBOL(obd_race_state);
EXPORT_SYMBOL(obd_dump_on_timeout);
+EXPORT_SYMBOL(obd_dump_on_eviction);
EXPORT_SYMBOL(obd_timeout);
EXPORT_SYMBOL(ldlm_timeout);
EXPORT_SYMBOL(obd_health_check_timeout);
-EXPORT_SYMBOL(obd_lustre_upcall);
EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
EXPORT_SYMBOL(proc_lustre_root);
if (err)
return err;
err = class_procfs_init();
- lustre_register_fs();
+ if (err)
+ return err;
+ err = lustre_register_fs();
#endif
return err;
extern unsigned int obd_dump_on_timeout;
extern unsigned int obd_timeout;
extern unsigned int ldlm_timeout;
-extern char obd_lustre_upcall[128];
extern unsigned int obd_sync_filter;
extern atomic_t obd_memory;
SYSCTL_INT(_lustre, OID_AUTO, dump_on_timeout,
CTLTYPE_INT | CTLFLAG_RW, &obd_dump_on_timeout,
0, "lustre_dump_on_timeout");
-SYSCTL_STRING(_lustre, OID_AUTO, upcall,
- CTLTYPE_STRING | CTLFLAG_RW, obd_lustre_upcall,
- 128, "lustre_upcall");
SYSCTL_INT(_lustre, OID_AUTO, memused,
CTLTYPE_INT | CTLFLAG_RW, (int *)&obd_memory.counter,
0, "lustre_memory_used");
#include <obd_support.h>
#include <obd_class.h>
#include <lprocfs_status.h>
+#include <lustre_ver.h>
#ifdef __KERNEL__
#include <linux/lustre_build_version.h>
#include <linux/lustre_version.h>
int *eof, void *data)
{
*eof = 1;
- return snprintf(page, count, "%s\n", BUILD_VERSION);
-}
-
-int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- *eof = 1;
#ifdef LUSTRE_KERNEL_VERSION
- return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION);
+ return snprintf(page, count, "lustre: %s\nkernel: %u\nbuild: %s\n",
+ LUSTRE_VERSION_STRING, LUSTRE_KERNEL_VERSION,
+ BUILD_VERSION);
#else
- return snprintf(page, count, "%u\n", "patchless");
+ /* "patchless" is a string literal, so the kernel field must be
+ * printed with %s here; passing it to %u is undefined behaviour. */
+ return snprintf(page, count, "lustre: %s\nkernel: %s\nbuild: %s\n",
+ LUSTRE_VERSION_STRING, "patchless", BUILD_VERSION);
#endif
}
int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
struct lprocfs_vars lprocfs_base[] = {
{ "version", obd_proc_read_version, NULL, NULL },
- { "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
{ "pinger", obd_proc_read_pinger, NULL, NULL },
{ "health_check", obd_proc_read_health, NULL, NULL },
{ "health_check_timeout", obd_proc_rd_health_timeout,
OBD_FAIL_LOC = 1, /* control test failures instrumentation */
OBD_TIMEOUT, /* RPC timeout before recovery/intr */
OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */
- OBD_UPCALL, /* path to recovery upcall */
OBD_MEMUSED, /* bytes currently OBD_ALLOCated */
OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */
OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */
+ OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */
};
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
&proc_set_timeout},
{OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout,
sizeof(int), 0644, NULL, &proc_dointvec},
- /* XXX need to lock so we avoid update races with recovery upcall! */
- {OBD_UPCALL, "upcall", obd_lustre_upcall, 128, 0644, NULL,
- &proc_dostring, &sysctl_string },
+ {OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction,
+ sizeof(int), 0644, NULL, &proc_dointvec},
{OBD_MEMUSED, "memused", (int *)&obd_memory.counter,
sizeof(int), 0644, NULL, &proc_dointvec},
{OBD_LDLM_TIMEOUT, "ldlm_timeout", &ldlm_timeout, sizeof(int), 0644,
}
EXPORT_SYMBOL(llog_obd_origin_add);
-int llog_cat_initialize(struct obd_device *obd, int count)
+int llog_cat_initialize(struct obd_device *obd, int count,
+ struct obd_uuid *uuid)
{
+ char name[32] = CATLIST;
struct llog_catid *idarray;
int size = sizeof(*idarray) * count;
- char name[32] = CATLIST;
int rc;
ENTRY;
- /* We don't want multiple mdt threads here at once */
- mutex_down(&obd->obd_dev_sem);
-
OBD_ALLOC(idarray, size);
- if (!idarray) {
- mutex_up(&obd->obd_dev_sem);
+ if (!idarray)
RETURN(-ENOMEM);
- }
rc = llog_get_cat_list(obd, obd, name, count, idarray);
if (rc) {
GOTO(out, rc);
}
- rc = obd_llog_init(obd, obd, count, idarray);
+ rc = obd_llog_init(obd, obd, count, idarray, uuid);
if (rc) {
CERROR("rc: %d\n", rc);
GOTO(out, rc);
out:
OBD_FREE(idarray, size);
- mutex_up(&obd->obd_dev_sem);
RETURN(rc);
}
EXPORT_SYMBOL(llog_cat_initialize);
int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid, struct obd_uuid *uuid)
{
int rc;
ENTRY;
OBD_CHECK_DT_OP(obd, llog_init, 0);
OBD_COUNTER_INCREMENT(obd, llog_init);
- rc = OBP(obd, llog_init)(obd, disk_obd, count, logid);
+ rc = OBP(obd, llog_init)(obd, disk_obd, count, logid, uuid);
RETURN(rc);
}
EXPORT_SYMBOL(obd_llog_init);
static int llog_test_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *logid)
+ int count, struct llog_catid *logid,
+ struct obd_uuid *uuid)
{
int rc;
ENTRY;
RETURN(-EINVAL);
}
- rc = obd_llog_init(obd, tgt, 0, NULL);
+ rc = obd_llog_init(obd, tgt, 0, NULL, NULL);
if (rc)
RETURN(rc);
"join_file",
"getattr_by_fid",
"no_oh_for_devices",
+ "local_1.8_client",
+ "remote_1.8_client",
+ "max_byte_per_rpc",
+ "64bit_qdata",
+ "fid_capability",
+ "oss_capability",
NULL
};
struct proc_dir_entry *entry;
LASSERT(root != NULL);
- entry = create_proc_entry(name, 0444, root);
+ entry = create_proc_entry(name, 0644, root);
if (entry == NULL)
return -ENOMEM;
entry->proc_fops = &lprocfs_stats_seq_fops;
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
- LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
__u64 *val, int mult)
{
char kernbuf[22], *end, *pbuf;
+ __u64 whole, frac = 0, frac_d = 1, units;
if (count > (sizeof(kernbuf) - 1) )
return -EINVAL;
kernbuf[count] = '\0';
pbuf = kernbuf;
- if (*pbuf == '-') {
- mult = -mult;
- pbuf++;
- }
+ if (*pbuf == '-')
+ return -ERANGE;
- *val = simple_strtoull(pbuf, &end, 10) * mult;
+ whole = simple_strtoull(pbuf, &end, 10);
if (pbuf == end)
return -EINVAL;
if (end != NULL && *end == '.') {
- int temp_val;
- int i, pow = 1;
-
+ int i;
pbuf = end + 1;
- if (strlen(pbuf) > 10)
- pbuf[10] = '\0';
-
- temp_val = (int)simple_strtoull(pbuf, &end, 10) * mult;
-
- if (pbuf < end) {
- for (i = 0; i < (end - pbuf); i++)
- pow *= 10;
+ frac = simple_strtoull(pbuf, &end, 10);
+ /* count decimal places */
+ for (i = 0; i < (end - pbuf); i++)
+ frac_d *= 10;
+ }
- *val += (__u64)(temp_val / pow);
- }
+ units = 1;
+ switch(*end) {
+ case 'p': case 'P':
+ units <<= 10;
+ case 't': case 'T':
+ units <<= 10;
+ case 'g': case 'G':
+ units <<= 10;
+ case 'm': case 'M':
+ units <<= 10;
+ case 'k': case 'K':
+ units <<= 10;
}
+ /* Specified units override the multiplier. units is still 1 when no
+ * suffix matched above, and in that case the caller's mult must be
+ * kept rather than silently replaced by 1. */
+ if (units > 1)
+ mult = units;
+
+ frac = frac * mult;
+ do_div(frac, frac_d);
+ *val = whole * mult + frac;
return 0;
}
struct list_head un_list;
lnet_nid_t un_nid;
char *un_uuid;
+ int un_count; /* nid/uuid pair refcount */
};
/* FIXME: This should probably become more elegant than a global linked list */
LNET will choose the best one. */
int class_add_uuid(const char *uuid, __u64 nid)
{
- struct uuid_nid_data *data;
+ struct list_head *tmp, *n;
+ struct uuid_nid_data *data, *entry;
int nob = strnlen (uuid, PAGE_SIZE) + 1;
+ int found = 0;
LASSERT(nid != 0); /* valid newconfig NID is never zero */
return -ENOMEM;
}
- CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
memcpy(data->un_uuid, uuid, nob);
data->un_nid = nid;
+ data->un_count = 1;
spin_lock (&g_uuid_lock);
- list_add(&data->un_list, &g_uuid_list);
+ list_for_each_safe(tmp, n, &g_uuid_list) {
+ entry = list_entry(tmp, struct uuid_nid_data, un_list);
+ if (entry->un_nid == nid &&
+ (strcmp(entry->un_uuid, uuid) == 0)) {
+ found++;
+ entry->un_count++;
+ break;
+ }
+ }
+ if (!found)
+ list_add(&data->un_list, &g_uuid_list);
spin_unlock (&g_uuid_lock);
+ if (found) {
+ CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid,
+ libcfs_nid2str(nid), entry->un_count);
+ OBD_FREE(data->un_uuid, nob);
+ OBD_FREE(data, sizeof(*data));
+ } else {
+ CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
+ }
return 0;
}
list_for_each_safe(tmp, n, &g_uuid_list) {
data = list_entry(tmp, struct uuid_nid_data, un_list);
- if (uuid == NULL || strcmp(data->un_uuid, uuid) == 0) {
+ if (uuid == NULL) {
list_del (&data->un_list);
list_add (&data->un_list, &deathrow);
- if (uuid)
- break;
+ } else if (strcmp(data->un_uuid, uuid) == 0) {
+ --data->un_count;
+ if (data->un_count <= 0) {
+ list_del (&data->un_list);
+ list_add (&data->un_list, &deathrow);
+ }
+ break;
}
}
data = list_entry(deathrow.next, struct uuid_nid_data, un_list);
list_del (&data->un_list);
- CDEBUG(D_INFO, "del uuid %s\n", data->un_uuid);
+ CDEBUG(D_INFO, "del uuid %s %s\n", data->un_uuid,
+ libcfs_nid2str(data->un_nid));
OBD_FREE(data->un_uuid, strlen(data->un_uuid) + 1);
OBD_FREE(data, sizeof(*data));
GOTO(out, err = 0);
}
case LCFG_SET_UPCALL: {
- CDEBUG(D_IOCTL, "setting lustre ucpall to: %s\n",
- lustre_cfg_string(lcfg, 1));
- if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof obd_lustre_upcall)
- GOTO(out, err = -EINVAL);
- strncpy(obd_lustre_upcall, lustre_cfg_string(lcfg, 1),
- sizeof (obd_lustre_upcall));
+ LCONSOLE_ERROR("recovery upcall is deprecated\n");
+ /* COMPAT_146 Don't fail on old configs */
GOTO(out, err = 0);
}
case LCFG_MARKER: {
case LCFG_PARAM: {
/* llite has no obd */
if ((class_match_param(lustre_cfg_string(lcfg, 1),
- PARAM_LLITE, 0) == 0) &&
+ PARAM_LLITE, 0) == 0) &&
client_process_config) {
err = (*client_process_config)(lcfg);
GOTO(out, err);
int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
struct lustre_cfg *lcfg, void *data)
{
+#ifdef __KERNEL__
struct lprocfs_vars *var;
char *key, *sval;
int i, vallen;
if (class_match_param(key, (char *)var->name, 0) == 0) {
matched++;
rc = -EROFS;
- if (var->write_fptr)
+ if (var->write_fptr) {
+ mm_segment_t oldfs;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
rc = (var->write_fptr)(NULL, sval,
vallen, data);
+ set_fs(oldfs);
+ }
if (rc < 0)
CERROR("writing proc entry %s err %d\n",
var->name, rc);
if (rc > 0)
rc = 0;
RETURN(rc);
+#else
+ CDEBUG(D_CONFIG, "liblustre can't process params.\n");
+ return -ENOSYS;
+#endif
}
int class_config_dump_handler(struct llog_handle * handle,
static void ldd_print(struct lustre_disk_data *ldd)
{
- PRINT_CMD(PRINT_MASK, " disk data:\n");
- PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
- PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
+ PRINT_CMD(PRINT_MASK, " disk data:\n");
PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname);
+ PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid);
+ PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname);
PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex);
+ PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver);
PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags);
PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd));
PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
- PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
+ PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
+ PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
}
static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
RETURN(rc);
}
+DECLARE_MUTEX(mgc_start_lock);
+
/* Set up a mgcobd to process startup logs */
static int lustre_start_mgc(struct super_block *sb)
{
GOTO(out_free, rc = -ENOMEM);
sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
+ mutex_down(&mgc_start_lock);
+
obd = class_name2obd(mgcname);
if (obd) {
/* Re-using an existing MGC */
to the same mgc.*/
lsi->lsi_mgc = obd;
out_free:
+ mutex_up(&mgc_start_lock);
+
if (mgcname)
OBD_FREE(mgcname, len);
if (niduuid)
struct lustre_sb_info *lsi = s2lsi(sb);
struct obd_device *obd;
char *niduuid, *ptr = 0;
- int i, rc, len;
+ int i, rc = 0, len;
ENTRY;
if (!lsi)
RETURN(-ENOENT);
lsi->lsi_mgc = NULL;
+ mutex_down(&mgc_start_lock);
if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
/* This is not fatal, every client that stops
will call in here. */
CDEBUG(D_MOUNT, "mgc still has %d references.\n",
atomic_read(&obd->u.cli.cl_mgc_refcount));
- RETURN(-EBUSY);
+ GOTO(out, rc = -EBUSY);
}
/* MGC must always stop */
rc = class_manual_cleanup(obd);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
/* Clean the nid uuids */
if (!niduuid)
OBD_FREE(niduuid, len);
/* class_import_put will get rid of the additional connections */
- RETURN(0);
+out:
+ mutex_up(&mgc_start_lock);
+ RETURN(rc);
}
/* Since there's only one mgc per node, we have to change it's fs to get
RETURN(rc);
}
+DECLARE_MUTEX(server_start_lock);
+
/* Stop MDS/OSS if nobody is using them */
static int server_stop_servers(int lddflags, int lsiflags)
{
int rc = 0;
ENTRY;
- /* Either an MDT or an OST or neither */
+ mutex_down(&server_start_lock);
+ /* Either an MDT or an OST or neither */
/* if this was an MDT, and there are no more MDT's, clean up the MDS */
if ((lddflags & LDD_F_SV_TYPE_MDT) &&
(obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
rc = err;
}
+ mutex_up(&server_start_lock);
+
RETURN(rc);
}
/* If we're an MDT, make sure the global MDS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
/* make sure the MDS is started */
+ mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_MDS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
LUSTRE_MDS_OBDNAME"_uuid",
0, 0);
if (rc) {
+ mutex_up(&server_start_lock);
CERROR("failed to start MDS: %d\n", rc);
RETURN(rc);
}
}
+ mutex_up(&server_start_lock);
}
/* If we're an MDT, make sure the global MDS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
/* If we're an OST, make sure the global OSS is running */
if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
/* make sure OSS is started */
+ mutex_down(&server_start_lock);
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
if (!obd) {
rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
LUSTRE_OSS_OBDNAME"_uuid",
0, 0);
if (rc) {
+ mutex_up(&server_start_lock);
CERROR("failed to start OSS: %d\n", rc);
RETURN(rc);
}
}
+ mutex_up(&server_start_lock);
}
/* Set the mgc fs to our server disk. This allows the MGC
/* start MGS before MGC */
if (IS_MGS(lsi->lsi_ldd)) {
rc = server_start_mgs(sb);
- if (rc) {
- CERROR("ignoring Failed MGS start!!\n");
- //GOTO(out_mnt, rc);
- }
+ if (rc)
+ GOTO(out_mnt, rc);
}
rc = lustre_start_mgc(sb);
{
char *s1 = ptr, *s2;
__u32 index, *exclude_list;
- int rc = 0;
+ int rc = 0, devmax;
ENTRY;
+
+ /* An OST name is at least 8 chars long ("-OST0000"). We don't
+ actually know the fsname at this time (a user could specify any
+ fsname), so size the list for strlen(ptr) / 8 + 1 entries. */
+ devmax = strlen(ptr) / 8 + 1;
/* temp storage until we figure out how many we have */
- OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+ OBD_ALLOC(exclude_list, sizeof(index) * devmax);
if (!exclude_list)
RETURN(-ENOMEM);
s1 = s2;
/* now we are pointing at ':' (next exclude)
or ',' (end of excludes) */
-
- if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES)
+ if (lmd->lmd_exclude_count >= devmax)
break;
}
if (rc >= 0) /* non-err */
lmd->lmd_exclude_count = 0;
}
}
- OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+ OBD_FREE(exclude_list, sizeof(index) * devmax);
RETURN(rc);
}
#ifdef __KERNEL__
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#include <linux/iobuf.h>
+
static int echo_client_ubrw(struct obd_device *obd, int rw,
struct obdo *oa, struct lov_stripe_md *lsm,
obd_off offset, obd_size count, char *buffer,
MODULES := obdfilter
-obdfilter-objs := filter.o filter_io.o filter_log.o filter_san.o
+obdfilter-objs := filter.o filter_io.o filter_log.o
obdfilter-objs += lproc_obdfilter.o filter_lvb.o
ifeq ($(PATCHLEVEL),4)
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"filter_ldlm_cb_client", &obd->obd_ldlm_client);
- rc = llog_cat_initialize(obd, 1);
+ rc = llog_cat_initialize(obd, 1, NULL);
if (rc) {
CERROR("failed to setup llogging subsystems\n");
GOTO(err_post, rc);
};
static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *catid)
+ int count, struct llog_catid *catid,
+ struct obd_uuid *uuid)
{
struct llog_ctxt *ctxt;
int rc;
data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
exp->exp_connect_flags = data->ocd_connect_flags;
+ if (exp->exp_imp_reverse)
+ exp->exp_imp_reverse->imp_connect_data.ocd_connect_flags
+ = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
.o_process_config = filter_process_config,
};
-static struct obd_ops filter_sanobd_ops = {
- .o_owner = THIS_MODULE,
- .o_get_info = filter_get_info,
- .o_set_info_async = filter_set_info_async,
- .o_setup = filter_san_setup,
- .o_precleanup = filter_precleanup,
- .o_cleanup = filter_cleanup,
- .o_connect = filter_connect,
- .o_reconnect = filter_reconnect,
- .o_disconnect = filter_disconnect,
- .o_ping = filter_ping,
- .o_init_export = filter_init_export,
- .o_destroy_export = filter_destroy_export,
- .o_statfs = filter_statfs,
- .o_getattr = filter_getattr,
- .o_unpackmd = filter_unpackmd,
- .o_create = filter_create,
- .o_setattr = filter_setattr,
- .o_destroy = filter_destroy,
- .o_brw = filter_brw,
- .o_punch = filter_truncate,
- .o_sync = filter_sync,
- .o_preprw = filter_preprw,
- .o_commitrw = filter_commitrw,
- .o_san_preprw = filter_san_preprw,
- .o_llog_init = filter_llog_init,
- .o_llog_finish = filter_llog_finish,
- .o_iocontrol = filter_iocontrol,
-};
-
quota_interface_t *quota_interface;
extern quota_interface_t filter_quota_interface;
lprocfs_init_vars(filter, &lvars);
+ request_module("lquota");
OBD_ALLOC(obdfilter_created_scratchpad,
OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
sizeof(*obdfilter_created_scratchpad));
quota_interface = PORTAL_SYMBOL_GET(filter_quota_interface);
init_obd_quota_ops(quota_interface, &filter_obd_ops);
- init_obd_quota_ops(quota_interface, &filter_sanobd_ops);
rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars,
LUSTRE_OST_NAME, NULL);
- if (rc)
- GOTO(out_fmd, rc);
-
- rc = class_register_type(&filter_sanobd_ops, NULL, lvars.module_vars,
- LUSTRE_OSTSAN_NAME, NULL);
if (rc) {
int err;
- class_unregister_type(LUSTRE_OST_NAME);
-out_fmd:
err = kmem_cache_destroy(ll_fmd_cachep);
LASSERTF(err == 0, "Cannot destroy ll_fmd_cachep: rc %d\n",err);
ll_fmd_cachep = NULL;
ll_fmd_cachep = NULL;
}
- class_unregister_type(LUSTRE_OSTSAN_NAME);
class_unregister_type(LUSTRE_OST_NAME);
OBD_FREE(obdfilter_created_scratchpad,
OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
struct llog_rec_hdr *rec, void *data);
-/* filter_san.c */
-int filter_san_setup(struct obd_device *obd, struct lustre_cfg *cfg);
-int filter_san_preprw(int cmd, struct obd_export *, struct obdo *, int objcount,
- struct obd_ioobj *, int niocount, struct niobuf_remote *);
-
#ifdef LPROCFS
void filter_tally_write(struct filter_obd *filter, struct page **pages,
int nr_pages, unsigned long *blocks,
iattr_from_obdo(&iattr, oa, i);
if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
+ unsigned int save;
+
CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n",
(unsigned long)oa->o_uid, (unsigned long)oa->o_gid);
/* To avoid problems with quotas, UID and GID must be set
* in the inode before filter_direct_io() - see bug 10357. */
- if (iattr.ia_valid & ATTR_UID)
- inode->i_uid = iattr.ia_uid;
- if (iattr.ia_valid & ATTR_GID)
- inode->i_gid = iattr.ia_gid;
+ save = iattr.ia_valid;
+ iattr.ia_valid &= (ATTR_UID | ATTR_GID);
+ rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0);
+ CDEBUG(D_QUOTA, "set uid(%u)/gid(%u) to ino(%lu). rc(%d)\n",
+ iattr.ia_uid, iattr.ia_gid, inode->i_ino, rc);
+ iattr.ia_valid = save & ~(ATTR_UID | ATTR_GID);
}
/* filter_direct_io drops i_mutex */
err = lquota_adjust(quota_interface, obd, qcids, NULL, rc,
FSFILT_OP_CREATE);
CDEBUG(err ? D_ERROR : D_QUOTA,
- "error filter adjust qunit! (rc:%d)\n", err);
+ "filter adjust qunit! (rc:%d)\n", err);
RETURN(rc);
}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * linux/fs/obdfilter/filter_san.c
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
- *
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
- */
-
-#define DEBUG_SUBSYSTEM S_FILTER
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
-#include <linux/version.h>
-
-#include <obd_class.h>
-#include <lustre_fsfilt.h>
-#include "filter_internal.h"
-
-/* sanobd setup methods - use a specific mount option */
-int filter_san_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
-{
- unsigned long page;
- int rc;
-
- if (lcfg->lcfg_bufcount < 3 || LUSTRE_CFG_BUFLEN(lcfg, 2) < 1)
- RETURN(-EINVAL);
-
- /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
- page = get_zeroed_page(GFP_KERNEL);
- if (!page)
- RETURN(-ENOMEM);
-
- /* for ext3/ldiskfs filesystem, we must mount in 'writeback' mode */
- if (!strcmp(lustre_cfg_string(lcfg, 2), "ldiskfs"))
- strcpy((void *)page, "data=writeback");
- else if (!strcmp(lustre_cfg_string(lcfg, 2), "ext3"))
- strcpy((void *)page, "data=writeback,asyncdel");
- else
- LBUG(); /* just a reminder */
-
- rc = filter_common_setup(obd, lcfg, (void *)page);
- free_page(page);
-
- return rc;
-}
-
-int filter_san_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
- int objcount, struct obd_ioobj *obj, int niocount,
- struct niobuf_remote *nb)
-{
- struct obd_ioobj *o = obj;
- struct niobuf_remote *rnb = nb;
- int rc = 0;
- int i;
- ENTRY;
- LASSERT(objcount == 1);
-
- for (i = 0; i < objcount; i++, o++) {
- struct dentry *dentry;
- struct inode *inode;
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- sector_t (*fs_bmap)(struct address_space *, sector_t);
-#else
- int (*fs_bmap)(struct address_space *, long);
-#endif
- int j;
-
- dentry = filter_oa2dentry(exp->exp_obd, oa);
- if (IS_ERR(dentry))
- GOTO(out, rc = PTR_ERR(dentry));
-
- inode = dentry->d_inode;
- fs_bmap = inode->i_mapping->a_ops->bmap;
-
- for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
- long block;
-
- block = rnb->offset >> inode->i_blkbits;
-
- if (cmd == OBD_BRW_READ) {
- block = fs_bmap(inode->i_mapping, block);
- } else {
- loff_t newsize = rnb->offset + rnb->len;
- /* fs_prep_san_write will also update inode
- * size for us:
- * (1) new alloced block
- * (2) existed block but size extented
- */
- /* FIXME We could call fs_prep_san_write()
- * only once for all the blocks allocation.
- * Now call it once for each block, for
- * simplicity. And if error happens, we
- * probably need to release previous alloced
- * block */
- rc = fs_prep_san_write(exp->exp_obd, inode,
- &block, 1, newsize);
- if (rc)
- break;
- }
-
- rnb->offset = block;
- }
- f_dput(dentry);
- }
-out:
- RETURN(rc);
-}
-
MODULES := osc
-osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o
+osc-objs := osc_request.o lproc_osc.o osc_create.o
@INCLUDE_RULES@
if LIBLUSTRE
noinst_LIBRARIES = libosc.a
-libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_internal.h
+libosc_a_SOURCES = osc_request.c osc_create.c osc_internal.h
libosc_a_CPPFLAGS = $(LLCPPFLAGS)
libosc_a_CFLAGS = $(LLCFLAGS)
endif
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2003 Cluster File Systems, Inc.
- *
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
- *
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
- *
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_OSC
-
-#ifdef __KERNEL__
-# include <linux/module.h>
-# include <obd.h>
-# include <obd_ost.h>
-# include <lustre_net.h>
-# include <lustre_dlm.h>
-# include <lustre_lib.h>
-# include <linux/lustre_compat25.h>
-
-/* convert a pathname into a kdev_t */
-static kdev_t path2dev(char *path)
-{
- struct dentry *dentry;
- struct nameidata nd;
- kdev_t dev = KDEVT_INIT(0);
-
- if (ll_path_lookup(path, LOOKUP_FOLLOW, &nd))
- return val_to_kdev(0);
-
- dentry = nd.dentry;
- if (dentry->d_inode && !is_bad_inode(dentry->d_inode) &&
- S_ISBLK(dentry->d_inode->i_mode))
- dev = dentry->d_inode->i_rdev;
- path_release(&nd);
-
- return dev;
-}
-
-int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
-{
- struct client_obd *cli = &obddev->u.cli;
- ENTRY;
-
- if (lcfg->lcfg_bufcount < 4 || LUSTRE_CFG_BUFLEN(lcfg, 3) < 1) {
- CERROR("setup requires a SAN device pathname\n");
- RETURN(-EINVAL);
- }
-
- client_obd_setup(obddev, lcfg);
-
- cli->cl_sandev = path2dev(lustre_cfg_string(lcfg, 3));
- if (!kdev_t_to_nr(cli->cl_sandev)) {
- CERROR("%s seems not a valid SAN device\n",
- lustre_cfg_string(lcfg, 3));
- RETURN(-EINVAL);
- }
-
- RETURN(0);
-}
-#endif
RETURN(rc);
}
-/* Note: caller will lock/unlock, and set uptodate on the pages */
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct obd_export *exp, struct obd_info *oinfo,
- obd_count page_count, struct brw_page *pga)
-{
- struct ptlrpc_request *req = NULL;
- struct ost_body *body;
- struct niobuf_remote *nioptr;
- struct obd_ioobj *iooptr;
- struct obd_import *imp = class_exp2cliimp(exp);
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body)};
- int swab, mapped = 0, rc;
- ENTRY;
-
- /* XXX does not handle 'new' brw protocol */
-
- size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj);
- size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
-
- req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SAN_READ, 4, size, NULL);
- if (!req)
- RETURN(-ENOMEM);
-
- /* FIXME bug 249 */
- /* See bug 7198 */
- if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL)
- req->rq_request_portal = OST_IO_PORTAL;
-
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1,
- sizeof(*iooptr));
- nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
- sizeof(*nioptr) * page_count);
-
- memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa));
-
- obdo_to_ioobj(oinfo->oi_oa, iooptr);
- iooptr->ioo_bufcnt = page_count;
-
- for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
- LASSERT(PageLocked(pga[mapped].pg));
- LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off);
-
- nioptr->offset = pga[mapped].off;
- nioptr->len = pga[mapped].count;
- nioptr->flags = pga[mapped].flag;
- }
-
- size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr);
- ptlrpc_req_set_repsize(req, 3, size);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- GOTO(out_req, rc);
-
- body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
- lustre_swab_ost_body);
- if (body == NULL) {
- CERROR("Can't unpack body\n");
- GOTO(out_req, rc = -EPROTO);
- }
-
- memcpy(oinfo->oi_oa, &body->oa, sizeof(*oinfo->oi_oa));
-
- swab = lustre_msg_swabbed(req->rq_repmsg);
- LASSERT_REPSWAB(req, REPLY_REC_OFF + 1);
- nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
- size[REPLY_REC_OFF + 1]);
- if (!nioptr) {
- /* nioptr missing or short */
- GOTO(out_req, rc = -EPROTO);
- }
-
- /* actual read */
- for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
- struct page *page = pga[mapped].pg;
- struct buffer_head *bh;
- kdev_t dev;
-
- if (swab)
- lustre_swab_niobuf_remote (nioptr);
-
- /* got san device associated */
- LASSERT(exp->exp_obd != NULL);
- dev = exp->exp_obd->u.cli.cl_sandev;
-
- /* hole */
- if (!nioptr->offset) {
- CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n",
- page->mapping->host->i_ino,
- page->index);
- memset(page_address(page), 0, CFS_PAGE_SIZE);
- continue;
- }
-
- if (!page->buffers) {
- create_empty_buffers(page, dev, CFS_PAGE_SIZE);
- bh = page->buffers;
-
- clear_bit(BH_New, &bh->b_state);
- set_bit(BH_Mapped, &bh->b_state);
- bh->b_blocknr = (unsigned long)nioptr->offset;
-
- clear_bit(BH_Uptodate, &bh->b_state);
-
- ll_rw_block(READ, 1, &bh);
- } else {
- bh = page->buffers;
-
- /* if buffer already existed, it must be the
- * one we mapped before, check it */
- LASSERT(!test_bit(BH_New, &bh->b_state));
- LASSERT(test_bit(BH_Mapped, &bh->b_state));
- LASSERT(bh->b_blocknr == (unsigned long)nioptr->offset);
-
- /* wait it's io completion */
- if (test_bit(BH_Lock, &bh->b_state))
- wait_on_buffer(bh);
-
- if (!test_bit(BH_Uptodate, &bh->b_state))
- ll_rw_block(READ, 1, &bh);
- }
-
-
- /* must do syncronous write here */
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- /* I/O error */
- rc = -EIO;
- goto out_req;
- }
- }
-
-out_req:
- ptlrpc_req_finished(req);
- RETURN(rc);
-}
-
-static int sanosc_brw_write(struct obd_export *exp, struct obd_info *oinfo,
- obd_count page_count, struct brw_page *pga)
-{
- struct ptlrpc_request *req = NULL;
- struct ost_body *body;
- struct niobuf_remote *nioptr;
- struct obd_ioobj *iooptr;
- struct obd_import *imp = class_exp2cliimp(exp);
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
- int swab, mapped = 0, rc;
- ENTRY;
-
- size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj);
- size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
-
- req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SAN_WRITE, 4, size, NULL,
- imp->imp_rq_pool, NULL);
- if (!req)
- RETURN(-ENOMEM);
-
- /* FIXME bug 249 */
- /* See bug 7198 */
- if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL)
- req->rq_request_portal = OST_IO_PORTAL;
-
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1,
- sizeof(*iooptr));
- nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
- sizeof(*nioptr) * page_count);
-
- memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa));
-
- obdo_to_ioobj(oinfo->oi_oa, iooptr);
- iooptr->ioo_bufcnt = page_count;
-
- /* pack request */
- for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
- LASSERT(PageLocked(pga[mapped].pg));
- LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off);
-
- nioptr->offset = pga[mapped].off;
- nioptr->len = pga[mapped].count;
- nioptr->flags = pga[mapped].flag;
- }
-
- size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr);
- ptlrpc_req_set_repsize(req, 3, size);
-
- rc = ptlrpc_queue_wait(req);
- if (rc)
- GOTO(out_req, rc);
-
- swab = lustre_msg_swabbed (req->rq_repmsg);
- LASSERT_REPSWAB(req, REPLY_REC_OFF + 1);
- nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
- size[REPLY_REC_OFF + 1]);
- if (!nioptr) {
- CERROR("absent/short niobuf array\n");
- GOTO(out_req, rc = -EPROTO);
- }
-
- /* actual write */
- for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
- struct page *page = pga[mapped].pg;
- struct buffer_head *bh;
- kdev_t dev;
-
- if (swab)
- lustre_swab_niobuf_remote (nioptr);
-
- /* got san device associated */
- LASSERT(exp->exp_obd != NULL);
- dev = exp->exp_obd->u.cli.cl_sandev;
-
- if (!page->buffers) {
- create_empty_buffers(page, dev, CFS_PAGE_SIZE);
- } else {
- /* checking */
- LASSERT(!test_bit(BH_New, &page->buffers->b_state));
- LASSERT(test_bit(BH_Mapped, &page->buffers->b_state));
- LASSERT(page->buffers->b_blocknr ==
- (unsigned long)nioptr->offset);
- }
- bh = page->buffers;
-
- LASSERT(bh);
-
- /* if buffer locked, wait it's io completion */
- if (test_bit(BH_Lock, &bh->b_state))
- wait_on_buffer(bh);
-
- clear_bit(BH_New, &bh->b_state);
- set_bit(BH_Mapped, &bh->b_state);
-
- /* override the block nr */
- bh->b_blocknr = (unsigned long)nioptr->offset;
-
- /* we are about to write it, so set it
- * uptodate/dirty
- * page lock should garentee no race condition here */
- set_bit(BH_Uptodate, &bh->b_state);
- set_bit(BH_Dirty, &bh->b_state);
-
- ll_rw_block(WRITE, 1, &bh);
-
- /* must do syncronous write here */
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh) || test_bit(BH_Dirty, &bh->b_state)) {
- /* I/O error */
- rc = -EIO;
- goto out_req;
- }
- }
-
-out_req:
- ptlrpc_req_finished(req);
- RETURN(rc);
-}
-
-static int sanosc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
- obd_count page_count, struct brw_page *pga,
- struct obd_trans_info *oti)
-{
- struct obd_import *imp = class_exp2cliimp(exp);
- struct client_obd *cli = &imp->imp_obd->u.cli;
- ENTRY;
-
- while (page_count) {
- obd_count pages_per_brw;
- int rc;
-
- if (page_count > cli->cl_max_pages_per_rpc)
- pages_per_brw = cli->cl_max_pages_per_rpc;
- else
- pages_per_brw = page_count;
-
- if (cmd & OBD_BRW_WRITE)
- rc = sanosc_brw_write(exp, oinfo, pages_per_brw, pga);
- else
- rc = sanosc_brw_read(exp, oinfo, pages_per_brw, pga);
-
- if (rc != 0)
- RETURN(rc);
-
- page_count -= pages_per_brw;
- pga += pages_per_brw;
- }
- RETURN(0);
-}
-#endif
-
static void osc_set_data_with_check(struct lustre_handle *lockh, void *data,
int flags)
{
* lov_enqueue() */
}
+ /* We already have a lock, and it's referenced */
+ oinfo->oi_cb_up(oinfo, ELDLM_OK);
+
/* For async requests, decref the lock. */
if (einfo->ei_rqset)
ldlm_lock_decref(oinfo->oi_lockh, einfo->ei_mode);
- /* We already have a lock, and it's referenced */
- oinfo->oi_cb_up(oinfo, ELDLM_OK);
RETURN(ELDLM_OK);
}
osc_set_data_with_check(oinfo->oi_lockh,
einfo->ei_cbdata,
einfo->ei_flags);
- ldlm_lock_decref(oinfo->oi_lockh, LCK_PW);
oinfo->oi_cb_up(oinfo, ELDLM_OK);
+ ldlm_lock_decref(oinfo->oi_lockh, LCK_PW);
RETURN(ELDLM_OK);
}
}
static struct llog_operations osc_mds_ost_orig_logops;
static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt,
- int count, struct llog_catid *catid)
+ int count, struct llog_catid *catid,
+ struct obd_uuid *uuid)
{
int rc;
ENTRY;
- osc_mds_ost_orig_logops = llog_lvfs_ops;
- osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup;
- osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
- osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add;
- osc_mds_ost_orig_logops.lop_connect = llog_origin_connect;
+ spin_lock(&obd->obd_dev_lock);
+ if (osc_mds_ost_orig_logops.lop_setup != llog_obd_origin_setup) {
+ osc_mds_ost_orig_logops = llog_lvfs_ops;
+ osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup;
+ osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
+ osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add;
+ osc_mds_ost_orig_logops.lop_connect = llog_origin_connect;
+ }
+ spin_unlock(&obd->obd_dev_lock);
rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count,
&catid->lci_logid, &osc_mds_ost_orig_logops);
.o_process_config = osc_process_config,
};
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct obd_ops sanosc_obd_ops = {
- .o_owner = THIS_MODULE,
- .o_setup = client_sanobd_setup,
- .o_precleanup = osc_precleanup,
- .o_cleanup = osc_cleanup,
- .o_add_conn = client_import_add_conn,
- .o_del_conn = client_import_del_conn,
- .o_connect = client_connect_import,
- .o_reconnect = osc_reconnect,
- .o_disconnect = client_disconnect_export,
- .o_statfs = osc_statfs,
- .o_statfs_async = osc_statfs_async,
- .o_packmd = osc_packmd,
- .o_unpackmd = osc_unpackmd,
- .o_create = osc_real_create,
- .o_destroy = osc_destroy,
- .o_getattr = osc_getattr,
- .o_getattr_async = osc_getattr_async,
- .o_setattr = osc_setattr,
- .o_setattr_async = osc_setattr_async,
- .o_brw = sanosc_brw,
- .o_punch = osc_punch,
- .o_sync = osc_sync,
- .o_enqueue = osc_enqueue,
- .o_match = osc_match,
- .o_change_cbdata = osc_change_cbdata,
- .o_cancel = osc_cancel,
- .o_cancel_unused = osc_cancel_unused,
- .o_join_lru = osc_join_lru,
- .o_iocontrol = osc_iocontrol,
- .o_import_event = osc_import_event,
- .o_llog_init = osc_llog_init,
- .o_llog_finish = osc_llog_finish,
-};
-#endif
-
extern quota_interface_t osc_quota_interface;
int __init osc_init(void)
{
struct lprocfs_static_vars lvars;
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- struct lprocfs_static_vars sanlvars;
-#endif
int rc;
ENTRY;
lprocfs_init_vars(osc, &lvars);
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- lprocfs_init_vars(osc, &sanlvars);
-#endif
+ request_module("lquota");
quota_interface = PORTAL_SYMBOL_GET(osc_quota_interface);
lquota_init(quota_interface);
init_obd_quota_ops(quota_interface, &osc_obd_ops);
RETURN(rc);
}
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- rc = class_register_type(&sanosc_obd_ops, NULL, sanlvars.module_vars,
- LUSTRE_SANOSC_NAME, NULL);
- if (rc) {
- class_unregister_type(LUSTRE_OSC_NAME);
- if (quota_interface)
- PORTAL_SYMBOL_PUT(osc_quota_interface);
- RETURN(rc);
- }
-#endif
-
RETURN(rc);
}
if (quota_interface)
PORTAL_SYMBOL_PUT(osc_quota_interface);
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- class_unregister_type(LUSTRE_SANOSC_NAME);
-#endif
class_unregister_type(LUSTRE_OSC_NAME);
}
RETURN(rc);
}
-static int ost_san_brw(struct ptlrpc_request *req, int cmd)
-{
- struct niobuf_remote *remote_nb, *res_nb, *pp_rnb = NULL;
- struct obd_ioobj *ioo;
- struct ost_body *body, *repbody;
- int rc, i, objcount, niocount, npages, swab;
- int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
- ENTRY;
-
- /* XXX not set to use latest protocol */
-
- swab = lustre_msg_swabbed(req->rq_reqmsg);
- body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
- lustre_swab_ost_body);
- if (body == NULL) {
- CERROR("Missing/short ost_body\n");
- GOTO(out, rc = -EFAULT);
- }
-
- ioo = lustre_swab_reqbuf(req, REQ_REC_OFF + 1, sizeof(*ioo),
- lustre_swab_obd_ioobj);
- if (ioo == NULL) {
- CERROR("Missing/short ioobj\n");
- GOTO(out, rc = -EFAULT);
- }
- objcount = lustre_msg_buflen(req->rq_reqmsg, REQ_REC_OFF + 1) /
- sizeof(*ioo);
- niocount = ioo[0].ioo_bufcnt;
- for (i = 1; i < objcount; i++) {
- if (swab)
- lustre_swab_obd_ioobj (&ioo[i]);
- niocount += ioo[i].ioo_bufcnt;
- }
-
- remote_nb = lustre_swab_reqbuf(req, REQ_REC_OFF + 2,
- niocount * sizeof(*remote_nb),
- lustre_swab_niobuf_remote);
- if (remote_nb == NULL) {
- CERROR("Missing/short niobuf\n");
- GOTO(out, rc = -EFAULT);
- }
- if (swab) { /* swab the remaining niobufs */
- for (i = 1; i < niocount; i++)
- lustre_swab_niobuf_remote (&remote_nb[i]);
- }
-
- /*
- * Per-thread array of struct niobuf_remote's was allocated by
- * ost_thread_init().
- */
- pp_rnb = ost_tls(req)->remote;
-
- /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
- npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
- if (npages < 0)
- GOTO (out, rc = npages);
-
- size[REPLY_REC_OFF + 1] = npages * sizeof(*pp_rnb);
- rc = lustre_pack_reply(req, 3, size, NULL);
- if (rc)
- GOTO(out, rc);
-
- req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
- objcount, ioo, npages, pp_rnb);
-
- if (req->rq_status)
- GOTO(out, rc = 0);
-
- repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
- sizeof(*repbody));
- memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-
- res_nb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
- size[REPLY_REC_OFF + 1]);
- memcpy(res_nb, remote_nb, size[REPLY_REC_OFF + 1]);
- rc = 0;
-out:
- target_committed_to_req(req);
- if (rc) {
- req->rq_status = rc;
- ptlrpc_error(req);
- } else {
- ptlrpc_reply(req);
- }
-
- return rc;
-}
-
-
static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req)
{
char *key, *val = NULL;
case OST_SETATTR:
case OST_WRITE:
case OST_READ:
- case OST_SAN_READ:
- case OST_SAN_WRITE:
case OST_PUNCH:
case OST_STATFS:
case OST_SYNC:
LASSERT(current->journal_info == NULL);
/* ost_brw_read sends its own replies */
RETURN(rc);
- case OST_SAN_READ:
- CDEBUG(D_INODE, "san read\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
- rc = ost_san_brw(req, OBD_BRW_READ);
- /* ost_san_brw sends its own replies */
- RETURN(rc);
- case OST_SAN_WRITE:
- CDEBUG(D_INODE, "san write\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
- rc = ost_san_brw(req, OBD_BRW_WRITE);
- /* ost_san_brw sends its own replies */
- RETURN(rc);
case OST_PUNCH:
CDEBUG(D_INODE, "punch\n");
OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)",
(long)req->rq_sent, CURRENT_SECONDS - req->rq_sent);
+ if (imp != NULL)
+ LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
+
spin_lock(&req->rq_lock);
req->rq_timedout = 1;
req->rq_wait_ctx = 0;
req->rq_self = ev->target.nid;
req->rq_rqbd = rqbd;
req->rq_phase = RQ_PHASE_NEW;
-#if CRAY_XT3
+#ifdef CRAY_XT3
req->rq_uid = ev->uid;
#endif
class_export_put(dlmexp);
if (imp->imp_conn_current != imp_conn) {
- LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
- imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid,
- libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
- imp->imp_conn_current = imp_conn;
+ if (imp->imp_conn_current)
+ LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
+ imp->imp_obd->obd_name,
+ imp_conn->oic_uuid.uuid,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+ imp->imp_conn_current = imp_conn;
}
CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
ptlrpc_invalidate_import(imp);
+ if (obd_dump_on_eviction) {
+ CERROR("dump the log upon eviction\n");
+ libcfs_debug_dumplog();
+ }
+
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
ptlrpc_import_recovery_state_machine(imp);
{ OST_OPEN, "ost_open" },
{ OST_CLOSE, "ost_close" },
{ OST_STATFS, "ost_statfs" },
- { OST_SAN_READ, "ost_san_read" },
- { OST_SAN_WRITE, "ost_san_write" },
+ { 14, NULL },
+ { 15, NULL },
{ OST_SYNC, "ost_sync" },
{ OST_SET_INFO, "ost_set_info" },
{ OST_QUOTACHECK, "ost_quotacheck" },
* is missing from the table above.
* or 2) The opcode space was renumbered or rearranged,
* and the opcode_offset() function in
- * ptlrpc_internals.h needs to be modified.
+ * ptlrpc_internal.h needs to be modified.
*/
__u32 offset = opcode_offset(opcode);
LASSERT(offset < LUSTRE_MAX_OPCODES);
m->lm_buflens[n] = len;
}
-static inline int
+static inline void
lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len)
{
if (n >= m->lm_bufcount)
void lustre_swab_qdata(struct qunit_data *d)
{
__swab32s (&d->qd_id);
+ __swab32s (&d->qd_flags);
+ __swab64s (&d->qd_count);
+}
+
+void lustre_swab_qdata_old(struct qunit_data_old *d)
+{
+ __swab32s (&d->qd_id);
__swab32s (&d->qd_type);
__swab32s (&d->qd_count);
__swab32s (&d->qd_isblk);
}
+#ifdef __KERNEL__
+/**
+ * Convert an old-format quota record to the new qunit_data layout, in place.
+ *
+ * The result pointer aliases \a d: the old record is copied to the stack and
+ * the new fields are then written over the same buffer.
+ * NOTE(review): this relies on struct qunit_data fitting in the storage of
+ * struct qunit_data_old -- confirm against the structure definitions.
+ *
+ * qd_type and qd_isblk are folded into qd_flags as QUOTA_IS_GRP and
+ * QUOTA_IS_BLOCK; qd_id and qd_count are carried over unchanged.
+ *
+ * \param d  old-format record, may be NULL
+ * \retval NULL if \a d is NULL
+ * \retval \a d reinterpreted as struct qunit_data otherwise
+ */
+struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d)
+{
+        struct qunit_data_old tmp;
+        struct qunit_data *ret;
+        ENTRY;
+
+        /* leave through RETURN so the ENTRY trace above stays balanced */
+        if (!d)
+                RETURN(NULL);
+
+        /* snapshot first: ret aliases d, so the stores below overwrite it */
+        tmp = *d;
+        ret = (struct qunit_data *)d;
+        ret->qd_id = tmp.qd_id;
+        ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) |
+                        (tmp.qd_isblk ? QUOTA_IS_BLOCK : 0);
+        ret->qd_count = tmp.qd_count;
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lustre_quota_old_to_new);
+
+/**
+ * Convert a new-format quota record to the old qunit_data_old layout, in
+ * place.
+ *
+ * The result pointer aliases \a d: the new record is copied to the stack and
+ * the old fields are then written over the same buffer.
+ * NOTE(review): this relies on struct qunit_data_old fitting in the storage
+ * of struct qunit_data -- confirm against the structure definitions.
+ *
+ * The count is narrowed to 32 bits, so callers must not pass a count above
+ * MAX_QUOTA_COUNT32 (asserted below).
+ *
+ * \param d  new-format record, may be NULL
+ * \retval NULL if \a d is NULL
+ * \retval \a d reinterpreted as struct qunit_data_old otherwise
+ */
+struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d)
+{
+        struct qunit_data tmp;
+        struct qunit_data_old *ret;
+        ENTRY;
+
+        /* leave through RETURN so the ENTRY trace above stays balanced */
+        if (!d)
+                RETURN(NULL);
+
+        LASSERT(d->qd_count <= MAX_QUOTA_COUNT32);
+        /* snapshot first: ret aliases d, so the stores below overwrite it */
+        tmp = *d;
+        ret = (struct qunit_data_old *)d;
+        ret->qd_id = tmp.qd_id;
+        ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA);
+        ret->qd_count = (__u32)tmp.qd_count;
+        ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 1 : 0);
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lustre_quota_new_to_old);
+#endif /* __KERNEL__ */
+
+
+void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask,
+ const char *file, const char *func, const int line,
+ const char *fmt, va_list args);
+void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask,
+ const char *file, const char *func, const int line,
+ const char *fmt, ...);
+
+/*
+ * Log a caller-supplied message followed by a one-line summary of \a req.
+ *
+ * The caller's fmt/varargs are emitted first through cdebug_va(); a second
+ * cdebug() call then appends the request pointer, xid, transno, opcode,
+ * target name, remote uuid, request portal, request/reply lengths, refcount,
+ * request flags, message flags and status values.
+ *
+ * Fields whose source pointer is missing fall back to a placeholder:
+ * -1 for the opcode (no rq_reqmsg) and the portal (no import or client),
+ * "<?>" for the target and remote uuid (neither import nor export),
+ * 0 for message flags and reply status (no rq_reqmsg / rq_repmsg).
+ *
+ * NOTE(review): imp_connection and exp_connection are dereferenced without
+ * a NULL check -- confirm they are always set when the import/export is.
+ */
+void debug_req(cfs_debug_limit_state_t *cdls,
+ __u32 level, struct ptlrpc_request *req,
+ const char *file, const char *func, const int line,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ /* first the caller's own message ... */
+ va_start(args, fmt);
+ cdebug_va(cdls, level, file, func, line, fmt, args);
+ va_end(args);
+
+ /* ... then the request summary, terminated by a newline */
+ cdebug(cdls, level, file, func, line,
+ " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "
+ REQ_FLAGS_FMT"/%x/%x rc %d/%d\n",
+ req, req->rq_xid, req->rq_transno,
+ req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
+ req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :
+ req->rq_export ?
+ (char*)req->rq_export->exp_client_uuid.uuid : "<?>",
+ req->rq_import ?
+ (char *)req->rq_import->imp_connection->c_remote_uuid.uuid :
+ req->rq_export ?
+ (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>",
+ (req->rq_import && req->rq_import->imp_client) ?
+ req->rq_import->imp_client->cli_request_portal : -1,
+ req->rq_reqlen, req->rq_replen, atomic_read(&req->rq_refcount),
+ DEBUG_REQ_FLAGS(req),
+ req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0,
+ req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0,
+ req->rq_status,
+ req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0);
+}
+EXPORT_SYMBOL(debug_req);
EXPORT_SYMBOL(lustre_swab_ldlm_request);
EXPORT_SYMBOL(lustre_swab_ldlm_reply);
EXPORT_SYMBOL(lustre_swab_qdata);
+EXPORT_SYMBOL(lustre_swab_qdata_old);
EXPORT_SYMBOL(lustre_msg_get_flags);
EXPORT_SYMBOL(lustre_msg_add_flags);
EXPORT_SYMBOL(lustre_msg_set_flags);
EXPORT_SYMBOL(lustre_swab_generic_32s);
/* recover.c */
-EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
-EXPORT_SYMBOL(ptlrpc_run_failed_import_upcall);
EXPORT_SYMBOL(ptlrpc_disconnect_import);
EXPORT_SYMBOL(ptlrpc_resend);
EXPORT_SYMBOL(ptlrpc_wake_delayed);
static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
-void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
-{
- char *argv[4];
- char *envp[3];
- int rc;
- ENTRY;
-
- argv[0] = obd_lustre_upcall;
- argv[1] = "RECOVERY_OVER";
- argv[2] = obd->obd_uuid.uuid;
- argv[3] = NULL;
-
- envp[0] = "HOME=/";
- envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- envp[2] = NULL;
-
- rc = USERMODEHELPER(argv[0], argv, envp);
- if (rc < 0) {
- CERROR("Error invoking recovery upcall %s %s %s: %d; check "
- "/proc/sys/lustre/upcall\n",
- argv[0], argv[1], argv[2], rc);
-
- } else {
- CWARN("Invoked upcall %s %s %s\n",
- argv[0], argv[1], argv[2]);
- }
-}
-
-void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
-{
-#ifdef __KERNEL__
- char *argv[7];
- char *envp[3];
- int rc;
- ENTRY;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_CLOSED) {
- spin_unlock(&imp->imp_lock);
- EXIT;
- return;
- }
- spin_unlock(&imp->imp_lock);
-
- argv[0] = obd_lustre_upcall;
- argv[1] = "FAILED_IMPORT";
- argv[2] = obd2cli_tgt(imp->imp_obd);
- argv[3] = imp->imp_obd->obd_name;
- argv[4] = imp->imp_connection->c_remote_uuid.uuid;
- argv[5] = imp->imp_obd->obd_uuid.uuid;
- argv[6] = NULL;
-
- envp[0] = "HOME=/";
- envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- envp[2] = NULL;
-
- rc = USERMODEHELPER(argv[0], argv, envp);
- if (rc < 0) {
- CERROR("Error invoking recovery upcall %s %s %s %s %s %s: %d; "
- "check /proc/sys/lustre/lustre_upcall\n",
- argv[0], argv[1], argv[2], argv[3], argv[4], argv[5],rc);
- } else {
- CWARN("Invoked upcall %s %s %s %s %s %s\n",
- argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
- }
-#else
- if (imp->imp_state == LUSTRE_IMP_CLOSED) {
- EXIT;
- return;
- }
- ptlrpc_recover_import(imp, NULL);
-#endif
-}
-
-/* This might block waiting for the upcall to start, so it should
- * not be called from a thread that shouldn't block. (Like ptlrpcd) */
void ptlrpc_initiate_recovery(struct obd_import *imp)
{
ENTRY;
- LASSERT (obd_lustre_upcall != NULL);
-
- if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) {
- CDEBUG(D_HA, "%s: starting recovery without upcall\n",
- obd2cli_tgt(imp->imp_obd));
- ptlrpc_connect_import(imp, NULL);
- } else if (strcmp(obd_lustre_upcall, "NONE") == 0) {
- CDEBUG(D_HA, "%s: recovery disabled\n",
- obd2cli_tgt(imp->imp_obd));
- } else {
- CDEBUG(D_HA, "%s: calling upcall to start recovery\n",
- obd2cli_tgt(imp->imp_obd));
- ptlrpc_run_failed_import_upcall(imp);
- }
+ CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
+ ptlrpc_connect_import(imp, NULL);
EXIT;
}
/* special security flags accoding to opcode */
switch (opcode) {
case OST_READ:
- case OST_SAN_READ:
req->rq_bulk_read = 1;
break;
case OST_WRITE:
- case OST_SAN_WRITE:
req->rq_bulk_write = 1;
break;
case SEC_CTX_INIT:
if (rc == SECSVC_OK) {
__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
- if (opc == OST_WRITE || opc == OST_SAN_WRITE)
+ if (opc == OST_WRITE)
req->rq_bulk_write = 1;
- else if (opc == OST_READ || opc == OST_SAN_READ)
+ else if (opc == OST_READ)
req->rq_bulk_read = 1;
}
struct list_head lq_waiters; /* All write threads waiting for this qunit */
};
+/**
+ * Decide whether quota records sent over \a imp have to be translated to the
+ * old format, i.e. whether the peer lacks OBD_CONNECT_QUOTA64.
+ *
+ * If the import already carries connect flags they are used directly.
+ * Otherwise the exports of the import's obd are searched for the one whose
+ * reverse import is \a imp; its connect flags are cached in the import and
+ * used for the decision.
+ *
+ * \param imp  import to check, must not be NULL
+ * \retval 1 the peer does not advertise OBD_CONNECT_QUOTA64
+ * \retval 0 the peer advertises OBD_CONNECT_QUOTA64, or no matching export
+ *           was found
+ */
+int should_translate_quota (struct obd_import *imp)
+{
+        struct obd_device *obd;
+        struct obd_export *tmp;
+        int translate;
+        ENTRY;
+
+        LASSERT(imp);
+        if (imp->imp_connect_data.ocd_connect_flags) {
+                translate = !(imp->imp_connect_data.ocd_connect_flags &
+                              OBD_CONNECT_QUOTA64);
+                RETURN(translate);
+        }
+
+        obd = imp->imp_obd;
+        spin_lock(&obd->obd_dev_lock);
+        list_for_each_entry(tmp, &obd->obd_exports, exp_obd_chain) {
+                if (tmp->exp_imp_reverse == imp) {
+                        imp->imp_connect_data.ocd_connect_flags =
+                                tmp->exp_connect_flags;
+                        /* evaluate the export's flags while the list lock is
+                         * still held; tmp is not touched after unlocking */
+                        translate = !(tmp->exp_connect_flags &
+                                      OBD_CONNECT_QUOTA64);
+                        spin_unlock(&obd->obd_dev_lock);
+                        RETURN(translate);
+                }
+        }
+        spin_unlock(&obd->obd_dev_lock);
+        CDEBUG(D_QUOTA, "don't find the corresponding export!\n");
+
+        RETURN(0);
+}
+
void qunit_cache_cleanup(void)
{
int i;
RETURN(0);
}
-static inline int const
+static inline int
+qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
+ __attribute__((__const__));
+
+static inline int
qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
{
unsigned int id = qdata->qd_id;
- unsigned int type = qdata->qd_type;
+ unsigned int type = qdata->qd_flags & QUOTA_IS_GRP;
unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id;
tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) {
tmp = &qunit->lq_data;
if (qunit->lq_ctxt == qctxt &&
- qdata->qd_id == tmp->qd_id && qdata->qd_type == tmp->qd_type
- && qdata->qd_isblk == tmp->qd_isblk)
+ qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags)
return qunit;
}
return NULL;
__u64 usage, limit;
struct obd_quotactl *qctl;
int ret = 0;
+ __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+ __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
ENTRY;
if (!sb_any_quota_enabled(sb))
RETURN(0);
/* ignore root user */
- if (qdata->qd_id == 0 && qdata->qd_type == USRQUOTA)
+ if (qdata->qd_id == 0 && qdata_type == USRQUOTA)
RETURN(0);
OBD_ALLOC_PTR(qctl);
/* get fs quota usage & limit */
qctl->qc_cmd = Q_GETQUOTA;
qctl->qc_id = qdata->qd_id;
- qctl->qc_type = qdata->qd_type;
+ qctl->qc_type = qdata_type;
ret = fsfilt_quotactl(obd, sb, qctl);
if (ret) {
if (ret == -ESRCH) /* no limit */
GOTO(out, ret);
}
- if (qdata->qd_isblk) {
+ if (is_blk) {
usage = qctl->qc_dqblk.dqb_curspace;
limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
qunit_sz = qctxt->lqc_bunit_sz;
GOTO(out, ret = 0);
/* we don't count the MIN_QLIMIT */
- if ((limit == MIN_QLIMIT && !qdata->qd_isblk) ||
- (toqb(limit) == MIN_QLIMIT && qdata->qd_isblk))
+ if ((limit == MIN_QLIMIT && !is_blk) ||
+ (toqb(limit) == MIN_QLIMIT && is_blk))
limit = 0;
LASSERT(qdata->qd_count == 0);
int qw_rc;
};
-#define QDATA_DEBUG(qd, fmt, arg...) \
- CDEBUG(D_QUOTA, "id(%u) type(%u) count(%u) isblk(%u):" \
- fmt, qd->qd_id, qd->qd_type, qd->qd_count, qd->qd_isblk, \
- ## arg); \
-
#define INC_QLIMIT(limit, count) (limit == MIN_QLIMIT) ? \
(limit = count) : (limit += count)
schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
struct qunit_data *qdata, int opc, int wait);
+/*
+ * Front end for schedule_dqacq(): when the context has an import whose peer
+ * needs translated (old-format) quota records, a request whose qd_count
+ * exceeds MAX_QUOTA_COUNT32 is issued as a series of MAX_QUOTA_COUNT32-sized
+ * requests, followed by one request for whatever count remains.
+ *
+ * Every piece is scheduled even if an earlier one failed; the return value
+ * is the first non-zero result seen, or 0 if all pieces succeeded.
+ * qdata->qd_count is reduced by each full-sized piece that is split off.
+ */
+static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                                       struct qunit_data *qdata, int opc, int wait)
+{
+        struct qunit_data chunk;
+        int first_err = 0;
+        int err;
+        ENTRY;
+
+        LASSERT(qdata);
+        if (qctxt->lqc_import) {
+                for (;;) {
+                        if (!should_translate_quota(qctxt->lqc_import))
+                                break;
+                        if (qdata->qd_count <= MAX_QUOTA_COUNT32)
+                                break;
+
+                        /* carve one maximum-sized piece off the request */
+                        chunk = *qdata;
+                        chunk.qd_count = MAX_QUOTA_COUNT32;
+                        qdata->qd_count -= chunk.qd_count;
+
+                        err = schedule_dqacq(obd, qctxt, &chunk, opc, wait);
+                        if (first_err == 0)
+                                first_err = err;
+                }
+        }
+
+        /* the remainder (or the whole request when nothing was split off) */
+        if (qdata->qd_count) {
+                err = schedule_dqacq(obd, qctxt, qdata, opc, wait);
+                if (first_err == 0)
+                        first_err = err;
+        }
+
+        RETURN(first_err);
+}
+
static int
dqacq_completion(struct obd_device *obd,
struct lustre_quota_ctxt *qctxt,
unsigned long qunit_sz;
struct qunit_waiter *qw, *tmp;
int err = 0;
+ __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+ __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
ENTRY;
LASSERT(qdata);
- qunit_sz = qdata->qd_isblk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
- LASSERT(!(qdata->qd_count % qunit_sz));
+ qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
+ /* qd_count is now 64-bit, so the divisibility check below is disabled */
+ //LASSERT(!(qdata->qd_count % qunit_sz));
/* update local operational quota file */
if (rc == 0) {
- __u32 count = QUSG(qdata->qd_count, qdata->qd_isblk);
+ __u32 count = QUSG(qdata->qd_count, is_blk);
struct obd_quotactl *qctl;
__u64 *hardlimit;
* set fs quota limit */
qctl->qc_cmd = Q_GETQUOTA;
qctl->qc_id = qdata->qd_id;
- qctl->qc_type = qdata->qd_type;
+ qctl->qc_type = qdata_type;
err = fsfilt_quotactl(obd, sb, qctl);
if (err) {
CERROR("error get quota fs limit! (rc:%d)\n", err);
GOTO(out_mem, err);
}
- if (qdata->qd_isblk) {
+ if (is_blk) {
qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
hardlimit = &qctl->qc_dqblk.dqb_bhardlimit;
} else {
* - local dqacq/dqrel.
* - local disk io failure.
*/
- if (err || (rc && rc != -EBUSY) ||
- is_master(obd, qctxt, qdata->qd_id, qdata->qd_type))
+ if (err || (rc && rc != -EBUSY) ||
+ is_master(obd, qctxt, qdata->qd_id, qdata_type))
RETURN(err);
/* reschedule another dqacq/dqrel if needed */
if (rc > 0) {
int opc;
opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
- rc = schedule_dqacq(obd, qctxt, qdata, opc, 0);
+ rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0);
QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc);
}
RETURN(err);
struct lustre_qunit *qunit = aa->aa_qunit;
struct obd_device *obd = req->rq_import->imp_obd;
struct qunit_data *qdata = NULL;
+ struct qunit_data_old *qdata_old = NULL;
ENTRY;
- qdata = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*qdata),
- lustre_swab_qdata);
+ LASSERT(req);
+ LASSERT(req->rq_import);
+ if ((req->rq_import->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) &&
+ !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+ CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+ qdata = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(*qdata), lustre_swab_qdata);
+ } else {
+ CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+ qdata_old = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(struct qunit_data_old),
+ lustre_swab_qdata_old);
+ qdata = lustre_quota_old_to_new(qdata_old);
+ }
if (qdata == NULL) {
DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data\n");
RETURN(-EPROTO);
}
LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
- qdata->qd_type == qunit->lq_data.qd_type &&
+ (qdata->qd_flags & QUOTA_IS_GRP) == (qunit->lq_data.qd_flags & QUOTA_IS_GRP) &&
(qdata->qd_count == qunit->lq_data.qd_count ||
qdata->qd_count == 0));
LASSERT(qunit);
/* master is going to dqacq/dqrel from itself */
- if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_type)) {
+ if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP)) {
int rc2;
QDATA_DEBUG(qdata, "local %s.\n",
opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
RETURN(-ENOMEM);
}
- reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF,
- sizeof(*reqdata));
- *reqdata = *qdata;
+ LASSERT(!should_translate_quota(qctxt->lqc_import) ||
+ qdata->qd_count <= MAX_QUOTA_COUNT32);
+ if (should_translate_quota(qctxt->lqc_import) ||
+ OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT))
+ {
+ struct qunit_data_old *reqdata_old, *tmp;
+
+ reqdata_old = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF,
+ sizeof(*reqdata_old));
+ tmp = lustre_quota_new_to_old(qdata);
+ *reqdata_old = *tmp;
+ size[1] = sizeof(*reqdata_old);
+ CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+ } else {
+ reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF,
+ sizeof(*reqdata));
+ *reqdata = *qdata;
+ size[1] = sizeof(*reqdata);
+ CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+ }
ptlrpc_req_set_repsize(req, 2, size);
CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
for (i = 0; i < MAXQUOTAS; i++) {
qdata[i].qd_id = id[i];
- qdata[i].qd_type = i;
- qdata[i].qd_isblk = isblk;
+ qdata[i].qd_flags = 0;
+ qdata[i].qd_flags |= i;
+ qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
qdata[i].qd_count = 0;
ret = check_cur_qunit(obd, qctxt, &qdata[i]);
int opc;
/* need acquire or release */
opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
- ret = schedule_dqacq(obd, qctxt, &qdata[i], opc, wait);
+ ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i],
+ opc, wait);
if (!rc)
rc = ret;
}
qw.qw_rc = 0;
qdata.qd_id = id;
- qdata.qd_type = type;
- qdata.qd_isblk = isblk;
+ qdata.qd_flags = 0;
+ qdata.qd_flags |= type;
+ qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
qdata.qd_count = 0;
spin_lock(&qunit_hash_lock);
goto free;
qdata.qd_id = dqid->di_id;
- qdata.qd_type = type;
- qdata.qd_isblk = 1;
+ qdata.qd_flags = 0;
+ qdata.qd_flags |= type;
+ qdata.qd_flags |= QUOTA_IS_BLOCK;
qdata.qd_count = 0;
ret = check_cur_qunit(obd, qctxt, &qdata);
if (ret > 0) {
int opc;
opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
- rc = schedule_dqacq(obd, qctxt, &qdata, opc, 0);
+ rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0);
} else
rc = 0;
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
-static inline int const hashfn(struct client_obd *cli,
- unsigned long id,
- int type)
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
+ __attribute__((__const__));
+
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
{
unsigned long tmp = ((unsigned long)cli>>6) ^ id;
tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
qinfo->qi_info[0].dqi_free_entry, \
qinfo->qi_info[1].dqi_free_entry, ## arg);
+/*
+ * Log a qunit_data record at D_QUOTA: id, type bit (QUOTA_IS_GRP),
+ * count and block bit (QUOTA_IS_BLOCK), followed by the caller's message.
+ *
+ * qd is parenthesised so that expressions such as &qdata can be passed, and
+ * each value is cast to the exact type its conversion specifier expects.
+ * qd is evaluated more than once, so it must not have side effects.
+ * The expansion ends in ';' as it always has; callers rely on that.
+ */
+#define QDATA_DEBUG(qd, fmt, arg...)                                          \
+        CDEBUG(D_QUOTA, "id(%u) type(%lu) count(%llu) isblk(%lu):"           \
+               fmt, (qd)->qd_id,                                              \
+               (unsigned long)((qd)->qd_flags & QUOTA_IS_GRP),                \
+               (unsigned long long)(qd)->qd_count,                            \
+               (unsigned long)(((qd)->qd_flags & QUOTA_IS_BLOCK) >> 1),       \
+               ## arg);
+
+
/* quota_context.c */
void qunit_cache_cleanup(void);
int qunit_cache_init(void);
EXIT;
}
-static inline int const dquot_hashfn(struct lustre_quota_info *info,
- unsigned int id, int type)
+static inline int
+dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type)
+ __attribute__((__const__));
+
+static inline int
+dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type)
{
unsigned long tmp = ((unsigned long)info >> L1_CACHE_SHIFT) ^ id;
tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
struct lustre_dquot *dquot = NULL;
__u64 *usage = NULL;
__u32 hlimit = 0, slimit = 0;
+ __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+ __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
time_t *time = NULL;
unsigned int grace = 0;
int rc = 0;
OBD_FAIL_RETURN(OBD_FAIL_OBD_DQACQ, -EIO);
/* slaves never acquires qunit for user root */
- LASSERT(qdata->qd_id || qdata->qd_type == GRPQUOTA);
+ LASSERT(qdata->qd_id || qdata_type);
- dquot = lustre_dqget(obd, info, qdata->qd_id, qdata->qd_type);
+ dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type);
if (IS_ERR(dquot))
RETURN(PTR_ERR(dquot));
GOTO(out, rc = -EBUSY);
}
- if (qdata->qd_isblk) {
- grace = info->qi_info[qdata->qd_type].dqi_bgrace;
+ if (is_blk) {
+ grace = info->qi_info[qdata_type].dqi_bgrace;
usage = &dquot->dq_dqb.dqb_curspace;
hlimit = dquot->dq_dqb.dqb_bhardlimit;
slimit = dquot->dq_dqb.dqb_bsoftlimit;
time = &dquot->dq_dqb.dqb_btime;
} else {
- grace = info->qi_info[qdata->qd_type].dqi_igrace;
+ grace = info->qi_info[qdata_type].dqi_igrace;
usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes;
hlimit = dquot->dq_dqb.dqb_ihardlimit;
slimit = dquot->dq_dqb.dqb_isoftlimit;
switch (opc) {
case QUOTA_DQACQ:
if (hlimit &&
- QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > hlimit)
+ QUSG(*usage + qdata->qd_count, is_blk) > hlimit)
GOTO(out, rc = -EDQUOT);
if (slimit &&
- QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > slimit) {
+ QUSG(*usage + qdata->qd_count, is_blk) > slimit) {
if (*time && cfs_time_current_sec() >= *time)
GOTO(out, rc = -EDQUOT);
else if (!*time)
*usage -= qdata->qd_count;
/* (usage <= soft limit) but not (usage < soft limit) */
- if (!slimit || QUSG(*usage, qdata->qd_isblk) <= slimit)
+ if (!slimit || QUSG(*usage, is_blk) <= slimit)
*time = 0;
break;
default:
# These are scripts that are generated from .in files
genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh
-sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh
+sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh lustre_rmmod.sh
EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \
$(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \
- lustre_up14.sh
+ lustre_up14.sh lustre_rmmod.sh
scriptlibdir = $(libdir)/@PACKAGE@
scriptlib_DATA = lc_common.sh
my $lov = $mds->{"lov"};
my $mkfs_options="";
if (defined($lov->{"stripe_sz"})) {
- $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . " ";
+ $mkfs_options .= "lov.stripesize=" . $lov->{"stripe_sz"} . " ";
}
if (defined($lov->{"stripe_cnt"})) {
- $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . " ";
+ $mkfs_options .= "lov.stripecount=" . $lov->{"stripe_cnt"} . " ";
}
if (defined($lov->{"stripe_pattern"})) {
- $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . " ";
+ $mkfs_options .= "lov.stripetype=" . $lov->{"stripe_pattern"} . " ";
}
chop($mkfs_options);
if ($mkfs_options ne "") {
--- /dev/null
+#!/bin/sh
+#
+# Remove all Lustre modules.  Won't succeed if they're in use, or if you
+# manually did a 'lctl network up'.
+###############################################################################
+
+# Search for lctl next to this script and in ../utils before the regular PATH.
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
+
+# First pass: unload every module named in the second column of
+# 'lctl modules'; all output from this attempt is discarded.
+lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+# Do it again, in case we tried to unload the LNDs too early.  This pass only
+# runs if lsmod still lists lnet, and rmmod's output is left visible.
+lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod
SRCDIR=`dirname $0`
PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
LUSTRE=${LUSTRE:-`dirname $0`/..}
RLUSTRE=${RLUSTRE:-$LUSTRE}
MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre}
mount_client() {
local MOUNTPATH=$1
- echo "mount lustre on ${MOUNTPATH}....."
+ echo "mount $FSNAME on ${MOUNTPATH}....."
zconf_mount `hostname` $MOUNTPATH || return 96
}
}
run_test 23 "interrupt client during recovery mount delay"
+# Bring up a second filesystem (${FSNAME}2) with its own MDT and OST on the
+# same hosts as the existing mds/ost facets, mount it on $MOUNT2, do a small
+# copy/remove on it, then tear everything down again.
+# A formatting failure exits the script with 10; the return codes 1-6
+# identify which later step failed.
+test_24a() {
+ local fs2mds_HOST=$mds_HOST
+ # the second MDT registers with the MGS at $MGSNID (--nomgs)
+ add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --nomgs --mgsnode=$MGSNID --reformat ${MDSDEV}_2 || exit 10
+
+ local fs2ost_HOST=$ost_HOST
+ local fs2ostdev=$(ostdevname 1)_2
+ add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME}2 --reformat $fs2ostdev || exit 10
+
+ setup
+ start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS
+ start fs2ost $fs2ostdev $OST_MOUNT_OPTS
+ mkdir -p $MOUNT2
+ mount -t lustre $MGSNID:/${FSNAME}2 $MOUNT2 || return 1
+ check_mount || return 2
+ sleep 5
+ # exercise the second filesystem with a create and an unlink
+ cp /etc/passwd $MOUNT2/b || return 3
+ rm $MOUNT2/b || return 4
+ # the second mount must still be listed in /proc/mounts
+ grep $MOUNT2' ' /proc/mounts > /dev/null || return 5
+ df
+ stop_mds
+ umount $MOUNT2
+ stop fs2mds -f
+ stop fs2ost -f
+ cleanup || return 6
+}
+run_test 24a "Multiple MDTs on a single node"
+
+# Format a second MDT that carries its own MGS (--mgs) and check that it
+# cannot be started after the regular setup: a successful start is the
+# failure case and returns 2.
+test_24b() {
+ local fs2mds_HOST=$mds_HOST
+ add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat ${MDSDEV}_2 || exit 10
+ setup
+ # expected to fail; success means a second MGS was accepted
+ start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS && return 2
+ cleanup || return 6
+}
+run_test 24b "Multiple MGSs on a single node (should return err)"
+
+# With the filesystem set up and mounted, try to rmmod every module listed by
+# '$LCTL modules'.  The rmmod result is ignored (|| true); the test passes as
+# long as the mount check beforehand and the cleanup afterwards succeed.
+test_25() {
+ setup
+ check_mount || return 2
+ local MODULES=$($LCTL modules | awk '{ print $2 }')
+ rmmod $MODULES 2>/dev/null || true
+ cleanup || return 6
+}
+run_test 25 "Verify modules are referenced"
+
+
+
umount_client $MOUNT
cleanup_nocli
cleanup_krb5_env
rc = write(fd, wbuf, len);
if (rc != len) {
- printf("Write error %s (rc = %d)\n",strerror(errno),rc);
+ printf("Write error %s (rc = %d, len = %ld)\n",
+ strerror(errno), rc, len);
return 1;
}
}
SETUP=${SETUP:-"setup"}
CLEANUP=${CLEANUP:-"cleanup"}
-UPCALL=${UPCALL:-DEFAULT}
build_test_filter
MAX_ERR=4 # max expected error from e2fsck
fi
+# Print the devices named in /proc/fs/lustre/$1/*/mntdev, one per line.
+#   $1 - obd type directory under /proc/fs/lustre (callers pass "mds" or
+#        "obdfilter")
+# For a device whose name contains "loop", the text between the parentheses
+# in the 'losetup' output is printed instead -- presumably the backing file;
+# verify against the losetup output format in use.
+get_mnt_devs() {
+ DEVS=`cat /proc/fs/lustre/$1/*/mntdev`
+ for DEV in $DEVS; do
+ case $DEV in
+ *loop*) losetup $DEV | sed -e "s/.*(//" -e "s/).*//" ;;
+ *) echo $DEV ;;
+ esac
+ done
+}
+
if [ "$LFSCK_SETUP" != "no" ]; then
#Create test directory
rm -rf $DIR
done
MDS_REMOVE=`echo $MDS_REMOVE | sed "s#$MOUNT/##g"`
- OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev`
+ MDTDEVS=`get_mnt_devs mds`
+ OSTDEVS=`get_mnt_devs obdfilter`
+ OSTCOUNT=`echo $OSTDEVS | wc -w`
sh llmountcleanup.sh || exit 40
# Remove objects associated with files
do_umount
else
- OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev`
- OSTCOUNT=`$LFIND $MOUNT | grep -c "^[0-9]*: "`
+ MDTDEVS=`get_mnt_devs mds`
+ OSTDEVS=`get_mnt_devs obdfilter`
+ OSTCOUNT=`echo $OSTDEVS | wc -w`
fi # LFSCK_SETUP
# Run e2fsck to get mds and ost info
}
fname = strrchr(argv[2], '/');
- fname++;
+ fname = (fname == NULL ? argv[2] : fname + 1);
+
strncpy((char *)lum_file1, fname, lum_size);
rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file1);
if (rc) {
}
fname = strrchr(argv[3], '/');
- fname++;
+ fname = (fname == NULL ? argv[3] : fname + 1);
strncpy((char *)lum_file2, fname, lum_size);
rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file2);
if (rc) {
lustre_opt="--lustre=$LUSTRE"
fi
-sh -x $mkconfig $config || exit 1
+[ -f $config ] || sh -x $mkconfig $config || exit 1
$LCONF $lustre_opt --reformat $@ $OPTS $config || exit 4
+++ /dev/null
-#!/bin/sh
-LUSTRE=`dirname $0`/..
-exec >> /tmp/recovery-`hostname`.log
-exec 2>&1
-
-$LUSTRE/utils/lconf --recover --verbose --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $LUSTRE/tests/local.xml
drop_request "statone /mnt/lustre/2" & wait_for_timeout
try_to_cleanup
-do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf"
-drop_request "cat /mnt/lustre/resolv.conf > /dev/null" & wait_for_timeout
+do_client "cp /etc/inittab /mnt/lustre/inittab"
+drop_request "cat /mnt/lustre/inittab > /dev/null" & wait_for_timeout
try_to_cleanup
-drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" & wait_for_timeout
+drop_request "mv /mnt/lustre/inittab /mnt/lustre/renamed" & wait_for_timeout
try_to_cleanup
drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" & wait_for_timeout
+++ /dev/null
-#!/bin/sh
-LUSTRE=`dirname $0`/..
-PATH=$LUSTRE/utils:$PATH
-lctl --device %$3 recover || logger -p kern.info recovery failed: $@
set -e
-# bug 5494 7288
-ALWAYS_EXCEPT="24 27 $RECOVERY_SMALL_EXCEPT"
+# bug 5494 7288 5493
+ALWAYS_EXCEPT="24 27 52 $RECOVERY_SMALL_EXCEPT"
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
LUSTRE=${LUSTRE:-`dirname $0`/..}
. $LUSTRE/tests/test-framework.sh
init_test_env $@
run_test 3 "stat: drop req, drop rep"
test_4() {
- do_facet client "cp /etc/passwd $MOUNT/passwd" || return 1
- drop_request "cat $MOUNT/passwd > /dev/null" || return 2
- drop_reply "cat $MOUNT/passwd > /dev/null" || return 3
+ do_facet client "cp /etc/inittab $MOUNT/inittab" || return 1
+ drop_request "cat $MOUNT/inittab > /dev/null" || return 2
+ drop_reply "cat $MOUNT/inittab > /dev/null" || return 3
}
run_test 4 "open: drop req, drop rep"
test_5() {
- drop_request "mv $MOUNT/passwd $MOUNT/renamed" || return 1
+ drop_request "mv $MOUNT/inittab $MOUNT/renamed" || return 1
drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
do_facet client "checkstat -v $MOUNT/renamed-again" || return 3
}
set -e
-# bug number: 6088 10124
-ALWAYS_EXCEPT="8 15c $REPLAY_DUAL_EXCEPT"
+# bug number: 6088 10124 10800
+ALWAYS_EXCEPT="8 15c 17 $REPLAY_DUAL_EXCEPT"
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
LUSTRE=${LUSTRE:-`dirname $0`/..}
. $LUSTRE/tests/test-framework.sh
set -e
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
LUSTRE=${LUSTRE:-`dirname $0`/..}
. $LUSTRE/tests/test-framework.sh
init_test_env $@
ostfailover_HOST=${ostfailover_HOST:-$ost_HOST}
#failover= must be defined in OST_MKFS_OPTIONS if ostfailover_HOST != ost_HOST
+# Tests that fail on uml
+CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
+[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 6"
+
# Skip these tests
# BUG NUMBER:
ALWAYS_EXCEPT="$REPLAY_OST_SINGLE_EXCEPT"
+++ /dev/null
-#!/bin/sh
-
-
-TESTDIR=`dirname $0`
-LUSTRE=$TESTDIR/..
-
-exec >> $TESTDIR/recovery-`hostname`.log
-exec 2>&1
-
-set -xv
-
-failed_import() {
-# $LUSTRE/utils/lctl --device %$3 recover ||
-# logger -p kern.info recovery failed: $@
-
- if [ -f $LUSTRE/tests/ostactive ] ; then
- source $LUSTRE/tests/ostactive
- else
- ostactive=ost
- fi
-
- $LUSTRE/utils/lconf --verbose --recover --node client_facet \
- --select ost1=${ostactive}_facet\
- --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $TESTDIR/replay-ost-single.xml
-
-}
-
-recovery_over() {
- logger -p kern.info upcall: $@
-}
-
-
-case "$1" in
-FAILED_IMPORT) failed_import $@
- ;;
-RECOVERY_OVER) recovery_over $@
- ;;
-esac
+++ /dev/null
-#!/bin/sh
-
-
-TESTDIR=`dirname $0`
-LUSTRE=$TESTDIR/..
-
-mkdir -p $TESTDIR/logs
-
-exec >> $TESTDIR/logs/recovery-`hostname`.log
-exec 2>&1
-
-echo ==========================================
-echo "start upcall: `date`"
-echo "command line: $0 $*"
-
-set -xv
-
-failed_import() {
- if [ -f $TESTDIR/XMLCONFIG ] ; then
- source $TESTDIR/XMLCONFIG
- if [ ! -f $TESTDIR/XMLCONFIG ]; then
- echo "config file not found: $XMLCONFIG"
- exit 1
- fi
- else
- echo "$TESTDIR/XMLCONFIG: not found"
- exit 1
- fi
-
- if [ -f $TESTDIR/mdsactive ] ; then
- source $TESTDIR/mdsactive
- MDSSELECT="--select mds_svc=${mdsactive}_facet"
- fi
-
- if [ -f $TESTDIR/ostactive ] ; then
- source $TESTDIR/ostactive
- OSTSELECT="--select ost_svc=${ostactive}_facet"
- fi
-
- $LUSTRE/utils/lconf --verbose --recover --node client_facet \
- $MDSSELECT $OSTSELECT \
- --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $XMLCONFIG
-
-}
-
-recovery_over() {
- logger -p kern.info upcall: $@
-}
-
-
-case "$1" in
-FAILED_IMPORT) failed_import $@
- ;;
-RECOVERY_OVER) recovery_over $@
- ;;
-esac
# Skip these tests
-# bug number: 2766
-ALWAYS_EXCEPT="0b $REPLAY_SINGLE_EXCEPT"
+# bug number: 2766 4176
+ALWAYS_EXCEPT="0b 39 $REPLAY_SINGLE_EXCEPT"
build_test_filter
}
run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
+test_20b() {
+ BEFORESPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'`
+ dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
+ pid=$!
+ usleep 60 # give dd a chance to start
+ lfs getstripe $DIR/$tfile || return 1
+ rm -f $DIR/$tfile || return 2 # make it an orphan
+ mds_evict_client
+ df -P $DIR || df -P $DIR || true # reconnect
+
+ fail mds # start orphan recovery
+ df -P $DIR || df -P $DIR || true # reconnect
+ sleep 2
+ AFTERSPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'`
+ [ $AFTERSPACE -lt $((BEFORESPACE - 20)) ] && \
+ error "after $AFTERSPACE < before $BEFORESPACE" && return 5
+ return 0
+}
+run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
+
test_21() {
replay_barrier mds
multiop $DIR/$tfile O_tSc &
mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
[ "$mdcdev" ] || exit 2
for i in `seq 1 10`; do
- echo iteration $i
- #define OBD_FAIL_TGT_CONN_RACE 0x701
- do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
- $LCTL --device $mdcdev recover
- df $MOUNT
+ #define OBD_FAIL_TGT_CONN_RACE 0x701
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
done
do_facet mds "sysctl -w lustre.fail_loc=0"
return 0
mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
[ "$mdcdev" ] || exit 2
for i in `seq 1 10`; do
- echo iteration $i
- #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
- do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
- $LCTL --device $mdcdev recover
- df $MOUNT
+ #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
done
do_facet mds "sysctl -w lustre.fail_loc=0"
return 0
done
# turn on/off quota tests must be included
eval ONLY_0=true
- eval ONLY_9=true
+ eval ONLY_99=true
}
_basetest() {
echo $(($1 * $BLK_SZ)) > $i
done
# set btune size on mds
- for i in `ls /proc/fs/lustre/mds/mds*/quota_btune_sz`; do
+ for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_btune_sz`; do
echo $(($1 * $BLK_SZ)) > $i
done
}
for i in `ls /proc/fs/lustre/obdfilter/*/quota_bunit_sz`; do
echo $(($1 * $BLK_SZ)) > $i
done
- for i in `ls /proc/fs/lustre/mds/mds*/quota_bunit_sz`; do
+ for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_bunit_sz`; do
echo $(($1 * $BLK_SZ)) > $i
done
}
echo $1 > $i
done
# set iunit and itune size on mds
- for i in `ls /proc/fs/lustre/mds/mds*/quota_itune_sz`; do
+ for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_itune_sz`; do
echo $1 > $i
done
for i in `ls /proc/fs/lustre/obdfilter/*/quota_iunit_sz`; do
echo $1 > $i
done;
- for i in `ls /proc/fs/lustre/mds/mds*/quota_iunit_sz`; do
+ for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_iunit_sz`; do
echo $1 > $i
done
}
# set block tunables
set_blk_tunesz $BTUNE_SZ
set_blk_unitsz $BUNIT_SZ
- # set file tunaables
+ # set file tunables
set_file_tunesz $ITUNE_SZ
set_file_unitsz $IUNIT_SZ
fi
echo " Write before timer goes off"
$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$BUNIT_SZ >/dev/null 2>&1 || error "write failure, but expect success"
- sync; sleep 1; sync;
echo " Done"
echo " Sleep $GRACE seconds ..."
echo " Write after timer goes off"
# maybe cache write, ignore.
+ sync; sleep 1; sync;
$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null
sync; sleep 1; sync;
$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=1 seek=$(($BUNIT_SZ * 3)) >/dev/null 2>&1 && error "write success, but expect EDQUOT"
echo " Exceed quota limit ..."
$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write fileb failure, but expect success"
- sync; sleep 1; sync;
+ #sync; sleep 1; sync;
$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ seek=$LIMIT count=$BUNIT_SZ >/dev/null 2>&1 && error "write fileb success, but expect EDQUOT"
- sync; sleep 1; sync;
+ #sync; sleep 1; sync;
echo " Write to OST0 return EDQUOT"
# this write maybe cache write, ignore it's failure
$RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null
echo 0 > /proc/sys/lustre/fail_loc
echo " Trigger recovery..."
- OSC0_UUID="`$LCTL dl | awk '/.* *-osc-* / { print $1 }'`"
+ OSC0_UUID="`$LCTL dl | awk '$3 ~ /osc/ { print $1 }'`"
for i in $OSC0_UUID; do
$LCTL --device $i activate > /dev/null 2>&1 || error "activate osc failed!"
done
[ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!"
echo " total limits = $TOTAL_LIMIT"
- OST0_UUID=`$LCTL dl | awk '/.*OST_[^ ]+_UUID.* / { print $5 }'`
- [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '/.*ost1_[^ ]*UUID.* / { print $5 }'`
+ OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'| head -n1`
+ [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'|head -n1`
OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $MOUNT | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`"
[ $OST0_LIMIT -eq $BUNIT_SZ ] || error "high limits not released!"
echo " limits on $OST0_UUID = $OST0_LIMIT"
}
run_test 8 "Run dbench with quota enabled ==========="
+# run for fixing bug10707, it needs a big room. test for 64bit
+test_9() {
+ # skip when the capacity reported by df is smaller than the big file
+ lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'`
+ size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT))
+ echo "lustrefs_size:$lustrefs_size size_file:$size_file"
+ if [ $lustrefs_size -lt $size_file ]; then
+ echo "WARN: too few capacity, skip this test."
+ return 0;
+ fi
+
+ # set the D_QUOTA flag; set_flag records whether the debug mask was
+ # changed here so that it can be restored before returning
+ debug_flag=`cat /proc/sys/lnet/debug`
+ D_QUOTA_FLAG=67108864
+ set_flag=0
+ if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then
+ echo $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug
+ set_flag=1
+ fi
+
+ TESTFILE="$TSTDIR/quota_tst90"
+
+ echo " Set block limit $LIMIT bytes to $TSTUSR.$TSTUSR"
+ BLK_LIMIT=$((100 * 1024 * 1024)) # 100G
+ FILE_LIMIT=1000000
+
+ echo " Set enough high limit for user: $TSTUSR"
+ $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+ echo " Set enough high limit for group: $TSTUSR"
+ $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+
+ echo " Set stripe"
+ [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT
+ touch $TESTFILE
+ chown $TSTUSR.$TSTUSR $TESTFILE
+
+ # the size in the error message uses the same expression as the echo,
+ # so that integer division does not make the two disagree
+ echo " Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..."
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $(($OSTCOUNT * 9 / 2))G file failure, but expect success"
+
+ echo " delete the big file of $(($OSTCOUNT * 9 / 2))G..."
+ $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+ echo " write the big file of 2G..."
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write 2G file failure, but expect success"
+
+ echo " delete the big file of 2G..."
+ $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+ # RC is the exit status of the final rm above
+ RC=$?
+
+ # clear the flag
+ if [ $set_flag -eq 1 ]; then
+ echo $debug_flag > /proc/sys/lnet/debug
+ fi
+
+ return $RC
+}
+run_test 9 "run for fixing bug10707(64bit) ==========="
+
+# run for fixing bug10707, it need a big room. test for 32bit
+test_10() {
+ # skip when the capacity reported by df is smaller than the big file
+ lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'`
+ size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT))
+ echo "lustrefs_size:$lustrefs_size size_file:$size_file"
+ if [ $lustrefs_size -lt $size_file ]; then
+ echo "WARN: too few capacity, skip this test."
+ return 0;
+ fi
+
+ # fail_loc is set through the local sysctl below, so both proc
+ # directories must exist on this machine
+ if [ ! -d /proc/fs/lustre/ost/ -o ! -d /proc/fs/lustre/mds ]; then
+ echo "WARN: mds or ost isn't on the local machine, skip this test."
+ return 0;
+ fi
+
+ sync; sleep 10; sync;
+
+ # set the D_QUOTA flag; set_flag records whether the debug mask was
+ # changed here so that it can be restored before returning
+ debug_flag=`cat /proc/sys/lnet/debug`
+ D_QUOTA_FLAG=67108864
+ set_flag=0
+ if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then
+ echo $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug
+ set_flag=1
+ fi
+
+ # make qd_count 32 bit
+ sysctl -w lustre.fail_loc=2560
+
+ TESTFILE="$TSTDIR/quota_tst100"
+
+ echo " Set block limit $LIMIT bytes to $TSTUSR.$TSTUSR"
+ BLK_LIMIT=$((100 * 1024 * 1024)) # 100G
+ FILE_LIMIT=1000000
+
+ echo " Set enough high limit for user: $TSTUSR"
+ $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+ echo " Set enough high limit for group: $TSTUSR"
+ $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+
+ echo " Set stripe"
+ [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT
+ touch $TESTFILE
+ chown $TSTUSR.$TSTUSR $TESTFILE
+
+ # the size in the error message uses the same expression as the echo,
+ # so that integer division does not make the two disagree
+ echo " Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..."
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $(($OSTCOUNT * 9 / 2))G file failure, but expect success"
+
+ echo " delete the big file of $(($OSTCOUNT * 9 / 2))G..."
+ $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+ echo " write the big file of 2G..."
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write 2G file failure, but expect success"
+
+ echo " delete the big file of 2G..."
+ $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+ # RC is the exit status of the final rm above
+ RC=$?
+
+ # clear the flag
+ if [ $set_flag -eq 1 ]; then
+ echo $debug_flag > /proc/sys/lnet/debug
+ fi
+
+ # make qd_count 64 bit
+ sysctl -w lustre.fail_loc=0
+
+ return $RC
+}
+run_test 10 "run for fixing bug10707(32bit) ==========="
+
+
# turn off quota
-test_9()
+test_99()
{
$LFS quotaoff $MOUNT
return 0
}
-run_test 9 "Quota off ==============================="
+run_test 99 "Quota off ==============================="
log "cleanup: ======================================================"
TRACE=${TRACE:-""}
check_kernel_version() {
- VERSION_FILE=$LPROC/kernel_version
+ VERSION_FILE=$LPROC/version
WANT_VER=$1
[ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1
- GOT_VER=`cat $VERSION_FILE`
+ GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE)
[ $GOT_VER -ge $WANT_VER ] && return 0
log "test needs at least kernel version $WANT_VER, running $GOT_VER"
return 1
reset_enospc
}
-run_test 27r "stripe file with some full OSTs (shouldn't LBUG) ==="
+run_test 27r "stripe file with some full OSTs (shouldn't LBUG) ="
+
+test_27s() {
+ mkdir -p $DIR/$tdir
+ $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \
+ error "stripe width >= 2^32 succeeded" || true
+}
+run_test 27s "lsm_xfersize overflow (should error) (bug 10725)"
+
+test_27t() { # bug 10864
+ WDIR=`pwd`
+ WLFS=`which lfs`
+ cd $DIR
+ touch $tfile
+ $WLFS getstripe $tfile
+ cd $WDIR
+}
+run_test 27t "check that utils parse path correctly"
+
test_28() {
mkdir $DIR/d28
test_65j() { # bug6367
return
# if we aren't already remounting for each test, do so for this test
- if [ "$CLEANUP" = ":" ]; then
+ if [ "$CLEANUP" = ":" -a "$I_MOUNTED" = "yes" ]; then
cleanup -f || error "failed to unmount"
- setup || error "failed to remount"
+ setup
fi
- $SETSTRIPE -d $MOUNT || true
+ $SETSTRIPE -d $MOUNT
}
run_test 65j "set default striping on root directory (bug 6367)="
run_test 74 "ldlm_enqueue freed-export error path (shouldn't LBUG)"
JOIN=${JOIN:-"lfs join"}
-test_75() {
+F75=$DIR/f75
+F128k=${F75}_128k
+FHEAD=${F75}_head
+FTAIL=${F75}_tail
+export T75_PREP=no
+test75_prep() {
+ [ $T75_PREP = "yes" ] && return
+ echo "using F75=$F75, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL"
+
+ dd if=/dev/urandom of=${F75}_128k bs=128k count=1 || error "dd failed"
+ log "finished dd"
+ chmod 777 ${F128k}
+ T75_PREP=yes
+}
+
+test_75a() {
# skipped temporarily: we do not have join file currently
# please remove this when ready - huanghua
return
- F=$DIR/$tfile
- F128k=${F}_128k
- FHEAD=${F}_head
- FTAIL=${F}_tail
- echo "using F=$F, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL"
- rm -f $F*
-
- dd if=/dev/urandom of=${F}_128k bs=1024 count=128 || error "dd failed"
- chmod 777 ${F128k}
- cp -p ${F128k} ${FHEAD}
- cp -p ${F128k} ${FTAIL}
- cat ${F128k} ${F128k} > ${F}_sim_sim
-
- $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
- cmp ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ"
- $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join"
-
- cp -p ${F128k} ${FTAIL}
- cat ${F}_sim_sim >> ${F}_join_sim
- cat ${F128k} >> ${F}_join_sim
- $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
- cmp ${FHEAD} ${F}_join_sim || \
- error "${FHEAD} ${F}_join_sim are different"
- $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join"
-
- cp -p ${F128k} ${FTAIL}
- cat ${F128k} >> ${F}_sim_join
- cat ${F}_join_sim >> ${F}_sim_join
- $JOIN ${FTAIL} ${FHEAD} || error "join error"
- cmp ${FTAIL} ${F}_sim_join || \
- error "${FTAIL} ${F}_sim_join are different"
- $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join"
-
- cp -p ${F128k} ${FHEAD}
- cp -p ${F128k} ${FHEAD}_tmp
- cat ${F}_sim_sim >> ${F}_join_join
- cat ${F}_sim_join >> ${F}_join_join
- $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error"
- $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
- cmp ${FHEAD} ${F}_join_join || error "${FHEAD} ${F}_join_join differ"
- $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join"
- $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)"
-
- rm -rf ${FHEAD} || error "delete join file error"
- cp -p ${F128k} ${F}_join_10_compare
- cp -p ${F128k} ${F}_join_10
- for ((i = 0; i < 10; i++)); do
- cat ${F128k} >> ${F}_join_10_compare
- cp -p ${F128k} ${FTAIL}
- $JOIN ${F}_join_10 ${FTAIL} || \
- error "join ${F}_join_10 ${FTAIL} error"
- $CHECKSTAT -a ${FTAIL} || error "tail file exist after join"
- done
- cmp ${F}_join_10 ${F}_join_10_compare || \
- error "files ${F}_join_10 ${F}_join_10_compare are different"
- $LFS getstripe ${F}_join_10
- $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open"
-
- ls -l $F*
+ test75_prep
+
+ cp -p ${F128k} ${FHEAD}
+ log "finished cp to $FHEAD"
+ cp -p ${F128k} ${FTAIL}
+ log "finished cp to $FTAIL"
+ cat ${F128k} ${F128k} > ${F75}_sim_sim
+
+ $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+ log "finished join $FHEAD to ${F75}_sim_sim"
+ cmp ${FHEAD} ${F75}_sim_sim || error "${FHEAD} ${F75}_sim_sim differ"
+ log "finished cmp $FHEAD to ${F75}_sim_sim"
+ $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join"
+}
+run_test 75a "TEST join file ===================================="
+
+test_75b() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ test75_prep
+
+ cp -p ${F128k} ${FTAIL}
+ cat ${F75}_sim_sim >> ${F75}_join_sim
+ cat ${F128k} >> ${F75}_join_sim
+ $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+ cmp ${FHEAD} ${F75}_join_sim || \
+ error "${FHEAD} ${F75}_join_sim are different"
+ $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join"
+}
+run_test 75b "TEST join file 2 =================================="
+
+test_75c() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ test75_prep
+
+ cp -p ${F128k} ${FTAIL}
+ cat ${F128k} >> ${F75}_sim_join
+ cat ${F75}_join_sim >> ${F75}_sim_join
+ $JOIN ${FTAIL} ${FHEAD} || error "join error"
+ cmp ${FTAIL} ${F75}_sim_join || \
+ error "${FTAIL} ${F75}_sim_join are different"
+ $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join"
+}
+run_test 75c "TEST join file 3 =================================="
+
+test_75d() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ test75_prep
+
+ cp -p ${F128k} ${FHEAD}
+ cp -p ${F128k} ${FHEAD}_tmp
+ cat ${F75}_sim_sim >> ${F75}_join_join
+ cat ${F75}_sim_join >> ${F75}_join_join
+ $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error"
+ $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+ # cmp and the ${FHEAD}_tmp check are separate statements; when fused on
+ # one line the $CHECKSTAT words became arguments of error and never ran
+ cmp ${FHEAD} ${F75}_join_join || error "${FHEAD} ${F75}_join_join differ"
+ $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join"
+ $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)"
+}
+run_test 75d "TEST join file 4 =================================="
+
+test_75e() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ test75_prep
+
+ # report a failed removal through error, like the other 75x tests
+ rm -rf ${FHEAD} || error "delete join file error"
+}
+run_test 75e "TEST join file 5 (remove joined file) ============="
+
+test_75f() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ test75_prep
+
+ cp -p ${F128k} ${F75}_join_10_compare
+ cp -p ${F128k} ${F75}_join_10
+ for ((i = 0; i < 10; i++)); do
+ cat ${F128k} >> ${F75}_join_10_compare
+ cp -p ${F128k} ${FTAIL}
+ $JOIN ${F75}_join_10 ${FTAIL} || \
+ error "join ${F75}_join_10 ${FTAIL} error"
+ $CHECKSTAT -a ${FTAIL} || error "tail file exist after join"
+ done
+ cmp ${F75}_join_10 ${F75}_join_10_compare || \
+ error "files ${F75}_join_10 ${F75}_join_10_compare differ"
+}
+run_test 75f "TEST join file 6 (join 10 files) =================="
+
+test_75g() {
+# skipped temporarily: we do not have join file currently
+# please remove this when ready - huanghua
+ return
+ [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return
+ $LFS getstripe ${F75}_join_10
+
+ $OPENUNLINK ${F75}_join_10 ${F75}_join_10 || error "files unlink open"
+
+ ls -l $F75*
}
-run_test 75 "TEST join file ===================================="
+run_test 75g "TEST join file 7 (open unlink) ===================="
num_inodes() {
awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo
[ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
[ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
+ [ -z "$(which setfattr 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfattr)" && return
+
echo "set/get xattr..."
setfattr -n trusted.name1 -v value1 $testfile || error
[ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \
getfattr -d -m user $testfile 2> /dev/null | \
grep "user.author1" && error || true
- echo "set lustre specific xattr (should be denied)..."
- setfattr -n "trusted.lov" -v "invalid value" $testfile || true
+ # b10667: setting lustre special xattr be silently discarded
+ echo "set lustre special xattr ..."
+ setfattr -n "trusted.lov" -v "invalid value" $testfile || error
rm -f $testfile
}
for O in $ONLY; do
eval ONLY_${O}=true
done
- for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do
+ for E in $EXCEPT $ALWAYS_EXCEPT $SANITYN_EXCEPT; do
eval EXCEPT_${E}=true
done
}
set -e
trap 'echo "test-framework exiting on error"' ERR
-#set -vx
+#set -x
export REFORMAT=""
load_module fid/fid
load_module fld/fld
load_module lmv/lmv
+ load_module quota/lquota
load_module mdc/mdc
load_module osc/osc
load_module lov/lov
[ -f $LUSTRE/utils/mount.lustre ] && cp $LUSTRE/utils/mount.lustre /sbin/. || true
}
+# Retry rmmod on the modules listed by "$LCTL modules" every 5 seconds.
+# Returns 0 once the list is empty, or 3 (after printing the remaining
+# modules and lsmod output) when MAX seconds have elapsed.
+wait_for_lnet() {
+ local WAIT=0
+ local MAX=60
+ MODULES=$($LCTL modules | awk '{ print $2 }')
+ while [ -n "$MODULES" ]; do
+ sleep 5
+ rmmod $MODULES >/dev/null 2>&1 || true
+ MODULES=$($LCTL modules | awk '{ print $2 }')
+ if [ -z "$MODULES" ]; then
+ return 0
+ else
+ WAIT=$((WAIT + 5))
+ echo "waiting, $((MAX - WAIT)) secs left"
+ fi
+ # -ge rather than -eq so the timeout still fires if the step and
+ # MAX ever stop being multiples of each other
+ if [ $WAIT -ge $MAX ]; then
+ echo "LNET modules $MODULES will not unload"
+ lsmod
+ return 3
+ fi
+ done
+}
+
unload_modules() {
lsmod | grep lnet > /dev/null && $LCTL dl && $LCTL dk $TMP/debug
local MODULES=$($LCTL modules | awk '{ print $2 }')
rmmod $MODULES >/dev/null 2>&1 || true
# do it again, in case we tried to unload ksocklnd too early
MODULES=$($LCTL modules | awk '{ print $2 }')
- [ -n "$MODULES" ] && rmmod $MODULES >/dev/null && sleep 2 || true
+ [ -n "$MODULES" ] && rmmod $MODULES >/dev/null || true
MODULES=$($LCTL modules | awk '{ print $2 }')
if [ -n "$MODULES" ]; then
- echo "modules still loaded"
+ echo "Modules still loaded: "
echo $MODULES
- cat $LPROC/devices || true
- lsmod
- return 2
+ if [ -e $LPROC ]; then
+ echo "Lustre still loaded"
+ cat $LPROC/devices || true
+ lsmod
+ return 2
+ else
+ echo "Lustre stopped, but LNET is still loaded"
+ wait_for_lnet || return 3
+ fi
fi
HAVE_MODULES=false
*
* compile: mpicc -g -Wall -o write_disjoint write_disjoint.c
* run: mpirun -np N -machlist <hostlist file> write_disjoint
- * or: pdsh -w <N hosts> write_disjoint
+ * or: pdsh -w <N hosts> write_disjoint
* or: prun -n N [-N M] write_disjoint
*/
#include <stdlib.h>
void rprintf(int rank, int loop, const char *fmt, ...)
{
va_list ap;
-
+
printf("rank %d, loop %d: ", rank, loop);
-
+
va_start(ap, fmt);
-
+
vprintf(fmt, ap);
-
- MPI_Abort(MPI_COMM_WORLD, -1);
+
+ MPI_Abort(MPI_COMM_WORLD, -1); /* This will exit() according to man */
}
+#define CHUNK_SIZE(n) chunk_size[(n) % 2]
+
int main (int argc, char *argv[]) {
- int i, n, fd, chunk_size, file_size;
- int rank, noProcessors, done;
- int error;
- off_t offset;
- char **chunk_buf;
- char *read_buf, c;
- struct stat stat_buf;
- ssize_t ret;
- char *filename = "/mnt/lustre/write_disjoint";
- int numloops = 1000;
+ int i, n, fd;
+ unsigned long chunk_size[2];
+ int rank, noProcessors, done;
+ int error;
+ off_t offset;
+ char **chunk_buf;
+ char *read_buf, c;
+ struct stat stat_buf;
+ ssize_t ret;
+ char *filename = "/mnt/lustre/write_disjoint";
+ int numloops = 1000;
error = MPI_Init(&argc, &argv);
if (error != MPI_SUCCESS)
}
}
- MPI_Comm_size(MPI_COMM_WORLD, &noProcessors);
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-
- chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
- for (i=0; i < noProcessors; i++) {
+ MPI_Comm_size(MPI_COMM_WORLD, &noProcessors);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
+ for (i=0; i < noProcessors; i++) {
chunk_buf[i] = malloc(CHUNK_MAX_SIZE);
memset(chunk_buf[i], 'A'+ i, CHUNK_MAX_SIZE);
- }
- read_buf = malloc(noProcessors * CHUNK_MAX_SIZE);
-
- if (rank == 0) {
+ }
+ read_buf = malloc(noProcessors * CHUNK_MAX_SIZE);
+
+ if (rank == 0) {
fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
- if (fd < 0)
- rprintf(rank, -1, "open() returned %s\n",
+ if (fd < 0)
+ rprintf(rank, -1, "open() returned %s\n",
strerror(errno));
- }
- MPI_Barrier(MPI_COMM_WORLD);
-
- fd = open(filename, O_RDWR);
- if (fd < 0)
- rprintf(rank, -1, "open() returned %s\n", strerror(errno));
-
- for (n=0; n < numloops; n++) {
- /* reset the environment */
- if (rank == 0) {
- ret = truncate(filename, 0);
- if (ret != 0)
- rprintf(rank, n, "truncate() returned %s\n",
- strerror(errno) );
- }
- chunk_size = rand() % CHUNK_MAX_SIZE;
-
- if (n % 1000 == 0 && rank == 0)
- printf("loop %d: chunk_size %d\n", n, chunk_size);
-
- MPI_Barrier(MPI_COMM_WORLD);
-
- /* Do the race */
- offset = rank * chunk_size;
- lseek(fd, offset, SEEK_SET);
-
- done = 0;
- do {
- ret = write(fd, chunk_buf[rank]+done, chunk_size-done);
- if (ret < 0)
- rprintf(rank, n, "write() returned %s\n",
- strerror(errno));
+ }
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0)
+ rprintf(rank, -1, "open() returned %s\n", strerror(errno));
+
+ for (n = 0; n < numloops; n++) {
+ /* reset the environment */
+ if (rank == 0) {
+ ret = truncate(filename, 0);
+ if (ret != 0)
+ rprintf(rank, n, "truncate() returned %s\n",
+ strerror(errno) );
+ }
+ CHUNK_SIZE(n) = rand() % CHUNK_MAX_SIZE;
+
+ if (n % 1000 == 0 && rank == 0)
+ printf("loop %d: chunk_size %lu\n", n, CHUNK_SIZE(n));
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ /* Do the race */
+ offset = rank * CHUNK_SIZE(n);
+ lseek(fd, offset, SEEK_SET);
+
+ done = 0;
+ do {
+ ret = write(fd, chunk_buf[rank] + done,
+ CHUNK_SIZE(n) - done);
+ if (ret < 0)
+ rprintf(rank, n, "write() returned %s\n",
+ strerror(errno));
done += ret;
- } while (done != chunk_size);
-
- MPI_Barrier(MPI_COMM_WORLD);
-
- /* Check the result */
- if (rank == 0) {
- lseek(fd, 0, SEEK_SET);
-
- /* quick check */
- stat(filename, &stat_buf);
- file_size = stat_buf.st_size;
- if (file_size != chunk_size * noProcessors)
- rprintf(rank, n, "invalid file size %d"
- " instead of %d\n", file_size,
- chunk_size * noProcessors);
+ } while (done != CHUNK_SIZE(n));
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ /* Check the result */
+ if (rank == 0) {
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ rprintf(rank, n, "error seeking to 0: %s\n",
+ strerror(errno));
+
+ /* quick check */
+ if (stat(filename, &stat_buf) < 0)
+ rprintf(rank, n, "error stating %s: %s\n",
+ filename, strerror(errno));
+
+ if (stat_buf.st_size != CHUNK_SIZE(n) * noProcessors)
+ rprintf(rank, n, "invalid file size %lu"
+ " instead of %lu\n",
+ (unsigned long)stat_buf.st_size,
+ CHUNK_SIZE(n) * noProcessors);
done = 0;
do {
- ret = read(fd, read_buf + done,
- (chunk_size * noProcessors) - done);
- if (ret < 0)
+ ret = read(fd, read_buf + done,
+ CHUNK_SIZE(n) * noProcessors - done);
+ if (ret < 0)
rprintf(rank, n, "read returned %s\n",
strerror(errno));
done += ret;
- } while (done != chunk_size * noProcessors);
+ } while (done != CHUNK_SIZE(n) * noProcessors);
for (i = 0; i < noProcessors; i++) {
- char command[4096];
+ char command[4096];
int j;
- if (!memcmp(read_buf + (i * chunk_size),
- chunk_buf[i], chunk_size))
+ if (!memcmp(read_buf + (i * CHUNK_SIZE(n)),
+ chunk_buf[i], CHUNK_SIZE(n)))
continue;
- printf("rank %d, loop %d: chunk %d corrupted "
- "with chunk_size %d, page_size %d\n",
- rank, n, i, chunk_size, getpagesize());
- printf("(ranks: page boundry, chunk boundry, "
- "page boundry)\n");
+ /* print out previous chunk sizes */
+ if (n > 0)
+ printf("loop %d: chunk_size %lu\n",
+ n - 1, CHUNK_SIZE(n - 1));
+
+ printf("loop %d: chunk %d corrupted "
+ "with chunk_size %lu, page_size %d\n",
+ n, i, CHUNK_SIZE(n), getpagesize());
+ printf("ranks:\tpage boundry\tchunk boundry\t"
+ "page boundry\n");
for (j = 1 ; j < noProcessors; j++) {
- int b = j * chunk_size;
- printf("\t%c -> %c: %d %d %d\n",
- 'A' + j - 1, 'A' + j,
- b & ~(getpagesize()-1), b,
- (b + getpagesize()) & ~(getpagesize()-1));
+ int b = j * CHUNK_SIZE(n);
+ printf("%c -> %c:\t%d\t%d\t%d\n",
+ 'A' + j - 1, 'A' + j,
+ b & ~(getpagesize()-1), b,
+ (b + getpagesize()) &
+ ~(getpagesize()-1));
}
sprintf(command, "od -Ad -a %s", filename);
system(command);
- MPI_Finalize();
- exit(1);
+ rprintf(0, n, "data check error - exiting\n");
}
}
}
.deps
tags
TAGS
-obdctl
-lctl
-obdstat
-obdio
-obdbarrier
-lload
-wirecheck
-lfs
mkfs.lustre
-mkfs_lustre
mount.lustre
-mount_lustre
tunefs.lustre
-tunefs_lustre
-llog_reader
-llmount
-l_getgroups
+lctl
+lfs
+wirecheck
wiretest
llog_reader
-.*.cmd
-.*.d
+lr_reader
+obdio
+obdbarrier
+lload
llverfs
llverdev
+l_getgroups
+.*.cmd
+.*.d
sbin_scripts = llstat.pl llobdstat.pl lrun
if UTILS
-noinst_PROGRAMS = mount_lustre mkfs_lustre tunefs_lustre \
- llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier
+noinst_PROGRAMS = llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier
+
# mount only finds helpers in /sbin
rootsbin_PROGRAMS = mount.lustre
sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl \
cp ../ptlrpc/wirehdr.c ../ptlrpc/wiretest.c
./wirecheck >> ../ptlrpc/wiretest.c
-# Apparently I can't use .'s in automake names
-mount.lustre$(EXEEXT): mount_lustre
- cp $< $@
-
-mkfs.lustre$(EXEEXT): mkfs_lustre
- cp $< $@
-
-tunefs.lustre$(EXEEXT): tunefs_lustre
- cp $< $@
# use internal journal
return mountfsoptions
- # run blkid
- blkid = "blkid -o device -t UUID='%s'" % (journal_UUID)
+ # run blkid, lookup highest-priority device with matching UUID
+ blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID)
(ret, devname) = run(blkid)
if ret or len(devname) == 0:
panic("cannot find external journal for ", blkdev)
fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
}
-int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
+int llapi_file_create(const char *name, unsigned long stripe_size, int stripe_offset,
int stripe_count, int stripe_pattern)
{
struct lov_user_md lum = { 0 };
"multiple of %d bytes", stripe_size, page_size);
goto out;
}
- if (stripe_offset < -1 || stripe_offset > 2048) {
+ if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) {
errno = rc = -EINVAL;
err_msg("error: bad stripe offset %d", stripe_offset);
goto out;
err_msg("error: bad stripe count %d", stripe_count);
goto out;
}
- if (stripe_count > 0 && (__u64)stripe_size * stripe_count > ~0UL) {
+ if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){
errno = rc = -EINVAL;
- err_msg("error: stripe_size %ld * stripe_count %d "
- "exceeds %lu bytes", ~0UL);
+ err_msg("error: stripe_size %lu * stripe_count %u "
+ "exceeds 4GB", stripe_size, stripe_count);
goto out;
}
int llapi_file_get_stripe(const char *path, struct lov_user_md *lum)
{
- char *dname, *fname;
+ const char *fname;
+ char *dname;
int fd, rc = 0;
fname = strrchr(path, '/');
ret = ioctl(dirfd(dir), LL_IOC_MDC_GETINFO,
(void *)param->lmd);
} else if (!decision && parent) {
- char *fname = strrchr(path, '/') + 1;
+ char *fname = strrchr(path, '/');
+ fname = (fname == NULL ? path : fname + 1);
/* retrieve needed file info */
strncpy((char *)param->lmd, fname, param->lumlen);
ret = ioctl(dirfd(d), LL_IOC_LOV_GETSTRIPE,
(void *)&param->lmd->lmd_lmm);
} else if (parent) {
- char *fname = strrchr(path, '/') + 1;
+ char *fname = strrchr(path, '/');
+ fname = (fname == NULL ? path : fname + 1);
strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen);
ret = ioctl(dirfd(parent), IOC_MDC_GETFILESTRIPE,
rc = ioctl(dirfd(d), LL_IOC_MDC_GETINFO,
(void *)param->lmd);
} else if (parent) {
- char *fname = strrchr(path, '/') + 1;
+ char *fname = strrchr(path, '/');
+ fname = (fname == NULL ? path : fname + 1);
strncpy((char *)param->lmd, fname, param->lumlen);
rc = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO,
#!/usr/bin/perl
+# llobdstat.pl is a utility that parses obdfilter statistics files
+# found at proc/fs/lustre/<ostname>/stats.
+# It is mainly useful to watch the statistics change over time.
my $pname = $0;
sub usage()
{
- print STDERR "Usage: $pname <stats_file> [<interval>]\n";
- print STDERR "example: $pname help (to get help message)\n";
- print STDERR "example: $pname ost1 1 (monitor /proc/fs/lustre/obdfilter/ost1/stats\n";
+ print STDERR "Usage: $pname <ost_name> [<interval>]\n";
+ print STDERR "where ost_name : ost name under $defaultpath/obdfilter\n";
+ print STDERR " interval : sample interaval in seconds\n";
+ print STDERR "example: $pname lustre-OST0000 2\n";
+ print STDERR "Use CTRL + C to stop statistics printing\n";
exit 1;
}
my %cur;
my %last;
my $mhz = 0;
-my ($read_bytes, $read, $write_bytes, $write, $getattr, $setattr, $open, $close, $create, $destroy, $statfs, $punch, $snapshot_time) =
- ("read_bytes", "read", "write_bytes", "write", "getattr", "setattr", "open", "close", "create", "destroy", "statfs", "punch", "snapshot_time");
-my @extinfo = ($setattr, $open, $close, $create, $destroy, $statfs, $punch);
-my %shortname = ($setattr => "sa", $open => "op", $close => "cl",
- $create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu");
+# Removed statistics such as open and close, which obdfilter does not provide.
+# To add a statistic, add its name to both lists in the declaration below, in the same order.
+my ($read_bytes, $write_bytes, $create, $destroy, $statfs, $punch, $snapshot_time) =
+ ("read_bytes", "write_bytes", "create", "destroy", "statfs", "punch", "snapshot_time");
+
+my @extinfo = ($create, $destroy, $statfs, $punch);
+my %shortname = ($create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu");
sub get_cpumhz()
{
get_cpumhz();
print "Processor counters run at $mhz MHz\n";
+# The readstat subroutine reads statistics from the obdfilter stats file.
+# This subroutine gets called after every interval specified by user.
sub readstat()
{
my $prevcount;
}
}
}
-
+# process_stats subroutine processes stats information read from obdfilter stats file.
+# This subroutine gets called after every interval specified by user.
sub process_stats()
{
my $delta;
my $data;
my $last_time = $last{$snapshot_time};
if (!defined($last_time)) {
- printf "R %-g/%-g W %-g/%-g attr %-g/%-g open %-g/%-g create %-g/%-g stat %-g punch %-g\n",
- $cur{$read_bytes}, $cur{$read},
- $cur{$write_bytes}, $cur{$write},
- $cur{$getattr}, $cur{$setattr},
- $cur{$open}, $cur{$close},
+ printf "Read: %-g, Write: %-g, create/destroy: %-g/%-g, stat: %-g, punch: %-g\n",
+ $cur{$read_bytes}, $cur{$write_bytes},
$cur{$create}, $cur{$destroy},
$cur{$statfs}, $cur{$punch};
+ if ($interval) {
+ print "[NOTE: cx: create, dx: destroy, st: statfs, pu: punch ]\n\n";
+ print "Timestamp Read-delta ReadRate Write-delta WriteRate\n";
+ print "--------------------------------------------------------\n";
+ }
}
else {
my $timespan = $cur{$snapshot_time} - $last{$snapshot_time};
-
- my $rdelta = $cur{$read} - $last{$read};
- my $rvdelta = int ($rdelta / $timespan);
- my $rrate = ($cur{$read_bytes} - $last{$read_bytes}) /
- ($timespan * ( 1 << 20 ));
- my $wdelta = $cur{$write} - $last{$write};
- my $wvdelta = int ($wdelta / $timespan);
- my $wrate = ($cur{$write_bytes} - $last{$write_bytes}) /
- ($timespan * ( 1 << 20 ));
- printf "R %6lu (%5lu %6.2fMB)/s W %6lu (%5lu %6.2fMB)/s",
- $rdelta, $rvdelta, $rrate,
- $wdelta, $wvdelta, $wrate;
+ my $rdelta = $cur{$read_bytes} - $last{$read_bytes};
+ my $rrate = ($rdelta) / ($timespan * ( 1 << 20 ));
+ my $wdelta = $cur{$write_bytes} - $last{$write_bytes};
+ my $wrate = ($wdelta) / ($timespan * ( 1 << 20 ));
+ $rdelta = ($rdelta) / (1024 * 1024);
+ $wdelta = ($wdelta) / (1024 * 1024);
+ # This print repeats after every interval.
+ printf "%10lu %6.2fMB %6.2fMB/s %6.2fMB %6.2fMB/s",
+ $cur{$snapshot_time}, $rdelta, $rrate, $wdelta, $wrate;
$delta = $cur{$getattr} - $last{$getattr};
if ( $delta != 0 ) {
$| = 1;
}
}
-
+#Open the obdfilter stat file with STATS
open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
do {
- readstat();
- process_stats();
- if ($interval) {
- sleep($interval);
+ readstat(); # read the statistics from stat file.
+ process_stats();
+ if ($interval) {
+ sleep($interval);
%last = %cur;
}
-} while ($interval);
+} while ($interval); # Repeat the statistics printing after every "interval" specified in command line.
close STATS;
+# llobdstat.pl ends here.
#!/usr/bin/perl
-
+# llstat.pl is a utility that takes stats files as input with optional clear-flag.
+# The clear-flag is used to clear the stats file before printing stats information.
+# The lustre stats files generally located inside proc/fs/lustre/
+# llstat.pl first reads the required statistics information from specified stat file,
+# process the information and prints the output after every interval specified by user.
+
my $pname = $0;
my $defaultpath = "/proc/fs/lustre";
my $obdstats = "stats";
+# Subroutine for printing usages information
sub usage()
{
- print STDERR "Usage: $pname <stats_file> [<interval>]\n";
+ print STDERR "Usage: $pname [-c] <stats_file> [<interval>]\n";
+ print STDERR " <stats_file> : lustre stats file, full /proc path or substring search\n";
+ print STDERR " <interval> : Time in seconds to repeat statistics print cycle\n";
+ print STDERR " -c : zero stats first\n";
+ print STDERR "eg: $pname ost 1 -- monitors /proc/fs/lustre/ost/OSS/ost/stats\n";
+ print STDERR "Use CTRL + C to stop statistics printing\n";
exit 1;
}
my $statspath = "None";
my $interval = 0;
-
-if (($#ARGV < 0) || ($#ARGV > 1)) {
+my $argpos = 0;
+# check the number of arguments
+if (($#ARGV < 0) || ($#ARGV > 2)) {
usage();
-} else {
+} else { # Process arguments
if ( $ARGV[0] =~ /help$/ ) {
usage();
}
- if ( -f $ARGV[0] ) {
- $statspath = $ARGV[0];
- } elsif ( -f "$ARGV[0]/$obdstats" ) {
- $statspath = "$ARGV[0]/$obdstats";
+ if ($#ARGV == 1) {
+ if (($ARGV[0] eq "-c") || ($ARGV[0] eq "-C")) {
+ $argpos = 1;
+ } else {
+ $interval = $ARGV[1];
+ }
+ }
+ if ( $#ARGV == 2 ) {
+ $interval = $ARGV[2];
+ $argpos = 1;
+ }
+ if ( -f $ARGV[$argpos] ) {
+ $statspath = $ARGV[$argpos];
+ } elsif ( -f "$ARGV[$argpos]/$obdstats" ) {
+ $statspath = "$ARGV[$argpos]/$obdstats";
} else {
- my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`;
+ my $st = `ls $defaultpath/*/$ARGV[$argpos]/$obdstats 2> /dev/null`;
chop $st;
if ( -f "$st" ) {
$statspath = $st;
} else {
- $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`;
+ $st = `ls $defaultpath/*/*/$ARGV[$argpos]/$obdstats 2> /dev/null`;
chop $st;
if ( -f "$st" ) {
$statspath = $st;
}
}
if ( $statspath =~ /^None$/ ) {
- die "Cannot locate stat file for: $ARGV[0]\n";
+ die "Cannot locate stat file for: $ARGV[$argpos]\n";
+ }
+ if ($#ARGV == 2) {
+ # Clears stats file before printing information in intervals
+ if ( ($ARGV[0] eq "-c") || ($ARGV[0] eq "-C" ) ) {
+ open ( STATS, "> $statspath") || die "Cannot clear $statspath: $!\n";
+ print STATS " ";
+ close STATS;
+ sleep($interval);
+ } else {
+ usage();
+ }
}
- if ($#ARGV == 1) {
- $interval = $ARGV[1];
- }
}
print "$pname on $statspath\n";
my $anysum = 0;
my $anysumsquare = 0;
my $mhz = 0;
+my $flag = 0; # set to 1 once the column header line has been printed
sub get_cpumhz()
{
get_cpumhz();
print "Processor counters run at $mhz MHz\n";
+# The readstat subroutine reads and processes statistics from the stats file.
+# This subroutine gets called after every interval specified by user.
sub readstat()
{
seek STATS, 0, 0;
$diff = $cumulcount - $prevcount;
if ($name eq "snapshot_time") {
$tdiff = $diff;
- # printf "%-25s prev=$prevcount, cumul=$cumulcount diff=$diff, tdiff=$tdiff\n", $name;
- printf "$statspath @ $cumulcount\n";
- printf "%-25s %-10s %-10s %-10s", "Name", "Cur.Count", "Cur.Rate", "#Events";
- if ($anysum) {
- printf "%-8s %10s %10s %12s %10s", "Unit", "last", "min", "avg", "max";
- }
- if ($anysumsquare) {
- printf "%10s", "stddev";
- }
- printf "\n";
+ printf "\n%-10.0f", $cumulcount;
$| = 1;
}
elsif ($cumulcount!=0) {
- printf "%-25s %-10lu %-10lu %-10lu",
- $name, $diff, ($diff/$tdiff), $cumulcount;
+
+ printf " %s %lu %lu",
+ $name, ($diff/$tdiff), $cumulcount;
if (defined($sum)) {
my $sum_orig = $sum;
$sum_diff = $sum_diff/$mhz;
$max = $max/$mhz;
}
- printf "%-8s %10.2f %10lu %12.2f %10lu", $unit, ($sum_diff/$diff), $min,($sum/$cumulcount),$max;
+ printf " %lu %.2f %lu", $min,($sum/$cumulcount),$max;
if (defined($sumsquare)) {
my $s = $sumsquare - (($sum_orig*$sum_orig)/$cumulcount);
if ($s >= 0) {
if (($unit eq "[usecs]") && ($mhz != 1)) {
$stddev = $stddev/$mhz;
}
- printf " %10.2f", $stddev;
+ printf " %.2f ", $stddev;
}
}
}
- printf "\n";
$| = 1;
}
}
else {
if ($cumulcount!=0) {
- printf "%-25s $cumulcount\n", $name
+ printf "%-25s $cumulcount\n", $name # print info when interval is not specified.
}
if (defined($sum)) {
$anysum = 1;
%cumulhash->{$name} = $cumulcount;
%sumhash->{$name} = $sum;
}
+ if ( !$flag && $interval) {
+ printf "Timestamp [Name Rate Total";
+ if ($anysum) {
+ printf " min avg max";
+ }
+ if ($anysumsquare) {
+ printf " stddev";
+ }
+ printf " ]...";
+ printf "\n--------------------------------------------------------------------";
+ $flag = 1;
+ }
}
open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
"\t\t--failnode=<nid>[,<...>] : NID(s) of a failover partner\n"
"\t\t--param <key>=<value> : set a permanent parameter\n"
"\t\t\te.g. --param sys.timeout=40\n"
- "\t\t\t --param lov.stripe.size=4194304\n"
+ "\t\t\t --param lov.stripesize=2M\n"
"\t\t--index=#N : target index (i.e. ost index within the lov)\n"
/* FIXME implement 1.6.x
"\t\t--configdev=<altdevice|file>: store configuration info\n"
"\t\t\tfor this device on an alternate device\n"
*/
+ "\t\t--comment=<user comment>: arbitrary user string (%d bytes)\n"
"\t\t--mountfsoptions=<opts> : permanent mount options\n"
#ifndef TUNEFS
"\t\t--backfstype=<fstype> : backing fs type (ext3, ldiskfs)\n"
"\t\t--noformat: just report what we would do; "
"don't write to disk\n"
"\t\t--verbose\n"
- "\t\t--quiet\n");
+ "\t\t--quiet\n",
+ sizeof(((struct lustre_disk_data *)0)->ldd_userdata));
return;
}
printf("Index: unassigned\n");
else
printf("Index: %d\n", ldd->ldd_svindex);
- printf("UUID: %s\n", (char *)ldd->ldd_uuid);
+ if (ldd->ldd_uuid[0])
+ printf("UUID: %s\n", (char *)ldd->ldd_uuid);
printf("Lustre FS: %s\n", ldd->ldd_fsname);
printf("Mount type: %s\n", MT_STR(ldd));
printf("Flags: %#x\n", ldd->ldd_flags);
ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":"");
printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts);
printf("Parameters:%s\n", ldd->ldd_params);
+ if (ldd->ldd_userdata[0])
+ printf("Comment: %s\n", ldd->ldd_userdata);
printf("\n");
}
sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
filep = fopen(filepnm, "w");
if (!filep) {
- fprintf(stderr, "%s: Unable to create %s file\n",
- progname, filepnm);
+ fprintf(stderr, "%s: Unable to create %s file: %s\n",
+ progname, filepnm, strerror(errno));
goto out_umnt;
}
fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
static struct option long_opt[] = {
{"backfstype", 1, 0, 'b'},
{"stripe-count-hint", 1, 0, 'c'},
+ {"comment", 1, 0, 'u'},
{"configdev", 1, 0, 'C'},
{"device-size", 1, 0, 'd'},
{"erase-params", 0, 0, 'e'},
{"writeconf", 0, 0, 'w'},
{0, 0, 0, 0}
};
- char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrvw";
+ char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw";
char opt;
int rc, longidx;
case 'r':
mop->mo_flags |= MO_FORCEFORMAT;
break;
+ case 'u':
+ strncpy(mop->mo_ldd.ldd_userdata, optarg,
+ sizeof(mop->mo_ldd.ldd_userdata));
+ mop->mo_ldd.ldd_userdata[
+ sizeof(mop->mo_ldd.ldd_userdata) - 1] = 0;
+ break;
case 'v':
verbose++;
break;
char default_mountopts[512] = "";
int ret = 0;
- //printf("pad %d\n", offsetof(struct lustre_disk_data, ldd_padding));
- assert(offsetof(struct lustre_disk_data, ldd_padding) == 200);
-
if ((progname = strrchr(argv[0], '/')) != NULL)
progname++;
else
+++ /dev/null
-#!/bin/sh
-
-SRCDIR=`dirname $0`
-PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
-
-rmmod quotacheck_test quotactl_test quotafmt_test pingsrv pingcli
-lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
-# do it again, in case we tried to unload ksocklnd too early
-lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod
CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT);
CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE);
+ CHECK_CDEFINE(OBD_CONNECT_QUOTA64);
}
static void
BLANK_LINE();
CHECK_STRUCT(qunit_data);
CHECK_MEMBER(qunit_data, qd_id);
- CHECK_MEMBER(qunit_data, qd_type);
+ CHECK_MEMBER(qunit_data, qd_flags);
CHECK_MEMBER(qunit_data, qd_count);
- CHECK_MEMBER(qunit_data, qd_isblk);
+}
+
+static void
+check_qunit_data_old(void)
+{
+ BLANK_LINE();
+ CHECK_STRUCT(qunit_data_old);
+ CHECK_MEMBER(qunit_data_old, qd_id);
+ CHECK_MEMBER(qunit_data_old, qd_type);
+ CHECK_MEMBER(qunit_data_old, qd_count);
+ CHECK_MEMBER(qunit_data_old, qd_isblk);
}
static void
CHECK_MEMBER(lustre_disk_data, ldd_fsname);
CHECK_MEMBER(lustre_disk_data, ldd_svname);
CHECK_MEMBER(lustre_disk_data, ldd_uuid);
+ CHECK_MEMBER(lustre_disk_data, ldd_userdata);
CHECK_MEMBER(lustre_disk_data, ldd_mount_opts);
CHECK_MEMBER(lustre_disk_data, ldd_params);
}
CHECK_VALUE(OST_OPEN);
CHECK_VALUE(OST_CLOSE);
CHECK_VALUE(OST_STATFS);
- CHECK_VALUE(OST_SAN_READ);
- CHECK_VALUE(OST_SAN_WRITE);
CHECK_VALUE(OST_SYNC);
CHECK_VALUE(OST_QUOTACHECK);
CHECK_VALUE(OST_QUOTACTL);
check_llog_array_rec();
check_mds_extent_desc();
check_qunit_data();
+ check_qunit_data_old();
check_mgs_target_info();
check_lustre_disk_data();
void lustre_assert_wire_constants(void)
{
}
-
-