From: yury Date: Thu, 14 Sep 2006 13:38:33 +0000 (+0000) Subject: - update from 1_5 X-Git-Tag: v1_8_0_110~486^2~938 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=b3e6d8ef74722c4045dbc11f8533843a92f07421;p=fs%2Flustre-release.git - update from 1_5 --- diff --git a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch index dca4676..e54774f 100644 --- a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch +++ b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch @@ -1,7 +1,19 @@ -Index: linux-2.6.5-7.201/fs/ext3/super.c +Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/super.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h 2006-08-09 17:59:34.000000000 +0400 ++++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h 2006-08-22 12:35:55.000000000 +0400 +@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super + extern void ext3_write_super (struct super_block *); + extern void ext3_write_super_lockfs (struct super_block *); + extern void ext3_unlockfs (struct super_block *); ++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); + extern int ext3_remount (struct super_block *, int *, char *); + extern int ext3_statfs (struct super_block *, struct kstatfs *); + +Index: linux-2.6.5-7.201-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 @@ -39,7 +39,7 @@ static int ext3_load_journal(struct super_block *, struct ext3_super_block *); static int ext3_create_journal(struct super_block *, struct 
ext3_super_block *, @@ -20,10 +32,10 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c struct ext3_super_block * es, int sync) { -Index: linux-2.6.5-7.201/fs/ext3/namei.c +Index: linux-2.6.5-7.201-full/fs/ext3/namei.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/namei.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/namei.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 @@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t * struct buffer_head * bh) { @@ -44,10 +56,10 @@ Index: linux-2.6.5-7.201/fs/ext3/namei.c if (pde) pde->rec_len = cpu_to_le16(le16_to_cpu(pde->rec_len) + -Index: linux-2.6.5-7.201/fs/ext3/xattr.c +Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2006-06-20 19:42:30.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c 2006-07-14 01:53:23.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c 2006-08-09 17:59:37.000000000 +0400 @@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru { int error = -EINVAL; @@ -57,10 +69,10 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c write_lock(&ext3_handler_lock); if (!ext3_xattr_handlers[name_index-1]) { ext3_xattr_handlers[name_index-1] = handler; -Index: linux-2.6.5-7.201/fs/ext3/inode.c +Index: linux-2.6.5-7.201-full/fs/ext3/inode.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/inode.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c 2006-07-14 01:53:22.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/inode.c 
2006-08-22 12:35:28.000000000 +0400 @@ -1517,9 +1517,14 @@ out_stop: if (end > inode->i_size) { ei->i_disksize = end; diff --git a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch index df3d2ea..f6904f2 100644 --- a/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch +++ b/ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch @@ -1,7 +1,19 @@ +Index: linux-2.6.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-08-09 17:56:39.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-08-22 12:36:22.000000000 +0400 +@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super + extern void ext3_write_super (struct super_block *); + extern void ext3_write_super_lockfs (struct super_block *); + extern void ext3_unlockfs (struct super_block *); ++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); + extern int ext3_remount (struct super_block *, int *, char *); + extern int ext3_statfs (struct super_block *, struct kstatfs *); + Index: linux-2.6.9-full/fs/ext3/super.c =================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-06-02 23:37:51.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-06-02 23:56:29.000000000 +0400 +--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 @@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe unsigned long journal_devnum); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, @@ -22,8 +34,8 @@ Index: linux-2.6.9-full/fs/ext3/super.c { Index: linux-2.6.9-full/fs/ext3/namei.c =================================================================== ---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-06-02 
23:37:49.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/namei.c 2006-06-02 23:43:31.000000000 +0400 +--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 @@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t * struct buffer_head * bh) { @@ -47,7 +59,7 @@ Index: linux-2.6.9-full/fs/ext3/namei.c Index: linux-2.6.9-full/fs/ext3/xattr.c =================================================================== --- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-06-01 14:58:48.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-06-03 00:02:00.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-08-09 17:56:40.000000000 +0400 @@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index) { struct xattr_handler *handler = NULL; @@ -60,7 +72,7 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c Index: linux-2.6.9-full/fs/ext3/inode.c =================================================================== --- linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-02 23:37:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/inode.c 2006-06-03 00:27:41.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/inode.c 2006-08-22 12:34:28.000000000 +0400 @@ -1513,9 +1513,14 @@ out_stop: if (end > inode->i_size) { ei->i_disksize = end; diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch index 325d080..b807900 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch @@ -1387,8 +1387,8 @@ Index: linux-2.6.16.i686/fs/ext3/mballoc.c + * Someone more lucky has already allocated it. 
+ * The only thing we can do is just take first + * found block(s) -+ */ + printk(KERN_ERR "EXT3-fs: and someone won our chunk\n"); ++ */ + ac.ac_b_ex.fe_group = 0; + ac.ac_b_ex.fe_start = 0; + ac.ac_b_ex.fe_len = 0; diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index c77ebdd..646e4fe 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1,8 +1,8 @@ -Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h +Index: linux-stage/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400 -@@ -57,6 +57,14 @@ struct statfs; +--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:37:27.000000000 +0800 +@@ -57,6 +57,14 @@ #define ext3_debug(f, a...) 
do {} while (0) #endif @@ -17,7 +17,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -339,6 +347,7 @@ struct ext3_inode { +@@ -339,6 +347,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ @@ -25,7 +25,22 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe +@@ -361,6 +370,14 @@ + #define ext3_find_first_zero_bit ext2_find_first_zero_bit + #define ext3_find_next_zero_bit ext2_find_next_zero_bit + ++#ifndef ext2_find_next_le_bit ++#ifdef __LITTLE_ENDIAN ++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) ++#else ++#error "mballoc needs a patch for big-endian systems - CFS bug 10634" ++#endif /* __LITTLE_ENDIAN */ ++#endif /* !ext2_find_next_le_bit */ ++ + /* + * Maximal mount counts between two filesystem checks + */ +@@ -700,7 +717,9 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, @@ -36,7 +51,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h extern unsigned long ext3_count_free_blocks (struct super_block *); extern void ext3_check_blocks_bitmap (struct super_block *); extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc +@@ -824,6 +843,17 @@ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); @@ -54,10 +69,10 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h #endif /* __KERNEL__ */ #define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) -Index: 
linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h +Index: linux-stage/include/linux/ext3_fs_sb.h =================================================================== ---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400 +--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:37:00.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:37:01.000000000 +0800 @@ -23,9 +23,15 @@ #define EXT_INCLUDE #include @@ -74,7 +89,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { +@@ -78,6 +84,43 @@ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif @@ -112,17 +127,17 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; }; -+ + +#define EXT3_GROUP_INFO(sb, group) \ + EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ + [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - ++ #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.5-7.252-full/fs/ext3/super.c +Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400 -@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block +--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800 +@@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -130,7 +145,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); 
journal_destroy(sbi->s_journal); -@@ -545,6 +546,7 @@ enum { +@@ -546,6 +547,7 @@ Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, @@ -138,7 +153,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c }; static match_table_t tokens = { -@@ -591,6 +592,9 @@ static match_table_t tokens = { +@@ -592,6 +594,9 @@ {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, @@ -148,7 +163,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c {Opt_barrier, "barrier=%u"}, {Opt_err, NULL} }; -@@ -813,6 +815,19 @@ static int parse_options (char * options +@@ -817,6 +822,19 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -168,7 +183,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super +@@ -1470,6 +1488,7 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -176,7 +191,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c return 0; -@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t +@@ -2118,7 +2137,13 @@ static int __init init_ext3_fs(void) { @@ -191,7 +206,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c if (err) return err; err = init_inodecache(); -@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void) +@@ -2147,6 +2172,7 @@ unregister_filesystem(&ext3_fs_type); destroy_inodecache(); exit_ext3_xattr(); @@ -199,11 +214,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c } int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.5-7.252-full/fs/ext3/extents.c +Index: linux-stage/fs/ext3/extents.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400 -@@ -777,7 +777,7 @@ cleanup: +--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800 
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800 +@@ -779,7 +779,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; @@ -212,7 +227,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c } } kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st +@@ -1438,7 +1438,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -221,7 +236,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c return err; } -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1923,10 +1923,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -235,7 +250,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1938,7 +1940,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -244,11 +259,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.5-7.252-full/fs/ext3/inode.c +Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400 -@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h +--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800 +@@ -574,7 +574,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i 
= 0; i < keys; i++) @@ -257,7 +272,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c return err; } -@@ -675,7 +675,7 @@ err_out: +@@ -675,7 +675,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -266,7 +281,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c return err; } -@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru +@@ -1837,7 +1837,7 @@ } } @@ -275,7 +290,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c } /** -@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t +@@ -2008,7 +2008,7 @@ ext3_journal_test_restart(handle, inode); } @@ -284,11 +299,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c if (parent_bh) { /* -Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c +Index: linux-stage/fs/ext3/balloc.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300 -+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400 -@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ +--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:36:59.000000000 +0800 ++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:37:01.000000000 +0800 +@@ -78,7 +78,7 @@ * * Return buffer_head on success or NULL in case of failure. */ @@ -297,7 +312,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; -@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino +@@ -274,7 +274,7 @@ } /* Free given blocks, update quota and i_blocks field */ @@ -306,7 +321,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; -@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super +@@ -1142,7 +1142,7 @@ * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ @@ -315,11 +330,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c unsigned long goal, int *errp) { struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c +Index: linux-stage/fs/ext3/xattr.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400 -@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, +--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:37:00.000000000 +0800 ++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:37:01.000000000 +0800 +@@ -1371,7 +1371,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: @@ -328,7 +343,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c error = -EIO; goto cleanup; } -@@ -1411,7 +1411,7 @@ getblk_failed: +@@ -1411,7 +1411,7 @@ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); @@ -337,7 +352,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to -@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle +@@ -1519,7 +1519,7 @@ mb_cache_entry_free(ce); ce = NULL; } @@ -346,10 +361,10 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { -Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c +Index: linux-stage/fs/ext3/mballoc.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400 +--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800 ++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:37:34.000000000 +0800 @@ -0,0 +1,2702 @@ +/* + * Copyright (c) 2003-2005, Cluster File Systems, Inc, 
info@clusterfs.com @@ -792,7 +807,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c + while (i < max) { + fragments++; + first = i; -+ i = find_next_bit(bitmap, max, i); ++ i = ext2_find_next_le_bit(bitmap, max, i); + len = i - first; + free += len; + if (len > 1) @@ -3053,11 +3068,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c + remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); + remove_proc_entry(EXT3_ROOT, proc_root_fs); +} -Index: linux-2.6.5-7.252-full/fs/ext3/Makefile +Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o +--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800 +@@ -6,7 +6,7 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o \ diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch index 0040a6f..13f3482 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch @@ -1356,8 +1356,8 @@ Index: linux-stage/fs/ext3/mballoc.c + * Someone more lucky has already allocated it. 
+ * The only thing we can do is just take first + * found block(s) -+ */ + printk(KERN_ERR "EXT3-fs: and someone won our chunk\n"); ++ */ + ac.ac_b_ex.fe_group = 0; + ac.ac_b_ex.fe_start = 0; + ac.ac_b_ex.fe_len = 0; diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch index e34c411..a00cd4a 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch @@ -1,8 +1,8 @@ Index: linux-stage/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600 -@@ -57,6 +57,14 @@ struct statfs; +--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:29:38.000000000 +0800 +@@ -57,6 +57,14 @@ #define ext3_debug(f, a...) 
do {} while (0) #endif @@ -17,7 +17,7 @@ Index: linux-stage/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -365,6 +373,7 @@ struct ext3_inode { +@@ -365,6 +373,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ @@ -25,7 +25,22 @@ Index: linux-stage/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe +@@ -387,6 +396,14 @@ + #define ext3_find_first_zero_bit ext2_find_first_zero_bit + #define ext3_find_next_zero_bit ext2_find_next_zero_bit + ++#ifndef ext2_find_next_le_bit ++#ifdef __LITTLE_ENDIAN ++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) ++#else ++#error "mballoc needs a patch for big-endian systems - CFS bug 10634" ++#endif /* __LITTLE_ENDIAN */ ++#endif /* !ext2_find_next_le_bit */ ++ + /* + * Maximal mount counts between two filesystem checks + */ +@@ -726,7 +743,7 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, @@ -34,7 +49,7 @@ Index: linux-stage/include/linux/ext3_fs.h extern void ext3_free_blocks_sb (handle_t *, struct super_block *, unsigned long, unsigned long, int *); extern unsigned long ext3_count_free_blocks (struct super_block *); -@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc +@@ -859,6 +876,17 @@ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); @@ -54,8 +69,8 @@ Index: linux-stage/include/linux/ext3_fs.h /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). 
*/ Index: linux-stage/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs_sb.h 2006-05-25 10:59:14.000000000 -0600 +--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:27:36.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:27:37.000000000 +0800 @@ -23,9 +23,15 @@ #define EXT_INCLUDE #include @@ -72,7 +87,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory -@@ -81,6 +87,43 @@ struct ext3_sb_info { +@@ -81,6 +87,43 @@ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif @@ -110,17 +125,17 @@ Index: linux-stage/include/linux/ext3_fs_sb.h + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; }; -+ + +#define EXT3_GROUP_INFO(sb, group) \ + EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ + [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - ++ #endif /* _LINUX_EXT3_FS_SB */ Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600 -@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block +--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800 +@@ -394,6 +394,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -128,7 +143,7 @@ Index: linux-stage/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); -@@ -597,6 +598,7 @@ enum { +@@ -597,6 +598,7 @@ Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, @@ -136,7 
+151,7 @@ Index: linux-stage/fs/ext3/super.c }; static match_table_t tokens = { -@@ -649,6 +651,9 @@ static match_table_t tokens = { +@@ -649,6 +651,9 @@ {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, @@ -146,7 +161,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, -@@ -962,6 +967,19 @@ static int parse_options (char * options +@@ -962,6 +967,19 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -166,7 +181,7 @@ Index: linux-stage/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super +@@ -1651,6 +1669,7 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -174,7 +189,7 @@ Index: linux-stage/fs/ext3/super.c return 0; -@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t +@@ -2433,7 +2452,13 @@ static int __init init_ext3_fs(void) { @@ -189,7 +204,7 @@ Index: linux-stage/fs/ext3/super.c if (err) return err; err = init_inodecache(); -@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void) +@@ -2455,6 +2480,7 @@ unregister_filesystem(&ext3_fs_type); destroy_inodecache(); exit_ext3_xattr(); @@ -199,9 +214,9 @@ Index: linux-stage/fs/ext3/super.c int ext3_prep_san_write(struct inode *inode, long *blocks, Index: linux-stage/fs/ext3/extents.c =================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600 -@@ -777,7 +777,7 @@ cleanup: +--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800 +@@ -779,7 +779,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; @@ -210,7 +225,7 @@ Index: linux-stage/fs/ext3/extents.c } } kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st +@@ -1438,7 
+1438,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -219,7 +234,7 @@ Index: linux-stage/fs/ext3/extents.c return err; } -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1923,10 +1923,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -233,7 +248,7 @@ Index: linux-stage/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1938,7 +1940,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -244,9 +259,9 @@ Index: linux-stage/fs/ext3/extents.c from, to, ex->ee_block, ex->ee_len); Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600 -@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h +--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800 +@@ -572,7 +572,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) @@ -255,7 +270,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -673,7 +673,7 @@ err_out: +@@ -673,7 +673,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -264,7 +279,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru +@@ -1831,7 +1831,7 @@ } } @@ -273,7 +288,7 @@ Index: linux-stage/fs/ext3/inode.c } /** -@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t +@@ -2004,7 +2004,7 @@ 
ext3_journal_test_restart(handle, inode); } @@ -284,9 +299,9 @@ Index: linux-stage/fs/ext3/inode.c /* Index: linux-stage/fs/ext3/balloc.c =================================================================== ---- linux-stage.orig/fs/ext3/balloc.c 2006-05-25 10:36:02.000000000 -0600 -+++ linux-stage/fs/ext3/balloc.c 2006-05-25 10:36:04.000000000 -0600 -@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ +--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:27:36.000000000 +0800 ++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:27:37.000000000 +0800 +@@ -79,7 +79,7 @@ * * Return buffer_head on success or NULL in case of failure. */ @@ -331,9 +346,9 @@ Index: linux-stage/fs/ext3/balloc.c struct buffer_head *bitmap_bh = NULL; Index: linux-stage/fs/ext3/xattr.c =================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600 -@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, +--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800 +@@ -1281,7 +1281,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: @@ -342,7 +357,7 @@ Index: linux-stage/fs/ext3/xattr.c error = -EIO; goto cleanup; } -@@ -1328,7 +1328,7 @@ getblk_failed: +@@ -1328,7 +1328,7 @@ if (ce) mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); @@ -351,7 +366,7 @@ Index: linux-stage/fs/ext3/xattr.c /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to -@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle +@@ -1427,7 +1427,7 @@ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { if (ce) mb_cache_entry_free(ce); @@ -362,8 +377,8 @@ Index: linux-stage/fs/ext3/xattr.c } else { Index: linux-stage/fs/ext3/mballoc.c =================================================================== ---- 
linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600 -+++ linux-stage/fs/ext3/mballoc.c 2006-05-25 10:59:14.000000000 -0600 +--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800 ++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:30:11.000000000 +0800 @@ -0,0 +1,2701 @@ +/* + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com @@ -806,7 +821,7 @@ Index: linux-stage/fs/ext3/mballoc.c + while (i < max) { + fragments++; + first = i; -+ i = find_next_bit(bitmap, max, i); ++ i = ext2_find_next_le_bit(bitmap, max, i); + len = i - first; + free += len; + if (len > 1) @@ -3068,8 +3083,8 @@ Index: linux-stage/fs/ext3/mballoc.c +} Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600 +--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800 @@ -6,7 +6,7 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6ecae2e..3310ff0 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -5,7 +5,7 @@ tbd Cluster File Systems, Inc. special upgrade step is needed. Please read the user documentation before upgrading any part of a live system. * WIRE PROTOCOL CHANGE from previous 1.6 beta versions. This - version will not interoperate with older 1.6 betas. + version will not interoperate with 1.6 betas before beta5 (1.5.95). * WARNING: Lustre configuration and startup changes are required with this release. See https://mail.clusterfs.com/wikis/lustre/MountConf for details. @@ -13,18 +13,6 @@ tbd Cluster File Systems, Inc. 
Severity : enhancement -Bugzilla : 4226 -Description: Permanently set tunables -Details : All writable /proc/fs/lustre tunables can now be permanently - set on a per-server basis, at mkfs time or on a live - system. - -Severity : enhancement -Bugzilla : 10547 -Description: Lustre message v2 -Details : Add lustre message format v2. - -Severity : enhancement Bugzilla : 8007 Description: MountConf Details : Lustre configuration is now managed via mkfs and mount @@ -58,6 +46,18 @@ Details : stripe assignments are now made based on ost space available, to optimize storage space and networking resources. Severity : enhancement +Bugzilla : 4226 +Description: Permanently set tunables +Details : All writable /proc/fs/lustre tunables can now be permanently + set on a per-server basis, at mkfs time or on a live + system. + +Severity : enhancement +Bugzilla : 10547 +Description: Lustre message v2 +Details : Add lustre message format v2. + +Severity : enhancement Bugzilla : 9866 Description: client OST exclusion list Details : Clients can be started with a list of OSTs that should be @@ -77,20 +77,42 @@ Details : Further unserialise some read-only MDS RPCs - learn about intents. MDS RPCs in flight for a single client and add /proc controls to adjust this limit. +Severity : enhancement +Bugzilla : 22484 +Description: client read/write statistics +Details : Add client read/write call usage stats for performance + analysis of user processes. + /proc/fs/lustre/llite/*/offset_stats shows non-sequential + file access. extents_stats shows chunk size distribution. + extents_stats_per_process show chunk size distribution per + user process. + +Severity : enhancement +Bugzilla : 22486 +Description: mds statistics +Details : Add detailed mds operations statistics in + /proc/fs/lustre/mds/*/stats. + +Severity : minor +Bugzilla : 10667 +Description: Failure of copying files with lustre special EAs. 
+Details : Client side always returns success for setxattr call for lustre + special xattr (currently only "trusted.lov"). ------------------------------------------------------------------------------ -tbd Cluster File Systems, Inc. +08-20-2006 Cluster File Systems, Inc. * version 1.4.7 * Support for kernels: - 2.6.9-34.EL (RHEL 4) - 2.6.5-7.252 (SLES 9) + 2.6.9-42.EL (RHEL 4) + 2.6.5-7.276 (SLES 9) + 2.4.21-40.EL (RHEL 3) 2.6.12.6 vanilla (kernel.org) * bug fixes Severity : major Frequency : rare -Bugzilla : 5719, 9635, 9792, 9684, +Bugzilla : 5719, 9635, 9792, 9684 Description: OST (or MDS) trips assertions in (re)connection under heavy load Details : If a server is under heavy load and cannot reply to new connection requests before the client resends the (re)connect, @@ -236,11 +258,11 @@ Details : Running the "racer" program may cause one MDS thread to rename Severity : major Frequency : only very large systems with liblustre clients -Bugzilla : 7304 +Bugzilla : 7304 Description: slow eviction of liblustre clients with the "evict_by_nid" RPC -Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to +Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to all OSTs in parallel. This allows the eviction of stale liblustre - clients to proceed much faster than if they were done in series, + clients to proceed much faster than if they were done in series, and also offers similar improvements for other set_info RPCs. Severity : minor @@ -267,7 +289,7 @@ Details : Do not serialize getattr (non-intent version) and statfs.
Severity : minor Frequency : occasional, when OST network is overloaded/intermittent -Bugzilla : 10416 +Bugzilla : 10416 Description: client evicted by OST after bulk IO timeout Details : If a client sends a bulk IO request (read or write) the OST may evict the client if it is unresposive to its data GET/PUT @@ -279,7 +301,7 @@ Details : If a client sends a bulk IO request (read or write) the OST Severity : minor Frequency : Always when mmapping file with no objects -Bugzilla : 10438 +Bugzilla : 10438 Description: client crashes when mmapping file with no objects Details : Check that we actually have objects in a file before doing any operations on objects in ll_vm_open, ll_vm_close and @@ -338,8 +360,8 @@ Details : Many of the /proc/ tunables can only be tuned at a megabyte Severity : enhancement Bugzilla : 9292 Description: Getattr by fid -Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP - locks, avoids extra getattr rpc requests to MDS, allows '/' to +Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP + locks, avoids extra getattr rpc requests to MDS, allows '/' to have locks and avoids getattr rpc requests for it on every stat. Severity : major @@ -457,16 +479,6 @@ Details : If replay happened on an open request reply before we were able assertion in ll_local_open. Now we set the handler right after recognising of open request -Severity : minor -Frequency : very rare -Bugzilla : 10669 -Description: Deadlock: extent lock cancellation callback vs import invalidation -Details : If extent lock cancellation callback takes long enough time, and it - happens that import gets invalidated in process, there is a - deadlock on page_lock in extent lock cancellation vs ns_lock in - import invalidation processes. The fix is to not try to match - locks from inactive OSTs. 
- Severity : trivial Frequency : very rare Bugzilla : 10584 @@ -483,6 +495,14 @@ Details : It is now possible to clear the OBD RPC statistics by writing to the "stats" file. Severity : minor +Frequency : rare +Bugzilla : 10641 +Description: Client mtime is not the same on different clients after utimes +Details : In some cases, the client was using the utimes() syscall on + a file cached on another node. The clients now validate the + ctime from the MDS + OSTs to determine which one is right. + +Severity : minor Frequency : always Bugzilla : 10611 Description: Inability to activate failout mode @@ -506,6 +526,42 @@ Details : Under some heavy load conditions it is possible that a failed mount can wait for the full obd_timeout interval, possibly several minutes, before reporting an error. Instead return an error as soon as the status is known. +Severity : major +Frequency : quota enabled and large files being deleted +Bugzilla : 10707 +Description: releasing more than 4GB of quota at once hangs OST +Details : If a user deletes more than 4GB of files on a single OST it + will cause the OST to spin in an infinite loop. Release + quota in < 4GB chunks, or use a 64-bit value for 1.4.7.1+. + +Severity : trivial +Frequency : rare +Bugzilla : 10845 +Description: statfs data retrieved from /proc may be stale or zero +Details : When reading per-device statfs data from /proc, in the + {kbytes,files}_{total,free,avail} files, it may appear + as zero or be out of date. + +Severity : trivial +Frequency : systems with MD RAID1 external journal devices +Bugzilla : 10832 +Description: lconf's call to blkid is confused by RAID1 journal devices +Details : Use the "blkid -l" flag to locate the MD RAID device instead + of returning all block devices that match the journal UUID. 
+ +Severity : normal +Frequency : always, for aggregate stripe size over 4GB +Bugzilla : 10725 +Description: assertion fails when trying to use 4GB stripe size +Details : Using "setstripe" to set a stripe size over 4GB will fail in the kernel, + complaining "ASSERTION(lsm->lsm_xfersize != 0)" + +Severity : normal +Frequency : always on ppc64 +Bugzilla : 10634 +Description: the first write on an ext3 filesystem with mballoc got stuck +Details : ext3_mb_generate_buddy() uses find_next_bit() which does not + perform endianness conversion. ------------------------------------------------------------------------------ diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index ada23ad..19edffa 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -266,7 +266,7 @@ LB_LINUX_TRY_COMPILE([ # AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP], [AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()]) -HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/mm/filemap.c`" +HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`" if test "$HAVE_GCPN_GFP" != 0 ; then AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1, [kernel has grab_cache_page_nowait_gfp()]) diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac index aff7119..2881e61 100644 --- a/lustre/autoconf/lustre-version.ac +++ b/lustre/autoconf/lustre-version.ac @@ -1,6 +1,6 @@ m4_define([LUSTRE_MAJOR],[1]) -m4_define([LUSTRE_MINOR],[9]) -m4_define([LUSTRE_PATCH],[0]) +m4_define([LUSTRE_MINOR],[5]) +m4_define([LUSTRE_PATCH],[95]) m4_define([LUSTRE_FIX],[0]) dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after diff --git a/lustre/doc/mkfs.lustre.8 b/lustre/doc/mkfs.lustre.8 index a1469e4..b469034 100644 --- a/lustre/doc/mkfs.lustre.8 +++ b/lustre/doc/mkfs.lustre.8 @@ -37,6 +37,9 @@ service defined by this command.
.BI \--backfstype= fstype Force a particular format for the backing fs (ext3, ldiskfs) .TP +.BI \--comment= comment +Set user comment about this disk, ignored by Lustre. +.TP .BI \--device-size= KB Set device size for loop devices .TP diff --git a/lustre/doc/mount.lustre.8 b/lustre/doc/mount.lustre.8 index 3e830a9..69f1815 100644 --- a/lustre/doc/mount.lustre.8 +++ b/lustre/doc/mount.lustre.8 @@ -70,7 +70,7 @@ options: Only start the MGC (and MGS, if co-located) for a target service, and not the actual service. .TP .BI exclude= ostlist -Start a client or MDT with a list of known inactive OSTs +Start a client or MDT with a (colon-separated) list of known inactive OSTs .TP .BI abort_recov Abort recovery (targets only) diff --git a/lustre/doc/tunefs.lustre.8 b/lustre/doc/tunefs.lustre.8 index 423c8bd..7ca33ce 100644 --- a/lustre/doc/tunefs.lustre.8 +++ b/lustre/doc/tunefs.lustre.8 @@ -24,6 +24,9 @@ mounted. .SH OPTIONS .TP +.BI \--comment= comment +Set user comment about this disk, ignored by Lustre. 
+.TP .BI \--erase-params Remove all previous parameter info .TP diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index 5c466ce..9c41cd0 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -1,13 +1,15 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: - */ + */ #ifndef __LVFS_LINUX_H__ #define __LVFS_LINUX_H__ #include #include #include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include +#endif #include #include diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index bb188e7..833e961 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -85,7 +85,7 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) } } -#define OBD_SLEEP_ON(wq) interruptible_sleep_on(wq) +#define OBD_SLEEP_ON(wq, state) wait_event_interruptible(wq, state) #else /* !__KERNEL__ */ diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index a828b35..e4730f8 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -11,7 +11,7 @@ typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args); /* liblustreapi.c */ -extern int llapi_file_create(const char *name, long stripe_size, +extern int llapi_file_create(const char *name, unsigned long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern); extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum); diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index bae756b..2662c06 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -308,7 +308,6 @@ struct lmv_stripe_md { struct lu_fid mea_ids[0]; }; - struct lustre_handle { __u64 cookie; }; @@ -422,43 +421,43 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define 
MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */ /* Connect flags */ -#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */ -#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */ -#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */ -#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */ -#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */ -#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */ -#define OBD_CONNECT_ACL 0x80ULL /* client using access control lists */ -#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/ -#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST do object create-on-write */ -#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* server gets locks for punch b=9528 */ -#define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */ -#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ -#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ -#define OBD_CONNECT_REAL 0x4000ULL -#define OBD_CONNECT_ATTRFID 0x8000ULL /* Server supports GetAttr By Fid */ -#define OBD_CONNECT_NODEVOH 0x10000ULL /* No open handle for special nodes */ -#define OBD_CONNECT_LCL_CLIENT 0x20000ULL /* local 1.6 client */ -#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote client */ -#define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Maximum pages per RPC */ - -/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */ - -#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ +#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */ +#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */ +#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */ +#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for 
non-IO reqs */ +#define OBD_CONNECT_ACL 0x80ULL /* client uses access control lists */ +#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/ +#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST create objects on write */ +#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* locks on server for punch b=9528 */ +#define OBD_CONNECT_TRANSNO 0x800ULL /* replay sends initial transno */ +#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ +#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ +#define OBD_CONNECT_REAL 0x4000ULL +#define OBD_CONNECT_ATTRFID 0x8000ULL /* Server supports GetAttr By Fid */ +#define OBD_CONNECT_NODEVOH 0x10000ULL /* No open handle for special nodes */ +#define OBD_CONNECT_LCL_CLIENT 0x20000ULL /* local 1.8 client */ +#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote 1.8 client */ +#define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Max bytes per rpc */ +#define OBD_CONNECT_QUOTA64 0x100000ULL /* 64bit qunit_data.qd_count b=10707*/ +#define OBD_CONNECT_FID_CAPA 0x200000ULL /* fid capability */ +#define OBD_CONNECT_OSS_CAPA 0x400000ULL /* OSS capability */ +/* also update obd_connect_names[] for lprocfs_rd_connect_flags() + * and lustre/utils/wirecheck.c */ + +#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ - OBD_CONNECT_BRW_SIZE) + OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION) -#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ - OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ - OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ - OBD_CONNECT_ATTRFID) +#define MAX_QUOTA_COUNT32 ((0xffffffffULL >> QUOTABLOCK_BITS) 
<< QUOTABLOCK_BITS) #define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\ ((patch)<<8) + (fix)) @@ -509,8 +508,8 @@ typedef enum { OST_OPEN = 11, OST_CLOSE = 12, OST_STATFS = 13, - OST_SAN_READ = 14, - OST_SAN_WRITE = 15, +/* OST_SAN_READ = 14, deprecated */ +/* OST_SAN_WRITE = 15, deprecated */ OST_SYNC = 16, OST_SET_INFO = 17, OST_QUOTACHECK = 18, @@ -652,7 +651,6 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" #define XATTR_NAME_LOV "trusted.lov" - #define OBD_MD_FLID (0x00000001ULL) /* object ID */ #define OBD_MD_FLATIME (0x00000002ULL) /* access time */ #define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ @@ -1536,9 +1534,9 @@ struct lov_mds_md_join { #define LLOG_OP_MASK 0xfff00000 typedef enum { - LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0, - OST_SZ_REC = LLOG_OP_MAGIC | (OST_SAN_WRITE << 8), - OST_RAID1_REC = LLOG_OP_MAGIC | ((OST_SAN_WRITE + 1) << 8), + LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, + OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, + OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK, MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR, OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, @@ -1761,14 +1759,28 @@ extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct lustre_cfg; extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); -/* quota */ +/* quota. fixed by tianzy for bug10707 */ +#define QUOTA_IS_GRP 0X1UL /* 0 is user, 1 is group. Used by qd_flags*/ +#define QUOTA_IS_BLOCK 0x2UL /* 0 is inode, 1 is block. 
Used by qd_flags*/ + struct qunit_data { + __u32 qd_id; /* ID appiles to (uid, gid) */ + __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupy one bit; + * Block quota or file quota occupy one bit */ + __u64 qd_count; /* acquire/release count (bytes for block quota) */ +}; + +struct qunit_data_old { __u32 qd_id; /* ID appiles to (uid, gid) */ __u32 qd_type; /* Quota type (USRQUOTA, GRPQUOTA) */ __u32 qd_count; /* acquire/release count (bytes for block quota) */ __u32 qd_isblk; /* Block quota or file quota */ }; + extern void lustre_swab_qdata(struct qunit_data *d); +extern void lustre_swab_qdata_old(struct qunit_data_old *d); +extern struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d); +extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d); typedef enum { QUOTA_DQACQ = 601, diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index ae21989..f916bc7 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -51,8 +51,8 @@ enum lcfg_command_type { LCFG_DEL_CONN = 0x00cf00c, LCFG_LOV_ADD_OBD = 0x00cf00d, LCFG_LOV_DEL_OBD = 0x00cf00e, - LCFG_PARAM = 0x00ce00f, - LCFG_MARKER = 0x00ce010, + LCFG_PARAM = 0x00cf00f, + LCFG_MARKER = 0x00cf010, LCFG_LOG_START = 0x00ce011, LCFG_LOG_END = 0x00ce012, LCFG_LOV_ADD_INA = 0x00ce013, diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 69f07d7..9c76666 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -92,7 +92,8 @@ struct lustre_disk_data { char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */ -/*200*/ __u8 ldd_padding[4096 - 200]; +/*200*/ char ldd_userdata[1024 - 200]; /* arbitrary user string */ +/*1024*/__u8 ldd_padding[4096 - 1024]; /*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */ /*8192*/char ldd_params[4096]; /* key=value pairs */ }; diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 
4b3cd6a..5ee5a81 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -370,120 +370,21 @@ extern char *ldlm_lockname[]; extern char *ldlm_typename[]; extern char *ldlm_it2str(int it); -#define __LDLM_DEBUG(level, lock, format, a...) \ -do { \ - if (lock->l_resource == NULL) { \ - CDEBUG(level, "### " format \ - " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\ - "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \ - LPX64" expref: %d pid: %u\n" , ## a, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_EXTENT) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\ - "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 \ - " expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_policy_data.l_extent.start, \ - lock->l_policy_data.l_extent.end, \ - lock->l_req_extent.start, lock->l_req_extent.end, \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_FLOCK) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " \ - "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \ - " expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_policy_data.l_flock.pid, \ - lock->l_policy_data.l_flock.start, \ - lock->l_policy_data.l_flock.end, \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_IBITS) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " \ - "flags: %x remote: "LPX64" expref: %d " \ - "pid %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, \ - lock, lock->l_handle.h_cookie, \ - atomic_read (&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - lock->l_policy_data.l_inodebits.bits, \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " \ - "remote: "LPX64" expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, \ - lock, lock->l_handle.h_cookie, \ - atomic_read (&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - } \ +void ldlm_lock_debug(cfs_debug_limit_state_t *cdls, + __u32 level, struct ldlm_lock *lock, + const char *file, const char *func, const int line, + char *fmt, ...); + +#define LDLM_DEBUG(lock, fmt, a...) ldlm_lock_debug(NULL, D_DLMTRACE, lock, \ + __FILE__, __func__, __LINE__, "### " fmt, ## a) + +#define LDLM_ERROR(lock, fmt, a...) \ +do { \ + static cfs_debug_limit_state_t cdls; \ + ldlm_lock_debug(&cdls, D_ERROR, lock, \ + __FILE__, __func__, __LINE__, "### " fmt, ## a); \ } while (0) -#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \ - format, ## a) -#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a) - #define LDLM_DEBUG_NOLOCK(format, a...) 
\ CDEBUG(D_DLMTRACE, "### " format "\n" , ## a) diff --git a/lustre/include/lustre_ha.h b/lustre/include/lustre_ha.h index 8377728..43071ba 100644 --- a/lustre/include/lustre_ha.h +++ b/lustre/include/lustre_ha.h @@ -11,8 +11,6 @@ struct obd_device; struct ptlrpc_request; -void ptlrpc_run_failed_import_upcall(struct obd_import *imp); -void ptlrpc_run_recovery_over_upcall(struct obd_device *obd); int ptlrpc_replay(struct obd_import *imp); int ptlrpc_resend(struct obd_import *imp); void ptlrpc_free_committed(struct obd_import *imp); diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index c05ce65..412bb02 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -134,9 +134,10 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies); -int llog_cat_initialize(struct obd_device *obd, int count); +int llog_cat_initialize(struct obd_device *obd, int count, + struct obd_uuid *uuid); int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid); + int count, struct llog_catid *logid, struct obd_uuid *uuid); int obd_llog_finish(struct obd_device *obd, int count); diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 8c2f0af..1356fbf 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -398,7 +398,7 @@ struct ptlrpc_request { struct timeval rq_arrival_time; /* request arrival time */ struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ -#if CRAY_XT3 +#ifdef CRAY_XT3 __u32 rq_uid; /* peer uid, used in MDS only */ #endif @@ -463,34 +463,21 @@ ptlrpc_rqphase2str(const struct ptlrpc_request *req) #define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s" -#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) 
\ -CDEB_TYPE(level, "@@@ " fmt \ - " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \ - REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \ - req->rq_transno, \ - req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, \ - req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : \ - req->rq_export ? (char*)req->rq_export->exp_client_uuid.uuid : "",\ - req->rq_import ? \ - (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : \ - req->rq_export ? \ - (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "", \ - (req->rq_import && req->rq_import->imp_client) ? \ - req->rq_import->imp_client->cli_request_portal : -1, \ - req->rq_reqlen, req->rq_replen, \ - atomic_read(&req->rq_refcount), \ - DEBUG_REQ_FLAGS(req), \ - req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0, \ - req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0, \ - req->rq_status, req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0) +void debug_req(cfs_debug_limit_state_t *cdls, + __u32 level, struct ptlrpc_request *req, + const char *file, const char *func, const int line, + const char *fmt, ...); /* for most callers (level is a constant) this is resolved at compile time */ #define DEBUG_REQ(level, req, fmt, args...) 
\ do { \ - if ((level) & (D_ERROR | D_WARNING)) \ - __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args); \ - else \ - __DEBUG_REQ(CDEBUG, level, req, fmt, ## args); \ + if ((level) & (D_ERROR | D_WARNING)) { \ + static cfs_debug_limit_state_t cdls; \ + debug_req(&cdls, level, req, __FILE__, __func__, __LINE__, \ + "@@@ "fmt"\n", ## args); \ + } else \ + debug_req(NULL, level, req, __FILE__, __func__, __LINE__, \ + "@@@ "fmt"\n", ## args); \ } while (0) struct ptlrpc_bulk_page { diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index adff8f1..95f8a73 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -48,11 +48,6 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, #define PARAM_MDC "mdc." #define PARAM_LLITE "llite." #define PARAM_LOV "lov." -/* LOV_STRIPE_* aren't settable in proc. But match the proc names. */ -#define PARAM_LOV_STRIPE_SIZE PARAM_LOV"stripesize=" -#define PARAM_LOV_STRIPE_COUNT PARAM_LOV"stripecount=" -#define PARAM_LOV_STRIPE_OFFSET PARAM_LOV"stripeoffset=" -#define PARAM_LOV_STRIPE_PATTERN PARAM_LOV"stripetype=" #define PARAM_SEC "security." #define PARAM_SEC_RPC PARAM_SEC"rpc." 
#define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt=" diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 640a5f6..d9750d7 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -34,6 +34,8 @@ #include #include +#define MAX_OBD_DEVICES 8192 + /* this is really local to the OSC */ struct loi_oap_pages { struct list_head lop_pending; @@ -384,7 +386,6 @@ struct client_obd { int cl_default_mds_easize; int cl_max_mds_easize; int cl_max_mds_cookiesize; - kdev_t cl_sandev; /* security configuration */ struct sec_flavor_config cl_sec_conf; @@ -656,36 +657,28 @@ struct lu_placement_hint { int ph_opc; }; -#define LUSTRE_FLD_NAME "fld" -#define LUSTRE_SEQ_NAME "seq" - -/* device types (not names--FIXME) */ -/* FIXME all the references to these defines need to be updated */ -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" - -/* new MDS layers. Prototype */ -#define LUSTRE_CMM_NAME "cmm" -#define LUSTRE_MDD_NAME "mdd" -#define LUSTRE_OSD_NAME "osd" -#define LUSTRE_CMM_MDC_NAME "cmm-mdc" - -#define LUSTRE_MDC_NAME "mdc" -#define LUSTRE_LOV_NAME "lov" -#define LUSTRE_LMV_NAME "lmv" - -/* FIXME just the names need to be changed */ -#define LUSTRE_OSS_NAME "ost" /* FIXME oss */ -#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost */ -#define LUSTRE_OSTSAN_NAME "sanobdfilter" - -#define LUSTRE_OSC_NAME "osc" -#define LUSTRE_FILTER_NAME "filter" -#define LUSTRE_SANOSC_NAME "sanosc" -#define LUSTRE_SANOST_NAME "sanost" -#define LUSTRE_MGS_NAME "mgs" -#define LUSTRE_MGC_NAME "mgc" - +#define LUSTRE_FLD_NAME "fld" +#define LUSTRE_SEQ_NAME "seq" + +#define LUSTRE_CMM_NAME "cmm" +#define LUSTRE_MDD_NAME "mdd" +#define LUSTRE_OSD_NAME "osd" +#define LUSTRE_LMV_NAME "lmv" +#define LUSTRE_CMM_MDC_NAME "cmm-mdc" + +/* obd device type names */ + /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */ +#define LUSTRE_MDS_NAME "mds" +#define LUSTRE_MDT_NAME "mdt" +#define LUSTRE_MDC_NAME "mdc" +#define LUSTRE_OSS_NAME "ost" /* FIXME change 
name to oss */ +#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */ +#define LUSTRE_OSC_NAME "osc" +#define LUSTRE_LOV_NAME "lov" +#define LUSTRE_MGS_NAME "mgs" +#define LUSTRE_MGC_NAME "mgc" + +#define LUSTRE_CACHEOBD_NAME "cobd" #define LUSTRE_ECHO_NAME "obdecho" #define LUSTRE_ECHO_CLIENT_NAME "echo_client" @@ -1071,10 +1064,6 @@ struct obd_ops { int flags, void *opaque); int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, int join); - int (*o_san_preprw)(int cmd, struct obd_export *exp, - struct obdo *oa, int objcount, - struct obd_ioobj *obj, int niocount, - struct niobuf_remote *remote); int (*o_init_export)(struct obd_export *exp); int (*o_destroy_export)(struct obd_export *exp); int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *, @@ -1082,7 +1071,8 @@ struct obd_ops { /* llog related obd_methods */ int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid); + int count, struct llog_catid *logid, + struct obd_uuid *uuid); int (*o_llog_finish)(struct obd_device *obd, int count); /* metadata-only methods */ @@ -1194,6 +1184,7 @@ struct lsm_operations { void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, unsigned long *); obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int); + obd_off (*lsm_stripe_offset_by_offset)(struct lov_stripe_md *, obd_off); int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off); int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd); int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 995d2af..fbe385b 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -42,7 +42,6 @@ #endif /* OBD Device Declarations */ -#define MAX_OBD_DEVICES 2048 extern struct obd_device *obd_devs[MAX_OBD_DEVICES]; extern spinlock_t obd_dev_lock; @@ -1388,22 +1387,6 @@ static inline int obd_join_lru(struct 
obd_export *exp, RETURN(rc); } -static inline int obd_san_preprw(int cmd, struct obd_export *exp, - struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote) -{ - int rc; - - EXP_CHECK_DT_OP(exp, preprw); - OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - - rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj, - niocount, remote); - class_export_put(exp); - return(rc); -} - static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid, struct obd_client_handle *handle, int flag) { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 161d28a..cad9b68 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -30,12 +30,13 @@ extern atomic_t obd_memory; extern int obd_memmax; extern unsigned int obd_fail_loc; extern unsigned int obd_dump_on_timeout; +extern unsigned int obd_dump_on_eviction; extern unsigned int obd_timeout; /* seconds */ #define PING_INTERVAL max(obd_timeout / 4, 1U) #define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) extern unsigned int ldlm_timeout; extern unsigned int obd_health_check_timeout; -extern char obd_lustre_upcall[128]; +extern unsigned int obd_sync_filter; extern cfs_waitq_t obd_race_waitq; extern int obd_race_state; @@ -185,6 +186,8 @@ extern int obd_race_state; #define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1210 #define OBD_FAIL_SEC_CTX_FINI_NET 0x1220 +#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 + /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) @@ -233,16 +236,18 @@ do { \ * first thread that calls this with a matching fail_loc is put to * sleep. The next thread that calls with the same fail_loc wakes up * the first and continues. 
*/ -#define OBD_RACE(id) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - CERROR("obd_race id %x sleeping\n", (id)); \ - OBD_SLEEP_ON(&obd_race_waitq); \ - CERROR("obd_fail_race id %x awake\n", (id)); \ - } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) { \ - cfs_waitq_signal(&obd_race_waitq); \ - } \ +#define OBD_RACE(id) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + obd_race_state = 0; \ + CERROR("obd_race id %x sleeping\n", (id)); \ + OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0); \ + CERROR("obd_fail_race id %x awake\n", (id)); \ + } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ + ((id) & OBD_FAIL_MASK_LOC)) { \ + CERROR("obd_fail_race id %x waking\n", (id)); \ + obd_race_state = 1; \ + } \ } while(0) #else /* sigh. an expedient fix until OBD_RACE is fixed up */ diff --git a/lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch b/lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch new file mode 100644 index 0000000..e549597 --- /dev/null +++ b/lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch @@ -0,0 +1,153 @@ +Index: linux-2.6.9/include/asm-i386/bitops.h +=================================================================== +--- linux-2.6.9.orig/include/asm-i386/bitops.h 2004-10-19 05:54:37.000000000 +0800 ++++ linux-2.6.9/include/asm-i386/bitops.h 2006-09-01 14:04:19.000000000 +0800 +@@ -448,6 +448,8 @@ + find_first_zero_bit((unsigned long*)addr, size) + #define ext2_find_next_zero_bit(addr, size, off) \ + find_next_zero_bit((unsigned long*)addr, size, off) ++#define ext2_find_next_le_bit(addr, size, off) \ ++ find_next_bit((unsigned long*)(addr), (size), (off)) + + /* Bitmap functions for the minix filesystem. 
*/ + #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr) +Index: linux-2.6.9/include/asm-x86_64/bitops.h +=================================================================== +--- linux-2.6.9.orig/include/asm-x86_64/bitops.h 2004-10-19 05:53:51.000000000 +0800 ++++ linux-2.6.9/include/asm-x86_64/bitops.h 2006-09-01 14:04:19.000000000 +0800 +@@ -399,6 +399,8 @@ + find_first_zero_bit((unsigned long*)addr, size) + #define ext2_find_next_zero_bit(addr, size, off) \ + find_next_zero_bit((unsigned long*)addr, size, off) ++#define ext2_find_next_le_bit(addr, size, off) \ ++ find_next_bit((unsigned long*)(addr), (size), (off)) + + /* Bitmap functions for the minix filesystem. */ + #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr) +Index: linux-2.6.9/include/asm-ia64/bitops.h +=================================================================== +--- linux-2.6.9.orig/include/asm-ia64/bitops.h 2004-10-19 05:55:07.000000000 +0800 ++++ linux-2.6.9/include/asm-ia64/bitops.h 2006-09-01 14:04:19.000000000 +0800 +@@ -387,6 +387,8 @@ + #define ext2_test_bit test_bit + #define ext2_find_first_zero_bit find_first_zero_bit + #define ext2_find_next_zero_bit find_next_zero_bit ++#define ext2_find_next_le_bit(addr, size, off) \ ++ __find_next_bit((addr), (size), (off)) + + /* Bitmap functions for the minix filesystem. 
*/ + #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) +Index: linux-2.6.9/include/asm-ppc/bitops.h +=================================================================== +--- linux-2.6.9.orig/include/asm-ppc/bitops.h 2004-10-19 05:54:08.000000000 +0800 ++++ linux-2.6.9/include/asm-ppc/bitops.h 2006-09-01 14:04:35.000000000 +0800 +@@ -449,6 +449,47 @@ + return result + ffz(tmp); + } + ++#define ext2_find_next_le_bit(addr, size, off) \ ++ generic_find_next_le_bit((addr), (size), (off)) ++ ++static __inline__ unsigned long generic_find_next_le_bit(const void *addr, ++ unsigned long size, unsigned long offset) ++{ ++ unsigned int *p = ((unsigned int*) addr) + (offset >> 5); ++ unsigned int result = offset & ~31UL; ++ unsigned int tmp; ++ ++ if (offset >= size) ++ return size; ++ size -= result; ++ offset &= 31UL; ++ if (offset) { ++ tmp = cpu_to_le32p(p++); ++ tmp &= ~0UL << offset; ++ if (size < 32) ++ goto found_first; ++ if (tmp) ++ goto found_middle; ++ size -= 32; ++ result += 32; ++ } ++ while (size >= 32) { ++ if ((tmp = cpu_to_le32p(p++))) ++ goto found_middle; ++ result += 32; ++ size -= 32; ++ } ++ if (!size) ++ return result; ++ tmp = cpu_to_le32p(p); ++found_first: ++ tmp &= ~0U >> (32 - size); ++ if (tmp == 0UL) /* Are any bits set? */ ++ return result + size; /* Nope. */ ++found_middle: ++ return result + __ffs(tmp); ++} ++ + /* Bitmap functions for the minix filesystem. 
*/ + #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr) + #define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr)) +Index: linux-2.6.9/include/asm-ppc64/bitops.h +=================================================================== +--- linux-2.6.9.orig/include/asm-ppc64/bitops.h 2004-10-19 05:55:43.000000000 +0800 ++++ linux-2.6.9/include/asm-ppc64/bitops.h 2006-09-01 14:05:00.000000000 +0800 +@@ -349,6 +349,9 @@ + find_first_zero_le_bit((unsigned long*)addr, size) + #define ext2_find_next_zero_bit(addr, size, off) \ + find_next_zero_le_bit((unsigned long*)addr, size, off) ++#define ext2_find_next_le_bit(addr, size, off) \ ++ generic_find_next_le_bit((unsigned long*)(addr), (size), (off)) ++extern unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset); + + #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) + #define minix_set_bit(nr,addr) set_bit(nr,addr) +Index: linux-2.6.9/arch/ppc64/kernel/bitops.c +=================================================================== +--- linux-2.6.9.orig/arch/ppc64/kernel/bitops.c 2004-10-19 05:54:37.000000000 +0800 ++++ linux-2.6.9/arch/ppc64/kernel/bitops.c 2006-09-01 14:05:25.000000000 +0800 +@@ -145,3 +145,43 @@ + } + + EXPORT_SYMBOL(find_next_zero_le_bit); ++ ++unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size, ++ unsigned long offset) ++{ ++ const unsigned long *p = addr + offset / BITS_PER_LONG; ++ unsigned long result = offset & ~(BITS_PER_LONG - 1); ++ unsigned long tmp; ++ ++ if (offset >= size) ++ return size; ++ size -= result; ++ offset %= BITS_PER_LONG; ++ if (offset) { ++ tmp = __swab64p(p++); ++ tmp &= (~0UL << offset); ++ if (size < BITS_PER_LONG) ++ goto found_first; ++ if (tmp) ++ goto found_middle; ++ size -= BITS_PER_LONG; ++ result += BITS_PER_LONG; ++ } ++ while (size & ~(BITS_PER_LONG - 1)) { ++ if ((tmp = __swab64p(p++))) ++ goto found_middle; ++ result += BITS_PER_LONG; ++ size -= 
BITS_PER_LONG; ++ } ++ if (!size) ++ return result; ++ tmp = __swab64p(p); ++found_first: ++ tmp &= ~0UL >> (BITS_PER_LONG - size); ++ if (tmp == 0UL) ++ return result + size; ++found_middle: ++ return result + __ffs(tmp); ++} ++ ++EXPORT_SYMBOL(generic_find_next_le_bit); diff --git a/lustre/kernel_patches/patches/dcache-qstr-api-fix-2.6-suse.patch b/lustre/kernel_patches/patches/dcache-qstr-api-fix-2.6-suse.patch index 64b8bd3..84b88fd 100644 --- a/lustre/kernel_patches/patches/dcache-qstr-api-fix-2.6-suse.patch +++ b/lustre/kernel_patches/patches/dcache-qstr-api-fix-2.6-suse.patch @@ -1,7 +1,7 @@ -Index: linux-2.6.5-7.201/include/linux/dcache.h +Index: linux-2.6.5-7.276/include/linux/dcache.h =================================================================== ---- linux-2.6.5-7.201.orig/include/linux/dcache.h 2005-10-11 00:12:48.000000000 +0400 -+++ linux-2.6.5-7.201/include/linux/dcache.h 2005-12-20 23:16:31.000000000 +0300 +--- linux-2.6.5-7.276.orig/include/linux/dcache.h ++++ linux-2.6.5-7.276/include/linux/dcache.h @@ -38,7 +38,6 @@ struct qstr { const unsigned char * name; unsigned int len; @@ -18,131 +18,15 @@ Index: linux-2.6.5-7.201/include/linux/dcache.h struct dentry * d_parent; /* parent directory */ struct qstr d_name; struct hlist_node d_hash; /* lookup hash list */ -Index: linux-2.6.5-7.201/fs/dcache.c +Index: linux-2.6.5-7.276/fs/dcache.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/dcache.c 2005-10-11 00:12:45.000000000 +0400 -+++ linux-2.6.5-7.201/fs/dcache.c 2005-12-20 23:16:31.000000000 +0300 -@@ -41,6 +41,8 @@ EXPORT_SYMBOL(dcache_lock); - - static kmem_cache_t *dentry_cache; - -+#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) -+ - /* - * This is the single most critical data structure when it comes - * to the dcache: the hashtable for lookups. 
Somebody should try -@@ -67,7 +69,7 @@ static void d_callback(void *arg) - struct dentry * dentry = (struct dentry *)arg; - - if (dname_external(dentry)) { -- kfree(dentry->d_qstr); -+ kfree(dentry->d_name.name); - } - kmem_cache_free(dentry_cache, dentry); - } -@@ -678,8 +680,6 @@ static int shrink_dcache_memory(int nr, - return dentry_stat.nr_unused; - } - --#define NAME_ALLOC_LEN(len) ((len+16) & ~15) -- - /** - * d_alloc - allocate a dcache entry - * @parent: parent of entry to allocate -@@ -694,26 +694,18 @@ struct dentry * d_alloc(struct dentry * - { - char * str; - struct dentry *dentry; -- struct qstr * qstr; - - dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); - if (!dentry) - return NULL; - - if (name->len > DNAME_INLINE_LEN-1) { -- qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len), -- GFP_KERNEL); -- if (!qstr) { -+ str = kmalloc(name->len + 1, GFP_KERNEL); -+ if (!str) { - kmem_cache_free(dentry_cache, dentry); - return NULL; - } -- qstr->name = qstr->name_str; -- qstr->len = name->len; -- qstr->hash = name->hash; -- dentry->d_qstr = qstr; -- str = qstr->name_str; - } else { -- dentry->d_qstr = &dentry->d_name; - str = dentry->d_iname; - } - -@@ -1010,7 +1002,7 @@ struct dentry * __d_lookup(struct dentry - if (dentry->d_parent != parent) - continue; - -- qstr = dentry->d_qstr; -+ qstr = &dentry->d_name; - smp_read_barrier_depends(); - if (parent->d_op && parent->d_op->d_compare) { - if (parent->d_op->d_compare(parent, qstr, name)) -@@ -1163,26 +1155,38 @@ void d_rehash(struct dentry * entry) - */ - static inline void switch_names(struct dentry * dentry, struct dentry * target) - { -- const unsigned char *old_name, *new_name; -- struct qstr *old_qstr, *new_qstr; -- -- memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -- old_qstr = target->d_qstr; -- old_name = target->d_name.name; -- new_qstr = dentry->d_qstr; -- new_name = dentry->d_name.name; -- if (old_name == target->d_iname) { -- old_name = dentry->d_iname; -- old_qstr = 
&dentry->d_name; -- } -- if (new_name == dentry->d_iname) { -- new_name = target->d_iname; -- new_qstr = &target->d_name; -- } -- target->d_name.name = new_name; -- dentry->d_name.name = old_name; -- target->d_qstr = new_qstr; -- dentry->d_qstr = old_qstr; -+ if (dname_external(target)) { -+ if (dname_external(dentry)) { -+ /* -+ * Both external: swap the pointers -+ */ -+ do_switch(target->d_name.name, dentry->d_name.name); -+ } else { -+ /* -+ * dentry:internal, target:external. Steal target's -+ * storage and make target internal. -+ */ -+ dentry->d_name.name = target->d_name.name; -+ target->d_name.name = target->d_iname; -+ } -+ } else { -+ if (dname_external(dentry)) { -+ /* -+ * dentry:external, target:internal. Give dentry's -+ * storage to target and make dentry internal -+ */ -+ memcpy(dentry->d_iname, target->d_name.name, -+ target->d_name.len + 1); -+ target->d_name.name = dentry->d_name.name; -+ dentry->d_name.name = dentry->d_iname; -+ } else { -+ /* -+ * Both are internal. 
Just copy target to dentry -+ */ -+ memcpy(dentry->d_iname, target->d_name.name, -+ target->d_name.len + 1); -+ } -+ } - } - - /* +--- linux-2.6.5-7.276.orig/fs/dcache.c ++++ linux-2.6.5-7.276/fs/dcache.c +@@ -775,7 +775,6 @@ struct dentry * d_alloc(struct dentry * + dentry->d_parent = NULL; + dentry->d_move_count = 0; + dentry->d_sb = NULL; +- dentry->d_qstr = &dentry->d_name; + dentry->d_name.name = str; + dentry->d_name.len = name->len; + dentry->d_name.hash = name->hash; diff --git a/lustre/kernel_patches/patches/export-do_kern_mount.patch b/lustre/kernel_patches/patches/export-do_kern_mount.patch new file mode 100644 index 0000000..4abb386 --- /dev/null +++ b/lustre/kernel_patches/patches/export-do_kern_mount.patch @@ -0,0 +1,13 @@ +Index: linux-2.6/fs/super.c +=================================================================== +--- linux-2.6.orig/fs/super.c 2006-07-20 10:51:39.000000000 +0800 ++++ linux-2.6/fs/super.c 2006-07-20 10:51:59.000000000 +0800 +@@ -877,6 +877,8 @@ do_kern_mount(const char *fstype, int fl + return mnt; + } + ++EXPORT_SYMBOL_GPL(do_kern_mount); ++ + struct vfsmount *kern_mount(struct file_system_type *type) + { + return vfs_kern_mount(type, 0, type->name, NULL); diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch index dca4676..e54774f 100644 --- a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch +++ b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch @@ -1,7 +1,19 @@ -Index: linux-2.6.5-7.201/fs/ext3/super.c +Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/super.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/super.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h 2006-08-09 17:59:34.000000000 +0400 ++++ 
linux-2.6.5-7.201-full/include/linux/ext3_fs.h 2006-08-22 12:35:55.000000000 +0400 +@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super + extern void ext3_write_super (struct super_block *); + extern void ext3_write_super_lockfs (struct super_block *); + extern void ext3_unlockfs (struct super_block *); ++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); + extern int ext3_remount (struct super_block *, int *, char *); + extern int ext3_statfs (struct super_block *, struct kstatfs *); + +Index: linux-2.6.5-7.201-full/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/super.c 2006-08-09 17:59:37.000000000 +0400 @@ -39,7 +39,7 @@ static int ext3_load_journal(struct super_block *, struct ext3_super_block *); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, @@ -20,10 +32,10 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c struct ext3_super_block * es, int sync) { -Index: linux-2.6.5-7.201/fs/ext3/namei.c +Index: linux-2.6.5-7.201-full/fs/ext3/namei.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/namei.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/namei.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/namei.c 2006-08-09 17:59:37.000000000 +0400 @@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t * struct buffer_head * bh) { @@ -44,10 +56,10 @@ Index: linux-2.6.5-7.201/fs/ext3/namei.c if (pde) pde->rec_len = cpu_to_le16(le16_to_cpu(pde->rec_len) + -Index: linux-2.6.5-7.201/fs/ext3/xattr.c +Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c 
2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/xattr.c 2006-06-20 19:42:30.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c 2006-07-14 01:53:23.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c 2006-08-09 17:59:37.000000000 +0400 @@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru { int error = -EINVAL; @@ -57,10 +69,10 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c write_lock(&ext3_handler_lock); if (!ext3_xattr_handlers[name_index-1]) { ext3_xattr_handlers[name_index-1] = handler; -Index: linux-2.6.5-7.201/fs/ext3/inode.c +Index: linux-2.6.5-7.201-full/fs/ext3/inode.c =================================================================== ---- linux-2.6.5-7.201.orig/fs/ext3/inode.c 2006-06-20 19:40:44.000000000 +0400 -+++ linux-2.6.5-7.201/fs/ext3/inode.c 2006-06-20 19:42:08.000000000 +0400 +--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c 2006-07-14 01:53:22.000000000 +0400 ++++ linux-2.6.5-7.201-full/fs/ext3/inode.c 2006-08-22 12:35:28.000000000 +0400 @@ -1517,9 +1517,14 @@ out_stop: if (end > inode->i_size) { ei->i_disksize = end; diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch index df3d2ea..f6904f2 100644 --- a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch +++ b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch @@ -1,7 +1,19 @@ +Index: linux-2.6.9-full/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2006-08-09 17:56:39.000000000 +0400 ++++ linux-2.6.9-full/include/linux/ext3_fs.h 2006-08-22 12:36:22.000000000 +0400 +@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super + extern void ext3_write_super (struct super_block *); + extern void ext3_write_super_lockfs (struct super_block *); + extern void ext3_unlockfs (struct super_block *); ++extern void ext3_commit_super (struct super_block 
*, struct ext3_super_block *, int); + extern int ext3_remount (struct super_block *, int *, char *); + extern int ext3_statfs (struct super_block *, struct kstatfs *); + Index: linux-2.6.9-full/fs/ext3/super.c =================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-06-02 23:37:51.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-06-02 23:56:29.000000000 +0400 +--- linux-2.6.9-full.orig/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/super.c 2006-08-09 17:56:40.000000000 +0400 @@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe unsigned long journal_devnum); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, @@ -22,8 +34,8 @@ Index: linux-2.6.9-full/fs/ext3/super.c { Index: linux-2.6.9-full/fs/ext3/namei.c =================================================================== ---- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-06-02 23:37:49.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/namei.c 2006-06-02 23:43:31.000000000 +0400 +--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/namei.c 2006-08-09 17:56:40.000000000 +0400 @@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t * struct buffer_head * bh) { @@ -47,7 +59,7 @@ Index: linux-2.6.9-full/fs/ext3/namei.c Index: linux-2.6.9-full/fs/ext3/xattr.c =================================================================== --- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-06-01 14:58:48.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/xattr.c 2006-06-03 00:02:00.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/xattr.c 2006-08-09 17:56:40.000000000 +0400 @@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index) { struct xattr_handler *handler = NULL; @@ -60,7 +72,7 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c Index: linux-2.6.9-full/fs/ext3/inode.c =================================================================== --- 
linux-2.6.9-full.orig/fs/ext3/inode.c 2006-06-02 23:37:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/inode.c 2006-06-03 00:27:41.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/inode.c 2006-08-22 12:34:28.000000000 +0400 @@ -1513,9 +1513,14 @@ out_stop: if (end > inode->i_size) { ei->i_disksize = end; diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch index 325d080..b807900 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch @@ -1387,8 +1387,8 @@ Index: linux-2.6.16.i686/fs/ext3/mballoc.c + * Someone more lucky has already allocated it. + * The only thing we can do is just take first + * found block(s) -+ */ + printk(KERN_ERR "EXT3-fs: and someone won our chunk\n"); ++ */ + ac.ac_b_ex.fe_group = 0; + ac.ac_b_ex.fe_start = 0; + ac.ac_b_ex.fe_len = 0; diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index c77ebdd..646e4fe 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1,8 +1,8 @@ -Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h +Index: linux-stage/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h 2006-04-26 23:40:28.000000000 +0400 -@@ -57,6 +57,14 @@ struct statfs; +--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:37:27.000000000 +0800 +@@ -57,6 +57,14 @@ #define ext3_debug(f, a...) 
do {} while (0) #endif @@ -17,7 +17,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -339,6 +347,7 @@ struct ext3_inode { +@@ -339,6 +347,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ @@ -25,7 +25,22 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe +@@ -361,6 +370,14 @@ + #define ext3_find_first_zero_bit ext2_find_first_zero_bit + #define ext3_find_next_zero_bit ext2_find_next_zero_bit + ++#ifndef ext2_find_next_le_bit ++#ifdef __LITTLE_ENDIAN ++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) ++#else ++#error "mballoc needs a patch for big-endian systems - CFS bug 10634" ++#endif /* __LITTLE_ENDIAN */ ++#endif /* !ext2_find_next_le_bit */ ++ + /* + * Maximal mount counts between two filesystem checks + */ +@@ -700,7 +717,9 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, @@ -36,7 +51,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h extern unsigned long ext3_count_free_blocks (struct super_block *); extern void ext3_check_blocks_bitmap (struct super_block *); extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc +@@ -824,6 +843,17 @@ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); @@ -54,10 +69,10 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h #endif /* __KERNEL__ */ #define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) -Index: 
linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h +Index: linux-stage/include/linux/ext3_fs_sb.h =================================================================== ---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h 2006-04-26 23:40:28.000000000 +0400 +--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:37:00.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:37:01.000000000 +0800 @@ -23,9 +23,15 @@ #define EXT_INCLUDE #include @@ -74,7 +89,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory -@@ -78,6 +84,43 @@ struct ext3_sb_info { +@@ -78,6 +84,43 @@ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif @@ -112,17 +127,17 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; }; -+ + +#define EXT3_GROUP_INFO(sb, group) \ + EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ + [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - ++ #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.5-7.252-full/fs/ext3/super.c +Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/super.c 2006-04-26 23:40:28.000000000 +0400 -@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block +--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/super.c 2006-09-06 12:37:01.000000000 +0800 +@@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -130,7 +145,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); 
journal_destroy(sbi->s_journal); -@@ -545,6 +546,7 @@ enum { +@@ -546,6 +547,7 @@ Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, @@ -138,7 +153,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c }; static match_table_t tokens = { -@@ -591,6 +592,9 @@ static match_table_t tokens = { +@@ -592,6 +594,9 @@ {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, @@ -148,7 +163,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c {Opt_barrier, "barrier=%u"}, {Opt_err, NULL} }; -@@ -813,6 +815,19 @@ static int parse_options (char * options +@@ -817,6 +822,19 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -168,7 +183,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super +@@ -1470,6 +1488,7 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -176,7 +191,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c return 0; -@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t +@@ -2118,7 +2137,13 @@ static int __init init_ext3_fs(void) { @@ -191,7 +206,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c if (err) return err; err = init_inodecache(); -@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void) +@@ -2147,6 +2172,7 @@ unregister_filesystem(&ext3_fs_type); destroy_inodecache(); exit_ext3_xattr(); @@ -199,11 +214,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c } int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.5-7.252-full/fs/ext3/extents.c +Index: linux-stage/fs/ext3/extents.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/extents.c 2006-04-26 23:40:28.000000000 +0400 -@@ -777,7 +777,7 @@ cleanup: +--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800 
++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800 +@@ -779,7 +779,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; @@ -212,7 +227,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c } } kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st +@@ -1438,7 +1438,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -221,7 +236,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c return err; } -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1923,10 +1923,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -235,7 +250,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1938,7 +1940,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -244,11 +259,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); -Index: linux-2.6.5-7.252-full/fs/ext3/inode.c +Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/inode.c 2006-04-26 23:40:28.000000000 +0400 -@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h +--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:37:01.000000000 +0800 +@@ -574,7 +574,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i 
= 0; i < keys; i++) @@ -257,7 +272,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c return err; } -@@ -675,7 +675,7 @@ err_out: +@@ -675,7 +675,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -266,7 +281,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c return err; } -@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru +@@ -1837,7 +1837,7 @@ } } @@ -275,7 +290,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c } /** -@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t +@@ -2008,7 +2008,7 @@ ext3_journal_test_restart(handle, inode); } @@ -284,11 +299,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c if (parent_bh) { /* -Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c +Index: linux-stage/fs/ext3/balloc.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c 2006-02-14 15:26:58.000000000 +0300 -+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c 2006-04-26 23:40:28.000000000 +0400 -@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ +--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:36:59.000000000 +0800 ++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:37:01.000000000 +0800 +@@ -78,7 +78,7 @@ * * Return buffer_head on success or NULL in case of failure. */ @@ -297,7 +312,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; -@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino +@@ -274,7 +274,7 @@ } /* Free given blocks, update quota and i_blocks field */ @@ -306,7 +321,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; -@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super +@@ -1142,7 +1142,7 @@ * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ @@ -315,11 +330,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c unsigned long goal, int *errp) { struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c +Index: linux-stage/fs/ext3/xattr.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c 2006-04-26 23:40:28.000000000 +0400 -@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, +--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:37:00.000000000 +0800 ++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:37:01.000000000 +0800 +@@ -1371,7 +1371,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: @@ -328,7 +343,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c error = -EIO; goto cleanup; } -@@ -1411,7 +1411,7 @@ getblk_failed: +@@ -1411,7 +1411,7 @@ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); @@ -337,7 +352,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to -@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle +@@ -1519,7 +1519,7 @@ mb_cache_entry_free(ce); ce = NULL; } @@ -346,10 +361,10 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { -Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c +Index: linux-stage/fs/ext3/mballoc.c =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c 2006-04-26 23:42:45.000000000 +0400 +--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800 ++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:37:34.000000000 +0800 @@ -0,0 +1,2702 @@ +/* + * Copyright (c) 2003-2005, Cluster File Systems, Inc, 
info@clusterfs.com @@ -792,7 +807,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c + while (i < max) { + fragments++; + first = i; -+ i = find_next_bit(bitmap, max, i); ++ i = ext2_find_next_le_bit(bitmap, max, i); + len = i - first; + free += len; + if (len > 1) @@ -3053,11 +3068,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c + remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3); + remove_proc_entry(EXT3_ROOT, proc_root_fs); +} -Index: linux-2.6.5-7.252-full/fs/ext3/Makefile +Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile 2006-04-25 17:42:19.000000000 +0400 -+++ linux-2.6.5-7.252-full/fs/ext3/Makefile 2006-04-26 23:40:28.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o +--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800 ++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:37:01.000000000 +0800 +@@ -6,7 +6,7 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o \ diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch index 0040a6f..13f3482 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch @@ -1356,8 +1356,8 @@ Index: linux-stage/fs/ext3/mballoc.c + * Someone more lucky has already allocated it. 
+ * The only thing we can do is just take first + * found block(s) -+ */ + printk(KERN_ERR "EXT3-fs: and someone won our chunk\n"); ++ */ + ac.ac_b_ex.fe_group = 0; + ac.ac_b_ex.fe_start = 0; + ac.ac_b_ex.fe_len = 0; diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch index e34c411..a00cd4a 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch @@ -1,8 +1,8 @@ Index: linux-stage/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs.h 2006-05-25 10:36:04.000000000 -0600 -@@ -57,6 +57,14 @@ struct statfs; +--- linux-stage.orig/include/linux/ext3_fs.h 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs.h 2006-09-06 12:29:38.000000000 +0800 +@@ -57,6 +57,14 @@ #define ext3_debug(f, a...) 
do {} while (0) #endif @@ -17,7 +17,7 @@ Index: linux-stage/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -365,6 +373,7 @@ struct ext3_inode { +@@ -365,6 +373,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ @@ -25,7 +25,22 @@ Index: linux-stage/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe +@@ -387,6 +396,14 @@ + #define ext3_find_first_zero_bit ext2_find_first_zero_bit + #define ext3_find_next_zero_bit ext2_find_next_zero_bit + ++#ifndef ext2_find_next_le_bit ++#ifdef __LITTLE_ENDIAN ++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) ++#else ++#error "mballoc needs a patch for big-endian systems - CFS bug 10634" ++#endif /* __LITTLE_ENDIAN */ ++#endif /* !ext2_find_next_le_bit */ ++ + /* + * Maximal mount counts between two filesystem checks + */ +@@ -726,7 +743,7 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, @@ -34,7 +49,7 @@ Index: linux-stage/include/linux/ext3_fs.h extern void ext3_free_blocks_sb (handle_t *, struct super_block *, unsigned long, unsigned long, int *); extern unsigned long ext3_count_free_blocks (struct super_block *); -@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc +@@ -859,6 +876,17 @@ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); @@ -54,8 +69,8 @@ Index: linux-stage/include/linux/ext3_fs.h /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). 
*/ Index: linux-stage/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/include/linux/ext3_fs_sb.h 2006-05-25 10:59:14.000000000 -0600 +--- linux-stage.orig/include/linux/ext3_fs_sb.h 2006-09-06 12:27:36.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs_sb.h 2006-09-06 12:27:37.000000000 +0800 @@ -23,9 +23,15 @@ #define EXT_INCLUDE #include @@ -72,7 +87,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory -@@ -81,6 +87,43 @@ struct ext3_sb_info { +@@ -81,6 +87,43 @@ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif @@ -110,17 +125,17 @@ Index: linux-stage/include/linux/ext3_fs_sb.h + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; }; -+ + +#define EXT3_GROUP_INFO(sb, group) \ + EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ + [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] - ++ #endif /* _LINUX_EXT3_FS_SB */ Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/super.c 2006-05-25 10:36:04.000000000 -0600 -@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block +--- linux-stage.orig/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/super.c 2006-09-06 12:27:37.000000000 +0800 +@@ -394,6 +394,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -128,7 +143,7 @@ Index: linux-stage/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); -@@ -597,6 +598,7 @@ enum { +@@ -597,6 +598,7 @@ Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, @@ -136,7 
+151,7 @@ Index: linux-stage/fs/ext3/super.c }; static match_table_t tokens = { -@@ -649,6 +651,9 @@ static match_table_t tokens = { +@@ -649,6 +651,9 @@ {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, @@ -146,7 +161,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, -@@ -962,6 +967,19 @@ static int parse_options (char * options +@@ -962,6 +967,19 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -166,7 +181,7 @@ Index: linux-stage/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super +@@ -1651,6 +1669,7 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -174,7 +189,7 @@ Index: linux-stage/fs/ext3/super.c return 0; -@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t +@@ -2433,7 +2452,13 @@ static int __init init_ext3_fs(void) { @@ -189,7 +204,7 @@ Index: linux-stage/fs/ext3/super.c if (err) return err; err = init_inodecache(); -@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void) +@@ -2455,6 +2480,7 @@ unregister_filesystem(&ext3_fs_type); destroy_inodecache(); exit_ext3_xattr(); @@ -199,9 +214,9 @@ Index: linux-stage/fs/ext3/super.c int ext3_prep_san_write(struct inode *inode, long *blocks, Index: linux-stage/fs/ext3/extents.c =================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600 -@@ -777,7 +777,7 @@ cleanup: +--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800 +@@ -779,7 +779,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; @@ -210,7 +225,7 @@ Index: linux-stage/fs/ext3/extents.c } } kfree(ablocks); -@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st +@@ -1438,7 
+1438,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -219,7 +234,7 @@ Index: linux-stage/fs/ext3/extents.c return err; } -@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1923,10 +1923,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -233,7 +248,7 @@ Index: linux-stage/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t +@@ -1938,7 +1940,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -244,9 +259,9 @@ Index: linux-stage/fs/ext3/extents.c from, to, ex->ee_block, ex->ee_len); Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/inode.c 2006-05-25 10:36:04.000000000 -0600 -@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h +--- linux-stage.orig/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/inode.c 2006-09-06 12:27:37.000000000 +0800 +@@ -572,7 +572,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) @@ -255,7 +270,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -673,7 +673,7 @@ err_out: +@@ -673,7 +673,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -264,7 +279,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru +@@ -1831,7 +1831,7 @@ } } @@ -273,7 +288,7 @@ Index: linux-stage/fs/ext3/inode.c } /** -@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t +@@ -2004,7 +2004,7 @@ 
ext3_journal_test_restart(handle, inode); } @@ -284,9 +299,9 @@ Index: linux-stage/fs/ext3/inode.c /* Index: linux-stage/fs/ext3/balloc.c =================================================================== ---- linux-stage.orig/fs/ext3/balloc.c 2006-05-25 10:36:02.000000000 -0600 -+++ linux-stage/fs/ext3/balloc.c 2006-05-25 10:36:04.000000000 -0600 -@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ +--- linux-stage.orig/fs/ext3/balloc.c 2006-09-06 12:27:36.000000000 +0800 ++++ linux-stage/fs/ext3/balloc.c 2006-09-06 12:27:37.000000000 +0800 +@@ -79,7 +79,7 @@ * * Return buffer_head on success or NULL in case of failure. */ @@ -331,9 +346,9 @@ Index: linux-stage/fs/ext3/balloc.c struct buffer_head *bitmap_bh = NULL; Index: linux-stage/fs/ext3/xattr.c =================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/xattr.c 2006-05-25 10:36:04.000000000 -0600 -@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, +--- linux-stage.orig/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/xattr.c 2006-09-06 12:27:37.000000000 +0800 +@@ -1281,7 +1281,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: @@ -342,7 +357,7 @@ Index: linux-stage/fs/ext3/xattr.c error = -EIO; goto cleanup; } -@@ -1328,7 +1328,7 @@ getblk_failed: +@@ -1328,7 +1328,7 @@ if (ce) mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); @@ -351,7 +366,7 @@ Index: linux-stage/fs/ext3/xattr.c /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to -@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle +@@ -1427,7 +1427,7 @@ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { if (ce) mb_cache_entry_free(ce); @@ -362,8 +377,8 @@ Index: linux-stage/fs/ext3/xattr.c } else { Index: linux-stage/fs/ext3/mballoc.c =================================================================== ---- 
linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600 -+++ linux-stage/fs/ext3/mballoc.c 2006-05-25 10:59:14.000000000 -0600 +--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800 ++++ linux-stage/fs/ext3/mballoc.c 2006-09-06 12:30:11.000000000 +0800 @@ -0,0 +1,2701 @@ +/* + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com @@ -806,7 +821,7 @@ Index: linux-stage/fs/ext3/mballoc.c + while (i < max) { + fragments++; + first = i; -+ i = find_next_bit(bitmap, max, i); ++ i = ext2_find_next_le_bit(bitmap, max, i); + len = i - first; + free += len; + if (len > 1) @@ -3068,8 +3083,8 @@ Index: linux-stage/fs/ext3/mballoc.c +} Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600 -+++ linux-stage/fs/ext3/Makefile 2006-05-25 10:36:04.000000000 -0600 +--- linux-stage.orig/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800 ++++ linux-stage/fs/ext3/Makefile 2006-09-06 12:27:37.000000000 +0800 @@ -6,7 +6,7 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.5.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.5.patch new file mode 100644 index 0000000..b2abf46 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-stats-2.6.5.patch @@ -0,0 +1,772 @@ +Index: linux-2.6.5-7.201/include/linux/jbd.h +=================================================================== +--- linux-2.6.5-7.201.orig/include/linux/jbd.h 2005-10-11 00:12:48.000000000 +0400 ++++ linux-2.6.5-7.201/include/linux/jbd.h 2006-07-28 02:40:14.000000000 +0400 +@@ -411,6 +411,16 @@ struct handle_s + }; + + ++/* ++ * Some stats for checkpoint phase ++ */ ++struct transaction_chp_stats_s { ++ unsigned long cs_chp_time; ++ unsigned long cs_forced_to_close; ++ unsigned long cs_written; ++ unsigned long cs_dropped; ++}; ++ + /* The transaction_t type is 
the guts of the journaling mechanism. It + * tracks a compound transaction through its various states: + * +@@ -542,6 +552,21 @@ struct transaction_s + spinlock_t t_handle_lock; + + /* ++ * Longest time some handle had to wait for running transaction ++ */ ++ unsigned long t_max_wait; ++ ++ /* ++ * When transaction started ++ */ ++ unsigned long t_start; ++ ++ /* ++ * Checkpointing stats [j_checkpoint_sem] ++ */ ++ struct transaction_chp_stats_s t_chp_stats; ++ ++ /* + * Number of outstanding updates running on this transaction + * [t_handle_lock] + */ +@@ -581,6 +606,57 @@ struct transaction_s + struct list_head t_jcb; + }; + ++struct transaction_run_stats_s { ++ unsigned long rs_wait; ++ unsigned long rs_running; ++ unsigned long rs_locked; ++ unsigned long rs_flushing; ++ unsigned long rs_logging; ++ ++ unsigned long rs_handle_count; ++ unsigned long rs_blocks; ++ unsigned long rs_blocks_logged; ++}; ++ ++struct transaction_stats_s ++{ ++ int ts_type; ++ unsigned long ts_tid; ++ union { ++ struct transaction_run_stats_s run; ++ struct transaction_chp_stats_s chp; ++ } u; ++}; ++ ++#define JBD_STATS_RUN 1 ++#define JBD_STATS_CHECKPOINT 2 ++ ++#define ts_wait u.run.rs_wait ++#define ts_running u.run.rs_running ++#define ts_locked u.run.rs_locked ++#define ts_flushing u.run.rs_flushing ++#define ts_logging u.run.rs_logging ++#define ts_handle_count u.run.rs_handle_count ++#define ts_blocks u.run.rs_blocks ++#define ts_blocks_logged u.run.rs_blocks_logged ++ ++#define ts_chp_time u.chp.cs_chp_time ++#define ts_forced_to_close u.chp.cs_forced_to_close ++#define ts_written u.chp.cs_written ++#define ts_dropped u.chp.cs_dropped ++ ++#define CURRENT_MSECS (jiffies_to_msecs(jiffies)) ++ ++static inline unsigned int ++jbd_time_diff(unsigned int start, unsigned int end) ++{ ++ if (unlikely(start > end)) ++ end = end + (~0UL - start); ++ else ++ end -= start; ++ return end; ++} ++ + /** + * struct journal_s - The journal_s type is the concrete type associated with + * 
journal_t. +@@ -817,6 +893,16 @@ struct journal_s + struct jbd_revoke_table_s *j_revoke_table[2]; + + /* ++ * Transaction statistics: history ring and running totals [j_history_lock] ++ */ ++ struct transaction_stats_s *j_history; ++ int j_history_max; ++ int j_history_cur; ++ spinlock_t j_history_lock; ++ struct proc_dir_entry *j_proc_entry; ++ struct transaction_stats_s j_stats; ++ ++ /* + * An opaque pointer to fs-private information. ext3 puts its + * superblock pointer here + */ +Index: linux-2.6.5-7.201/fs/jbd/commit.c +=================================================================== +--- linux-2.6.5-7.201.orig/fs/jbd/commit.c 2005-10-11 00:12:45.000000000 +0400 ++++ linux-2.6.5-7.201/fs/jbd/commit.c 2006-07-28 02:40:14.000000000 +0400 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + /* + * Default IO end handler for temporary BJ_IO buffer_heads. +@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour + */ + void journal_commit_transaction(journal_t *journal) + { ++ struct transaction_stats_s stats; + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; + struct buffer_head *wbuf[64]; +@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_ + spin_lock(&journal->j_state_lock); + commit_transaction->t_state = T_LOCKED; + ++ stats.ts_wait = commit_transaction->t_max_wait; ++ stats.ts_locked = CURRENT_MSECS; ++ stats.ts_running = jbd_time_diff(commit_transaction->t_start, ++ stats.ts_locked); ++ + spin_lock(&commit_transaction->t_handle_lock); + while (commit_transaction->t_updates) { + DEFINE_WAIT(wait); +@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_ + */ + journal_switch_revoke_table(journal); + ++ stats.ts_flushing = CURRENT_MSECS; ++ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing); ++ + commit_transaction->t_state = T_FLUSH; + journal->j_committing_transaction = commit_transaction; + journal->j_running_transaction = NULL; +@@ -366,6 +376,11 @@ write_out_data: + */ + commit_transaction->t_state = T_COMMIT; + ++ stats.ts_logging = 
CURRENT_MSECS; ++ stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging); ++ stats.ts_blocks = commit_transaction->t_outstanding_credits; ++ stats.ts_blocks_logged = 0; ++ + descriptor = 0; + bufs = 0; + while (commit_transaction->t_buffers) { +@@ -514,6 +529,7 @@ start_journal_io: + submit_bh(WRITE, bh); + } + cond_resched(); ++ stats.ts_blocks_logged += bufs; + + /* Force a new descriptor to be generated next + time round the loop. */ +@@ -759,6 +775,7 @@ skip_commit: /* The journal should be un + cp_transaction = jh->b_cp_transaction; + if (cp_transaction) { + JBUFFER_TRACE(jh, "remove from old cp transaction"); ++ cp_transaction->t_chp_stats.cs_dropped++; + __journal_remove_checkpoint(jh); + } + +@@ -805,6 +822,36 @@ skip_commit: /* The journal should be un + + J_ASSERT(commit_transaction->t_state == T_COMMIT); + ++ commit_transaction->t_start = CURRENT_MSECS; ++ stats.ts_logging = jbd_time_diff(stats.ts_logging, ++ commit_transaction->t_start); ++ ++ /* ++ * File the transaction for history ++ */ ++ stats.ts_type = JBD_STATS_RUN; ++ stats.ts_tid = commit_transaction->t_tid; ++ stats.ts_handle_count = commit_transaction->t_handle_count; ++ spin_lock(&journal->j_history_lock); ++ memcpy(journal->j_history + journal->j_history_cur, &stats, ++ sizeof(stats)); ++ if (++journal->j_history_cur == journal->j_history_max) ++ journal->j_history_cur = 0; ++ ++ /* ++ * Calculate overall stats ++ */ ++ journal->j_stats.ts_tid++; ++ journal->j_stats.ts_wait += stats.ts_wait; ++ journal->j_stats.ts_running += stats.ts_running; ++ journal->j_stats.ts_locked += stats.ts_locked; ++ journal->j_stats.ts_flushing += stats.ts_flushing; ++ journal->j_stats.ts_logging += stats.ts_logging; ++ journal->j_stats.ts_handle_count += stats.ts_handle_count; ++ journal->j_stats.ts_blocks += stats.ts_blocks; ++ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged; ++ spin_unlock(&journal->j_history_lock); ++ + /* + * This is a bit sleazy. 
We borrow j_list_lock to protect + * journal->j_committing_transaction in __journal_remove_checkpoint. +Index: linux-2.6.5-7.201/fs/jbd/checkpoint.c +=================================================================== +--- linux-2.6.5-7.201.orig/fs/jbd/checkpoint.c 2005-10-11 00:12:45.000000000 +0400 ++++ linux-2.6.5-7.201/fs/jbd/checkpoint.c 2006-07-28 02:40:14.000000000 +0400 +@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + ++ transaction->t_chp_stats.cs_forced_to_close++; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); +@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct + */ + static int __flush_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count, +- int *drop_count) ++ int *drop_count, transaction_t *transaction) + { + struct buffer_head *bh = jh2bh(jh); + int ret = 0; +@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou + set_buffer_jwrite(bh); + bhs[*batch_count] = bh; + jbd_unlock_bh_state(bh); ++ transaction->t_chp_stats.cs_written++; + (*batch_count)++; + if (*batch_count == NR_BATCH) { + __flush_batch(journal, bhs, batch_count); +@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; ++ if (transaction->t_chp_stats.cs_chp_time == 0) ++ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS; + this_tid = transaction->t_tid; + jh = transaction->t_checkpoint_list; + last_jh = jh->b_cpprev; +@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal + retry = 1; + break; + } +- retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); ++ retry = __flush_buffer(journal, jh, bhs, &batch_count, ++ &drop_count, transaction); + } while (jh != last_jh && !retry); + + if (batch_count) { +@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct + + void 
__journal_drop_transaction(journal_t *journal, transaction_t *transaction) + { ++ struct transaction_stats_s stats; ++ + assert_spin_locked(&journal->j_list_lock); + if (transaction->t_cpnext) { + transaction->t_cpnext->t_cpprev = transaction->t_cpprev; +@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_ + J_ASSERT(journal->j_running_transaction != transaction); + + jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); ++ ++ /* ++ * File the transaction for history ++ */ ++ if (transaction->t_chp_stats.cs_written != 0 || ++ transaction->t_chp_stats.cs_chp_time != 0) { ++ stats.ts_type = JBD_STATS_CHECKPOINT; ++ stats.ts_tid = transaction->t_tid; ++ stats.u.chp = transaction->t_chp_stats; ++ if (stats.ts_chp_time) ++ stats.ts_chp_time = ++ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS); ++ spin_lock(&journal->j_history_lock); ++ memcpy(journal->j_history + journal->j_history_cur, &stats, ++ sizeof(stats)); ++ if (++journal->j_history_cur == journal->j_history_max) ++ journal->j_history_cur = 0; ++ spin_unlock(&journal->j_history_lock); ++ } ++ + kfree(transaction); + } +Index: linux-2.6.5-7.201/fs/jbd/transaction.c +=================================================================== +--- linux-2.6.5-7.201.orig/fs/jbd/transaction.c 2005-10-11 00:12:45.000000000 +0400 ++++ linux-2.6.5-7.201/fs/jbd/transaction.c 2006-07-28 02:40:14.000000000 +0400 +@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran + + J_ASSERT(journal->j_running_transaction == NULL); + journal->j_running_transaction = transaction; ++ transaction->t_max_wait = 0; ++ transaction->t_start = CURRENT_MSECS; + + return transaction; + } +@@ -86,6 +88,7 @@ static int start_this_handle(journal_t * + int nblocks = handle->h_buffer_credits; + transaction_t *new_transaction = NULL; + int ret = 0; ++ unsigned long ts = CURRENT_MSECS; + + if (nblocks > journal->j_max_transaction_buffers) { + printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", +@@ -219,6 +222,12 @@ 
repeat_locked: + /* OK, account for the buffers that this operation expects to + * use and add the handle to the running transaction. */ + ++ if (time_after(transaction->t_start, ts)) { ++ ts = jbd_time_diff(ts, transaction->t_start); ++ if (ts > transaction->t_max_wait) ++ transaction->t_max_wait= ts; ++ } ++ + handle->h_transaction = transaction; + transaction->t_outstanding_credits += nblocks; + transaction->t_updates++; +Index: linux-2.6.5-7.201/fs/jbd/journal.c +=================================================================== +--- linux-2.6.5-7.201.orig/fs/jbd/journal.c 2005-10-11 00:12:45.000000000 +0400 ++++ linux-2.6.5-7.201/fs/jbd/journal.c 2006-08-02 01:20:09.000000000 +0400 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + EXPORT_SYMBOL(journal_start); + EXPORT_SYMBOL(journal_restart); +@@ -615,6 +616,337 @@ struct journal_head *journal_get_descrip + return journal_add_journal_head(bh); + } + ++struct jbd_stats_proc_session { ++ journal_t *journal; ++ struct transaction_stats_s *stats; ++ int start; ++ int max; ++}; ++ ++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s, ++ struct transaction_stats_s *ts, ++ int first) ++{ ++ if (ts == s->stats + s->max) ++ ts = s->stats; ++ if (!first && ts == s->stats + s->start) ++ return NULL; ++ while (ts->ts_type == 0) { ++ ts++; ++ if (ts == s->stats + s->max) ++ ts = s->stats; ++ if (ts == s->stats + s->start) ++ return NULL; ++ } ++ return ts; ++ ++} ++ ++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos) ++{ ++ struct jbd_stats_proc_session *s = seq->private; ++ struct transaction_stats_s *ts; ++ int l = *pos; ++ ++ if (l == 0) ++ return SEQ_START_TOKEN; ++ ts = jbd_history_skip_empty(s, s->stats + s->start, 1); ++ if (!ts) ++ return NULL; ++ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL); ++ return ts; ++} ++ ++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ struct jbd_stats_proc_session *s = 
seq->private; ++ struct transaction_stats_s *ts = v; ++ ++ ++*pos; ++ if (v == SEQ_START_TOKEN) ++ return jbd_history_skip_empty(s, s->stats + s->start, 1); ++ else ++ return jbd_history_skip_empty(s, ++ts, 0); ++} ++ ++static int jbd_seq_history_show(struct seq_file *seq, void *v) ++{ ++ struct transaction_stats_s *ts = v; ++ if (v == SEQ_START_TOKEN) { ++ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " ++ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", ++ "wait", "run", "lock", "flush", "log", "hndls", ++ "block", "inlog", "ctime", "write", "drop", ++ "close"); ++ return 0; ++ } ++ if (ts->ts_type == JBD_STATS_RUN) ++ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu " ++ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, ++ ts->ts_wait, ts->ts_running, ts->ts_locked, ++ ts->ts_flushing, ts->ts_logging, ++ ts->ts_handle_count, ts->ts_blocks, ++ ts->ts_blocks_logged); ++ else if (ts->ts_type == JBD_STATS_CHECKPOINT) ++ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n", ++ "C", ts->ts_tid, " ", ts->ts_chp_time, ++ ts->ts_written, ts->ts_dropped, ++ ts->ts_forced_to_close); ++ else ++ J_ASSERT(0); ++ return 0; ++} ++ ++static void jbd_seq_history_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations jbd_seq_history_ops = { ++ .start = jbd_seq_history_start, ++ .next = jbd_seq_history_next, ++ .stop = jbd_seq_history_stop, ++ .show = jbd_seq_history_show, ++}; ++ ++static int jbd_seq_history_open(struct inode *inode, struct file *file) ++{ ++ journal_t *journal = PDE(inode)->data; ++ struct jbd_stats_proc_session *s; ++ int rc, size; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) ++ return -EIO; ++ size = sizeof(struct transaction_stats_s) * journal->j_history_max; ++ s->stats = kmalloc(size, GFP_KERNEL); ++ if (s == NULL) { ++ kfree(s); ++ return -EIO; ++ } ++ spin_lock(&journal->j_history_lock); ++ memcpy(s->stats, journal->j_history, size); ++ s->max = journal->j_history_max; ++ s->start = 
journal->j_history_cur % s->max; ++ spin_unlock(&journal->j_history_lock); ++ ++ rc = seq_open(file, &jbd_seq_history_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = s; ++ } else { ++ kfree(s->stats); ++ kfree(s); ++ } ++ return rc; ++ ++} ++ ++static ssize_t jbd_seq_history_write(struct file *file, const char __user *buf, ++ size_t len, loff_t * ppos) ++{ ++ journal_t *journal = PDE(file->f_dentry->d_inode)->data; ++ int size; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ spin_lock(&journal->j_history_lock); ++ size = sizeof(struct transaction_stats_s) * journal->j_history_max; ++ journal->j_history_cur = 0; ++ memset(journal->j_history, 0, size); ++ spin_unlock(&journal->j_history_lock); ++ ++ return len; ++} ++ ++static int jbd_seq_history_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct jbd_stats_proc_session *s = seq->private; ++ kfree(s->stats); ++ kfree(s); ++ return seq_release(inode, file); ++} ++ ++static struct file_operations jbd_seq_history_fops = { ++ .owner = THIS_MODULE, ++ .open = jbd_seq_history_open, ++ .read = seq_read, ++ .write = jbd_seq_history_write, ++ .llseek = seq_lseek, ++ .release = jbd_seq_history_release, ++}; ++ ++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos) ++{ ++ return *pos ? 
NULL : SEQ_START_TOKEN; ++} ++ ++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ return NULL; ++} ++ ++static int jbd_seq_info_show(struct seq_file *seq, void *v) ++{ ++ struct jbd_stats_proc_session *s = seq->private; ++ if (v != SEQ_START_TOKEN) ++ return 0; ++ seq_printf(seq, "%lu transaction, each upto %u blocks\n", ++ s->stats->ts_tid, ++ s->journal->j_max_transaction_buffers); ++ if (s->stats->ts_tid == 0) ++ return 0; ++ seq_printf(seq, "average: \n %lums waiting for transaction\n", ++ s->stats->ts_wait / s->stats->ts_tid); ++ seq_printf(seq, " %lums running transaction\n", ++ s->stats->ts_running / s->stats->ts_tid); ++ seq_printf(seq, " %lums transaction was being locked\n", ++ s->stats->ts_locked / s->stats->ts_tid); ++ seq_printf(seq, " %lums flushing data (in ordered mode)\n", ++ s->stats->ts_flushing / s->stats->ts_tid); ++ seq_printf(seq, " %lums logging transaction\n", ++ s->stats->ts_logging / s->stats->ts_tid); ++ seq_printf(seq, " %lu handles per transaction\n", ++ s->stats->ts_handle_count / s->stats->ts_tid); ++ seq_printf(seq, " %lu blocks per transaction\n", ++ s->stats->ts_blocks / s->stats->ts_tid); ++ seq_printf(seq, " %lu logged blocks per transaction\n", ++ s->stats->ts_blocks_logged / s->stats->ts_tid); ++ return 0; ++} ++ ++static void jbd_seq_info_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations jbd_seq_info_ops = { ++ .start = jbd_seq_info_start, ++ .next = jbd_seq_info_next, ++ .stop = jbd_seq_info_stop, ++ .show = jbd_seq_info_show, ++}; ++ ++static int jbd_seq_info_open(struct inode *inode, struct file *file) ++{ ++ journal_t *journal = PDE(inode)->data; ++ struct jbd_stats_proc_session *s; ++ int rc, size; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) ++ return -EIO; ++ size = sizeof(struct transaction_stats_s); ++ s->stats = kmalloc(size, GFP_KERNEL); ++ if (s == NULL) { ++ kfree(s); ++ return -EIO; ++ } ++ spin_lock(&journal->j_history_lock); ++ 
memcpy(s->stats, &journal->j_stats, size); ++ s->journal = journal; ++ spin_unlock(&journal->j_history_lock); ++ ++ rc = seq_open(file, &jbd_seq_info_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = s; ++ } else { ++ kfree(s->stats); ++ kfree(s); ++ } ++ return rc; ++ ++} ++ ++static ssize_t jbd_seq_info_write(struct file *file, const char __user *buf, ++ size_t len, loff_t * ppos) ++{ ++ journal_t *journal = PDE(file->f_dentry->d_inode)->data; ++ int size; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ size = sizeof(struct transaction_stats_s); ++ spin_lock(&journal->j_history_lock); ++ memset(&journal->j_stats, 0, size); ++ spin_unlock(&journal->j_history_lock); ++ ++ return len; ++} ++ ++static int jbd_seq_info_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct jbd_stats_proc_session *s = seq->private; ++ kfree(s->stats); ++ kfree(s); ++ return seq_release(inode, file); ++} ++ ++static struct file_operations jbd_seq_info_fops = { ++ .owner = THIS_MODULE, ++ .open = jbd_seq_info_open, ++ .read = seq_read, ++ .write = jbd_seq_info_write, ++ .llseek = seq_lseek, ++ .release = jbd_seq_info_release, ++}; ++ ++static struct proc_dir_entry *proc_jbd_stats = NULL; ++ ++static void jbd_stats_proc_init(journal_t *journal) ++{ ++ char name[64]; ++ ++ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); ++ journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats); ++ if (journal->j_proc_entry) { ++ struct proc_dir_entry *p; ++ p = create_proc_entry("history", S_IRUGO, ++ journal->j_proc_entry); ++ if (p) { ++ p->proc_fops = &jbd_seq_history_fops; ++ p->data = journal; ++ p = create_proc_entry("info", S_IRUGO, ++ journal->j_proc_entry); ++ if (p) { ++ p->proc_fops = &jbd_seq_info_fops; ++ p->data = journal; ++ } ++ } ++ } ++} ++ ++static void jbd_stats_proc_exit(journal_t *journal) ++{ ++ char name[64]; ++ ++ 
snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); ++ remove_proc_entry("info", journal->j_proc_entry); ++ remove_proc_entry("history", journal->j_proc_entry); ++ remove_proc_entry(name, proc_jbd_stats); ++} ++ ++static void journal_init_stats(journal_t *journal) ++{ ++ int size; ++ ++ if (proc_jbd_stats == NULL) ++ return; ++ ++ journal->j_history_max = 1500; ++ size = sizeof(struct transaction_stats_s) * journal->j_history_max; ++ journal->j_history = kmalloc(size, GFP_KERNEL); ++ if (journal->j_history == NULL) { ++ journal->j_history_max = 0; ++ return; ++ } ++ memset(journal->j_history, 0, size); ++ spin_lock_init(&journal->j_history_lock); ++} ++ + /* + * Management for journal control blocks: functions to create and + * destroy journal_t structures, and to initialise and read existing +@@ -657,6 +989,9 @@ static journal_t * journal_init_common ( + kfree(journal); + goto fail; + } ++ ++ journal_init_stats(journal); ++ + return journal; + fail: + return NULL; +@@ -699,6 +1034,7 @@ journal_t * journal_init_dev(struct bloc + journal->j_blk_offset = start; + journal->j_maxlen = len; + journal->j_blocksize = blocksize; ++ jbd_stats_proc_init(journal); + + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); +@@ -736,6 +1072,7 @@ journal_t * journal_init_inode (struct i + + journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; + journal->j_blocksize = inode->i_sb->s_blocksize; ++ jbd_stats_proc_init(journal); + + err = journal_bmap(journal, 0, &blocknr); + /* If that failed, give up */ +@@ -1106,6 +1443,8 @@ void journal_destroy(journal_t *journal) + brelse(journal->j_sb_buffer); + } + ++ if (journal->j_proc_entry) ++ jbd_stats_proc_exit(journal); + if (journal->j_inode) + iput(journal->j_inode); + if (journal->j_revoke) +@@ -1861,6 +2200,28 @@ static void __exit remove_jbd_proc_entry + + #endif + ++#if defined(CONFIG_PROC_FS) ++ ++#define JBD_STATS_PROC_NAME "fs/jbd" ++ ++static void __init 
create_jbd_stats_proc_entry(void) ++{ ++ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL); ++} ++ ++static void __exit remove_jbd_stats_proc_entry(void) ++{ ++ if (proc_jbd_stats) ++ remove_proc_entry(JBD_STATS_PROC_NAME, NULL); ++} ++ ++#else ++ ++#define create_jbd_stats_proc_entry() do {} while (0) ++#define remove_jbd_stats_proc_entry() do {} while (0) ++ ++#endif ++ + kmem_cache_t *jbd_handle_cache; + + static int __init journal_init_handle_cache(void) +@@ -1915,6 +2276,7 @@ static int __init journal_init(void) + if (ret != 0) + journal_destroy_caches(); + create_jbd_proc_entry(); ++ create_jbd_stats_proc_entry(); + return ret; + } + +@@ -1926,6 +2288,7 @@ static void __exit journal_exit(void) + printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); + #endif + remove_jbd_proc_entry(); ++ remove_jbd_stats_proc_entry(); + journal_destroy_caches(); + } + diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch new file mode 100644 index 0000000..7a48375 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch @@ -0,0 +1,735 @@ +Index: linux-2.6.9/include/linux/jbd.h +=================================================================== +--- linux-2.6.9.orig/include/linux/jbd.h 2006-03-10 18:20:03.000000000 +0300 ++++ linux-2.6.9/include/linux/jbd.h 2006-07-28 02:32:18.000000000 +0400 +@@ -422,6 +422,16 @@ struct handle_s + }; + + ++/* ++ * Some stats for checkpoint phase ++ */ ++struct transaction_chp_stats_s { ++ unsigned long cs_chp_time; ++ unsigned long cs_forced_to_close; ++ unsigned long cs_written; ++ unsigned long cs_dropped; ++}; ++ + /* The transaction_t type is the guts of the journaling mechanism. 
It + * tracks a compound transaction through its various states: + * +@@ -553,6 +563,21 @@ struct transaction_s + spinlock_t t_handle_lock; + + /* ++ * Longest time some handle had to wait for running transaction ++ */ ++ unsigned long t_max_wait; ++ ++ /* ++ * When transaction started ++ */ ++ unsigned long t_start; ++ ++ /* ++ * Checkpointing stats [j_checkpoint_sem] ++ */ ++ struct transaction_chp_stats_s t_chp_stats; ++ ++ /* + * Number of outstanding updates running on this transaction + * [t_handle_lock] + */ +@@ -592,6 +617,57 @@ struct transaction_s + struct list_head t_jcb; + }; + ++struct transaction_run_stats_s { ++ unsigned long rs_wait; ++ unsigned long rs_running; ++ unsigned long rs_locked; ++ unsigned long rs_flushing; ++ unsigned long rs_logging; ++ ++ unsigned long rs_handle_count; ++ unsigned long rs_blocks; ++ unsigned long rs_blocks_logged; ++}; ++ ++struct transaction_stats_s ++{ ++ int ts_type; ++ unsigned long ts_tid; ++ union { ++ struct transaction_run_stats_s run; ++ struct transaction_chp_stats_s chp; ++ } u; ++}; ++ ++#define JBD_STATS_RUN 1 ++#define JBD_STATS_CHECKPOINT 2 ++ ++#define ts_wait u.run.rs_wait ++#define ts_running u.run.rs_running ++#define ts_locked u.run.rs_locked ++#define ts_flushing u.run.rs_flushing ++#define ts_logging u.run.rs_logging ++#define ts_handle_count u.run.rs_handle_count ++#define ts_blocks u.run.rs_blocks ++#define ts_blocks_logged u.run.rs_blocks_logged ++ ++#define ts_chp_time u.chp.cs_chp_time ++#define ts_forced_to_close u.chp.cs_forced_to_close ++#define ts_written u.chp.cs_written ++#define ts_dropped u.chp.cs_dropped ++ ++#define CURRENT_MSECS (jiffies_to_msecs(jiffies)) ++ ++static inline unsigned int ++jbd_time_diff(unsigned int start, unsigned int end) ++{ ++ if (unlikely(start > end)) ++ end = end + (~0UL - start); ++ else ++ end -= start; ++ return end; ++} ++ + /** + * struct journal_s - The journal_s type is the concrete type associated with + * journal_t. 
+@@ -828,6 +904,16 @@ struct journal_s + struct jbd_revoke_table_s *j_revoke_table[2]; + + /* ++ * ++ */ ++ struct transaction_stats_s *j_history; ++ int j_history_max; ++ int j_history_cur; ++ spinlock_t j_history_lock; ++ struct proc_dir_entry *j_proc_entry; ++ struct transaction_stats_s j_stats; ++ ++ /* + * An opaque pointer to fs-private information. ext3 puts its + * superblock pointer here + */ +Index: linux-2.6.9/fs/jbd/commit.c +=================================================================== +--- linux-2.6.9.orig/fs/jbd/commit.c 2006-03-10 18:20:39.000000000 +0300 ++++ linux-2.6.9/fs/jbd/commit.c 2006-07-28 02:32:18.000000000 +0400 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + /* + * Default IO end handler for temporary BJ_IO buffer_heads. +@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour + */ + void journal_commit_transaction(journal_t *journal) + { ++ struct transaction_stats_s stats; + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; + struct buffer_head *wbuf[64]; +@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_ + spin_lock(&journal->j_state_lock); + commit_transaction->t_state = T_LOCKED; + ++ stats.ts_wait = commit_transaction->t_max_wait; ++ stats.ts_locked = CURRENT_MSECS; ++ stats.ts_running = jbd_time_diff(commit_transaction->t_start, ++ stats.ts_locked); ++ + spin_lock(&commit_transaction->t_handle_lock); + while (commit_transaction->t_updates) { + DEFINE_WAIT(wait); +@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_ + */ + journal_switch_revoke_table(journal); + ++ stats.ts_flushing = CURRENT_MSECS; ++ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing); ++ + commit_transaction->t_state = T_FLUSH; + journal->j_committing_transaction = commit_transaction; + journal->j_running_transaction = NULL; +@@ -365,6 +375,11 @@ write_out_data: + */ + commit_transaction->t_state = T_COMMIT; + ++ stats.ts_logging = CURRENT_MSECS; ++ 
stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging); ++ stats.ts_blocks = commit_transaction->t_outstanding_credits; ++ stats.ts_blocks_logged = 0; ++ + descriptor = NULL; + bufs = 0; + while (commit_transaction->t_buffers) { +@@ -513,6 +528,7 @@ start_journal_io: + submit_bh(WRITE, bh); + } + cond_resched(); ++ stats.ts_blocks_logged += bufs; + + /* Force a new descriptor to be generated next + time round the loop. */ +@@ -760,6 +776,7 @@ skip_commit: /* The journal should be un + cp_transaction = jh->b_cp_transaction; + if (cp_transaction) { + JBUFFER_TRACE(jh, "remove from old cp transaction"); ++ cp_transaction->t_chp_stats.cs_dropped++; + __journal_remove_checkpoint(jh); + } + +@@ -806,6 +823,36 @@ skip_commit: /* The journal should be un + + J_ASSERT(commit_transaction->t_state == T_COMMIT); + ++ commit_transaction->t_start = CURRENT_MSECS; ++ stats.ts_logging = jbd_time_diff(stats.ts_logging, ++ commit_transaction->t_start); ++ ++ /* ++ * File the transaction for history ++ */ ++ stats.ts_type = JBD_STATS_RUN; ++ stats.ts_tid = commit_transaction->t_tid; ++ stats.ts_handle_count = commit_transaction->t_handle_count; ++ spin_lock(&journal->j_history_lock); ++ memcpy(journal->j_history + journal->j_history_cur, &stats, ++ sizeof(stats)); ++ if (++journal->j_history_cur == journal->j_history_max) ++ journal->j_history_cur = 0; ++ ++ /* ++ * Calculate overall stats ++ */ ++ journal->j_stats.ts_tid++; ++ journal->j_stats.ts_wait += stats.ts_wait; ++ journal->j_stats.ts_running += stats.ts_running; ++ journal->j_stats.ts_locked += stats.ts_locked; ++ journal->j_stats.ts_flushing += stats.ts_flushing; ++ journal->j_stats.ts_logging += stats.ts_logging; ++ journal->j_stats.ts_handle_count += stats.ts_handle_count; ++ journal->j_stats.ts_blocks += stats.ts_blocks; ++ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged; ++ spin_unlock(&journal->j_history_lock); ++ + /* + * This is a bit sleazy. 
We borrow j_list_lock to protect + * journal->j_committing_transaction in __journal_remove_checkpoint. +Index: linux-2.6.9/fs/jbd/checkpoint.c +=================================================================== +--- linux-2.6.9.orig/fs/jbd/checkpoint.c 2006-03-10 18:20:03.000000000 +0300 ++++ linux-2.6.9/fs/jbd/checkpoint.c 2006-07-28 02:35:21.000000000 +0400 +@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + ++ transaction->t_chp_stats.cs_forced_to_close++; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); +@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct + */ + static int __flush_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count, +- int *drop_count) ++ int *drop_count, transaction_t *transaction) + { + struct buffer_head *bh = jh2bh(jh); + int ret = 0; +@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou + set_buffer_jwrite(bh); + bhs[*batch_count] = bh; + jbd_unlock_bh_state(bh); ++ transaction->t_chp_stats.cs_written++; + (*batch_count)++; + if (*batch_count == NR_BATCH) { + __flush_batch(journal, bhs, batch_count); +@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal + tid_t this_tid; + + transaction = journal->j_checkpoint_transactions; ++ if (transaction->t_chp_stats.cs_chp_time == 0) ++ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS; + this_tid = transaction->t_tid; + jh = transaction->t_checkpoint_list; + last_jh = jh->b_cpprev; +@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal + retry = 1; + break; + } +- retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); ++ retry = __flush_buffer(journal, jh, bhs, &batch_count, ++ &drop_count, transaction); + } while (jh != last_jh && !retry); + + if (batch_count) { +@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct + + void __journal_drop_transaction(journal_t *journal, 
transaction_t *transaction) + { ++ struct transaction_stats_s stats; ++ + assert_spin_locked(&journal->j_list_lock); + if (transaction->t_cpnext) { + transaction->t_cpnext->t_cpprev = transaction->t_cpprev; +@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_ + J_ASSERT(journal->j_running_transaction != transaction); + + jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); ++ ++ /* ++ * File the transaction for history ++ */ ++ if (transaction->t_chp_stats.cs_written != 0 || ++ transaction->t_chp_stats.cs_chp_time != 0) { ++ stats.ts_type = JBD_STATS_CHECKPOINT; ++ stats.ts_tid = transaction->t_tid; ++ stats.u.chp = transaction->t_chp_stats; ++ if (stats.ts_chp_time) ++ stats.ts_chp_time = ++ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS); ++ spin_lock(&journal->j_history_lock); ++ memcpy(journal->j_history + journal->j_history_cur, &stats, ++ sizeof(stats)); ++ if (++journal->j_history_cur == journal->j_history_max) ++ journal->j_history_cur = 0; ++ spin_unlock(&journal->j_history_lock); ++ } ++ + kfree(transaction); + } +Index: linux-2.6.9/fs/jbd/transaction.c +=================================================================== +--- linux-2.6.9.orig/fs/jbd/transaction.c 2006-03-10 18:20:03.000000000 +0300 ++++ linux-2.6.9/fs/jbd/transaction.c 2006-07-28 02:32:18.000000000 +0400 +@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran + + J_ASSERT(journal->j_running_transaction == NULL); + journal->j_running_transaction = transaction; ++ transaction->t_max_wait = 0; ++ transaction->t_start = CURRENT_MSECS; + + return transaction; + } +@@ -86,6 +88,7 @@ static int start_this_handle(journal_t * + int nblocks = handle->h_buffer_credits; + transaction_t *new_transaction = NULL; + int ret = 0; ++ unsigned long ts = CURRENT_MSECS; + + if (nblocks > journal->j_max_transaction_buffers) { + printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", +@@ -219,6 +222,12 @@ repeat_locked: + /* OK, account for the buffers that this operation 
expects to + * use and add the handle to the running transaction. */ + ++ if (time_after(transaction->t_start, ts)) { ++ ts = jbd_time_diff(ts, transaction->t_start); ++ if (ts > transaction->t_max_wait) ++ transaction->t_max_wait= ts; ++ } ++ + handle->h_transaction = transaction; + transaction->t_outstanding_credits += nblocks; + transaction->t_updates++; +Index: linux-2.6.9/fs/jbd/journal.c +=================================================================== +--- linux-2.6.9.orig/fs/jbd/journal.c 2006-06-19 21:31:57.000000000 +0400 ++++ linux-2.6.9/fs/jbd/journal.c 2006-07-28 02:32:18.000000000 +0400 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + EXPORT_SYMBOL(journal_start); + EXPORT_SYMBOL(journal_restart); +@@ -649,6 +650,300 @@ struct journal_head *journal_get_descrip + return journal_add_journal_head(bh); + } + ++struct jbd_stats_proc_session { ++ journal_t *journal; ++ struct transaction_stats_s *stats; ++ int start; ++ int max; ++}; ++ ++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s, ++ struct transaction_stats_s *ts, ++ int first) ++{ ++ if (ts == s->stats + s->max) ++ ts = s->stats; ++ if (!first && ts == s->stats + s->start) ++ return NULL; ++ while (ts->ts_type == 0) { ++ ts++; ++ if (ts == s->stats + s->max) ++ ts = s->stats; ++ if (ts == s->stats + s->start) ++ return NULL; ++ } ++ return ts; ++ ++} ++ ++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos) ++{ ++ struct jbd_stats_proc_session *s = seq->private; ++ struct transaction_stats_s *ts; ++ int l = *pos; ++ ++ if (l == 0) ++ return SEQ_START_TOKEN; ++ ts = jbd_history_skip_empty(s, s->stats + s->start, 1); ++ if (!ts) ++ return NULL; ++ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL); ++ return ts; ++} ++ ++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ struct jbd_stats_proc_session *s = seq->private; ++ struct transaction_stats_s *ts = v; ++ ++ ++*pos; ++ if (v == 
SEQ_START_TOKEN) ++ return jbd_history_skip_empty(s, s->stats + s->start, 1); ++ else ++ return jbd_history_skip_empty(s, ++ts, 0); ++} ++ ++static int jbd_seq_history_show(struct seq_file *seq, void *v) ++{ ++ struct transaction_stats_s *ts = v; ++ if (v == SEQ_START_TOKEN) { ++ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " ++ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", ++ "wait", "run", "lock", "flush", "log", "hndls", ++ "block", "inlog", "ctime", "write", "drop", ++ "close"); ++ return 0; ++ } ++ if (ts->ts_type == JBD_STATS_RUN) ++ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu " ++ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, ++ ts->ts_wait, ts->ts_running, ts->ts_locked, ++ ts->ts_flushing, ts->ts_logging, ++ ts->ts_handle_count, ts->ts_blocks, ++ ts->ts_blocks_logged); ++ else if (ts->ts_type == JBD_STATS_CHECKPOINT) ++ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n", ++ "C", ts->ts_tid, " ", ts->ts_chp_time, ++ ts->ts_written, ts->ts_dropped, ++ ts->ts_forced_to_close); ++ else ++ J_ASSERT(0); ++ return 0; ++} ++ ++static void jbd_seq_history_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations jbd_seq_history_ops = { ++ .start = jbd_seq_history_start, ++ .next = jbd_seq_history_next, ++ .stop = jbd_seq_history_stop, ++ .show = jbd_seq_history_show, ++}; ++ ++static int jbd_seq_history_open(struct inode *inode, struct file *file) ++{ ++ journal_t *journal = PDE(inode)->data; ++ struct jbd_stats_proc_session *s; ++ int rc, size; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) ++ return -EIO; ++ size = sizeof(struct transaction_stats_s) * journal->j_history_max; ++ s->stats = kmalloc(size, GFP_KERNEL); ++ if (s == NULL) { ++ kfree(s); ++ return -EIO; ++ } ++ spin_lock(&journal->j_history_lock); ++ memcpy(s->stats, journal->j_history, size); ++ s->max = journal->j_history_max; ++ s->start = journal->j_history_cur % s->max; ++ spin_unlock(&journal->j_history_lock); ++ ++ rc = 
seq_open(file, &jbd_seq_history_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = s; ++ } else { ++ kfree(s->stats); ++ kfree(s); ++ } ++ return rc; ++ ++} ++ ++static int jbd_seq_history_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct jbd_stats_proc_session *s = seq->private; ++ kfree(s->stats); ++ kfree(s); ++ return seq_release(inode, file); ++} ++ ++static struct file_operations jbd_seq_history_fops = { ++ .owner = THIS_MODULE, ++ .open = jbd_seq_history_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = jbd_seq_history_release, ++}; ++ ++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos) ++{ ++ return *pos ? NULL : SEQ_START_TOKEN; ++} ++ ++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ return NULL; ++} ++ ++static int jbd_seq_info_show(struct seq_file *seq, void *v) ++{ ++ struct jbd_stats_proc_session *s = seq->private; ++ if (v != SEQ_START_TOKEN) ++ return 0; ++ seq_printf(seq, "%lu transaction, each upto %u blocks\n", ++ s->stats->ts_tid, ++ s->journal->j_max_transaction_buffers); ++ if (s->stats->ts_tid == 0) ++ return 0; ++ seq_printf(seq, "average: \n %lums waiting for transaction\n", ++ s->stats->ts_wait / s->stats->ts_tid); ++ seq_printf(seq, " %lums running transaction\n", ++ s->stats->ts_running / s->stats->ts_tid); ++ seq_printf(seq, " %lums transaction was being locked\n", ++ s->stats->ts_locked / s->stats->ts_tid); ++ seq_printf(seq, " %lums flushing data (in ordered mode)\n", ++ s->stats->ts_flushing / s->stats->ts_tid); ++ seq_printf(seq, " %lums logging transaction\n", ++ s->stats->ts_logging / s->stats->ts_tid); ++ seq_printf(seq, " %lu handles per transaction\n", ++ s->stats->ts_handle_count / s->stats->ts_tid); ++ seq_printf(seq, " %lu blocks per transaction\n", ++ s->stats->ts_blocks / s->stats->ts_tid); ++ seq_printf(seq, " %lu logged blocks 
per transaction\n", ++ s->stats->ts_blocks_logged / s->stats->ts_tid); ++ return 0; ++} ++ ++static void jbd_seq_info_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static struct seq_operations jbd_seq_info_ops = { ++ .start = jbd_seq_info_start, ++ .next = jbd_seq_info_next, ++ .stop = jbd_seq_info_stop, ++ .show = jbd_seq_info_show, ++}; ++ ++static int jbd_seq_info_open(struct inode *inode, struct file *file) ++{ ++ journal_t *journal = PDE(inode)->data; ++ struct jbd_stats_proc_session *s; ++ int rc, size; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (s == NULL) ++ return -EIO; ++ size = sizeof(struct transaction_stats_s); ++ s->stats = kmalloc(size, GFP_KERNEL); ++ if (s == NULL) { ++ kfree(s); ++ return -EIO; ++ } ++ spin_lock(&journal->j_history_lock); ++ memcpy(s->stats, &journal->j_stats, size); ++ s->journal = journal; ++ spin_unlock(&journal->j_history_lock); ++ ++ rc = seq_open(file, &jbd_seq_info_ops); ++ if (rc == 0) { ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ m->private = s; ++ } else { ++ kfree(s->stats); ++ kfree(s); ++ } ++ return rc; ++ ++} ++ ++static int jbd_seq_info_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = (struct seq_file *)file->private_data; ++ struct jbd_stats_proc_session *s = seq->private; ++ kfree(s->stats); ++ kfree(s); ++ return seq_release(inode, file); ++} ++ ++static struct file_operations jbd_seq_info_fops = { ++ .owner = THIS_MODULE, ++ .open = jbd_seq_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = jbd_seq_info_release, ++}; ++ ++static struct proc_dir_entry *proc_jbd_stats = NULL; ++ ++static void jbd_stats_proc_init(journal_t *journal) ++{ ++ char name[64]; ++ ++ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); ++ journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats); ++ if (journal->j_proc_entry) { ++ struct proc_dir_entry *p; ++ p = create_proc_entry("history", S_IRUGO, ++ journal->j_proc_entry); ++ if (p) { ++ 
p->proc_fops = &jbd_seq_history_fops; ++ p->data = journal; ++ p = create_proc_entry("info", S_IRUGO, ++ journal->j_proc_entry); ++ if (p) { ++ p->proc_fops = &jbd_seq_info_fops; ++ p->data = journal; ++ } ++ } ++ } ++} ++ ++static void jbd_stats_proc_exit(journal_t *journal) ++{ ++ char name[64]; ++ ++ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); ++ remove_proc_entry("info", journal->j_proc_entry); ++ remove_proc_entry("history", journal->j_proc_entry); ++ remove_proc_entry(name, proc_jbd_stats); ++} ++ ++static void journal_init_stats(journal_t *journal) ++{ ++ int size; ++ ++ if (proc_jbd_stats == NULL) ++ return; ++ ++ journal->j_history_max = 100; ++ size = sizeof(struct transaction_stats_s) * journal->j_history_max; ++ journal->j_history = kmalloc(size, GFP_KERNEL); ++ if (journal->j_history == NULL) { ++ journal->j_history_max = 0; ++ return; ++ } ++ memset(journal->j_history, 0, size); ++ spin_lock_init(&journal->j_history_lock); ++} ++ + /* + * Management for journal control blocks: functions to create and + * destroy journal_t structures, and to initialise and read existing +@@ -691,6 +986,9 @@ static journal_t * journal_init_common ( + kfree(journal); + goto fail; + } ++ ++ journal_init_stats(journal); ++ + return journal; + fail: + return NULL; +@@ -733,6 +1031,7 @@ journal_t * journal_init_dev(struct bloc + journal->j_blk_offset = start; + journal->j_maxlen = len; + journal->j_blocksize = blocksize; ++ jbd_stats_proc_init(journal); + + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); +@@ -770,6 +1069,7 @@ journal_t * journal_init_inode (struct i + + journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; + journal->j_blocksize = inode->i_sb->s_blocksize; ++ jbd_stats_proc_init(journal); + + err = journal_bmap(journal, 0, &blocknr); + /* If that failed, give up */ +@@ -1140,6 +1440,8 @@ void journal_destroy(journal_t *journal) + brelse(journal->j_sb_buffer); + } + ++ if 
(journal->j_proc_entry) ++ jbd_stats_proc_exit(journal); + if (journal->j_inode) + iput(journal->j_inode); + if (journal->j_revoke) +@@ -1895,6 +2197,28 @@ static void __exit remove_jbd_proc_entry + + #endif + ++#if defined(CONFIG_PROC_FS) ++ ++#define JBD_STATS_PROC_NAME "fs/jbd" ++ ++static void __init create_jbd_stats_proc_entry(void) ++{ ++ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL); ++} ++ ++static void __exit remove_jbd_stats_proc_entry(void) ++{ ++ if (proc_jbd_stats) ++ remove_proc_entry(JBD_STATS_PROC_NAME, NULL); ++} ++ ++#else ++ ++#define create_jbd_stats_proc_entry() do {} while (0) ++#define remove_jbd_stats_proc_entry() do {} while (0) ++ ++#endif ++ + kmem_cache_t *jbd_handle_cache; + + static int __init journal_init_handle_cache(void) +@@ -1949,6 +2273,7 @@ static int __init journal_init(void) + if (ret != 0) + journal_destroy_caches(); + create_jbd_proc_entry(); ++ create_jbd_stats_proc_entry(); + return ret; + } + +@@ -1960,6 +2285,7 @@ static void __exit journal_exit(void) + printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); + #endif + remove_jbd_proc_entry(); ++ remove_jbd_stats_proc_entry(); + journal_destroy_caches(); + } + diff --git a/lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch new file mode 100644 index 0000000..287ed90 --- /dev/null +++ b/lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch @@ -0,0 +1,12 @@ +Index: linux-2.6.7/fs/block_dev.c +=================================================================== +--- linux-2.6.7.orig/fs/block_dev.c 2004-06-16 13:20:26.000000000 +0800 ++++ linux-2.6.7/fs/block_dev.c 2004-08-30 17:36:57.000000000 +0800 +@@ -832,6 +832,7 @@ + if (!path || !*path) + return ERR_PTR(-EINVAL); + ++ intent_init(&nd.intent.open, IT_LOOKUP); + error = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (error) + return ERR_PTR(error); diff --git 
a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch deleted file mode 100644 index 5639fe4..0000000 --- a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch +++ /dev/null @@ -1,120 +0,0 @@ -Index: linux-2.6/fs/cifs/dir.c -=================================================================== ---- linux-2.6.orig/fs/cifs/dir.c 2006-07-15 21:04:01.000000000 +0800 -+++ linux-2.6/fs/cifs/dir.c 2006-07-15 21:04:47.000000000 +0800 -@@ -146,7 +146,7 @@ cifs_create(struct inode *inode, struct - } - - if(nd && (nd->flags & LOOKUP_OPEN)) { -- int oflags = nd->intent.open.flags; -+ int oflags = nd->intent.flags; - - desiredAccess = 0; - if (oflags & FMODE_READ) -Index: linux-2.6/fs/nfs/dir.c -=================================================================== ---- linux-2.6.orig/fs/nfs/dir.c 2006-07-15 21:04:01.000000000 +0800 -+++ linux-2.6/fs/nfs/dir.c 2006-07-15 21:04:47.000000000 +0800 -@@ -867,7 +867,7 @@ int nfs_is_exclusive_create(struct inode - return 0; - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) - return 0; -- return (nd->intent.open.flags & O_EXCL) != 0; -+ return (nd->intent.it_flags & O_EXCL) != 0; - } - - static inline int nfs_reval_fsid(struct inode *dir, -@@ -955,7 +955,7 @@ static int is_atomic_open(struct inode * - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? 
*/ -- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) -+ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) - return 0; - return 1; - } -@@ -979,7 +979,7 @@ static struct dentry *nfs_atomic_lookup( - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - - /* Let vfs_create() deal with O_EXCL */ -- if (nd->intent.open.flags & O_EXCL) { -+ if (nd->intent.it_flags & O_EXCL) { - d_add(dentry, NULL); - goto out; - } -@@ -994,7 +994,7 @@ static struct dentry *nfs_atomic_lookup( - goto out; - } - -- if (nd->intent.open.flags & O_CREAT) { -+ if (nd->intent.it_flags & O_CREAT) { - nfs_begin_data_update(dir); - res = nfs4_atomic_open(dir, dentry, nd); - nfs_end_data_update(dir); -@@ -1013,7 +1013,7 @@ static struct dentry *nfs_atomic_lookup( - case -ENOTDIR: - goto no_open; - case -ELOOP: -- if (!(nd->intent.open.flags & O_NOFOLLOW)) -+ if (!(nd->intent.it_flags & O_NOFOLLOW)) - goto no_open; - /* case -EINVAL: */ - default: -@@ -1049,7 +1049,7 @@ static int nfs_open_revalidate(struct de - /* NFS only supports OPEN on regular files */ - if (!S_ISREG(inode->i_mode)) - goto no_open; -- openflags = nd->intent.open.flags; -+ openflags = nd->intent.it_flags; - /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) - goto no_open; -@@ -1182,7 +1182,7 @@ static int nfs_create(struct inode *dir, - attr.ia_valid = ATTR_MODE; - - if (nd && (nd->flags & LOOKUP_CREATE)) -- open_flags = nd->intent.open.flags; -+ open_flags = nd->intent.it_flags; - - lock_kernel(); - nfs_begin_data_update(dir); -Index: linux-2.6/fs/nfs/nfs4proc.c -=================================================================== ---- linux-2.6.orig/fs/nfs/nfs4proc.c 2006-07-15 21:04:01.000000000 +0800 -+++ linux-2.6/fs/nfs/nfs4proc.c 2006-07-15 21:09:29.000000000 +0800 -@@ -1246,7 +1246,7 @@ static int nfs4_intent_set_file(struct n - ctx->state = state; - return 0; - } -- nfs4_close_state(state, 
nd->intent.open.flags); -+ nfs4_close_state(state, nd->intent.flags); - return PTR_ERR(filp); - } - -@@ -1259,22 +1259,22 @@ nfs4_atomic_open(struct inode *dir, stru - struct dentry *res; - - if (nd->flags & LOOKUP_CREATE) { -- attr.ia_mode = nd->intent.open.create_mode; -+ attr.ia_mode = nd->intent.create_mode; - attr.ia_valid = ATTR_MODE; - if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current->fs->umask; - } else { - attr.ia_valid = 0; -- BUG_ON(nd->intent.open.flags & O_CREAT); -+ BUG_ON(nd->intent.flags & O_CREAT); - } - - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); - if (IS_ERR(cred)) - return (struct dentry *)cred; -- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); -+ state = nfs4_do_open(dir, dentry, nd->intent.flags, &attr, cred); - put_rpccred(cred); - if (IS_ERR(state)) { -- if (PTR_ERR(state) == -ENOENT) -+ ose_statef (PTR_ERR(state) == -ENOENT); - d_add(dentry, NULL); - return (struct dentry *)state; - } diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch index 393cf78..7d6f4ad 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch @@ -662,7 +662,7 @@ Index: linux-2.6.16.21-0.8/include/linux/fs.h #define FMODE_READ 1 #define FMODE_WRITE 2 -+#define FMODE_EXEC 4 ++#define FMODE_EXEC 16 /* Internal kernel extensions */ #define FMODE_LSEEK 4 diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.6.18-vanilla.patch index 6e86bde..1344b59 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6.18-vanilla.patch @@ -1,7 +1,7 @@ Index: linux-2.6/fs/inode.c =================================================================== ---- linux-2.6.orig/fs/inode.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/inode.c 2006-07-15 
21:04:08.000000000 +0800 +--- linux-2.6.orig/fs/inode.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/inode.c 2006-08-31 11:23:48.000000000 +0800 @@ -234,6 +234,7 @@ void __iget(struct inode * inode) inodes_stat.nr_unused--; } @@ -12,14 +12,13 @@ Index: linux-2.6/fs/inode.c * @inode: inode to clear Index: linux-2.6/fs/open.c =================================================================== ---- linux-2.6.orig/fs/open.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/open.c 2006-07-15 21:04:08.000000000 +0800 -@@ -225,12 +225,12 @@ static long do_sys_truncate(const char _ - struct nameidata nd; +--- linux-2.6.orig/fs/open.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/open.c 2006-08-31 11:59:09.000000000 +0800 +@@ -226,11 +226,12 @@ static long do_sys_truncate(const char _ struct inode * inode; int error; -- -+ intent_init(&nd.intent, IT_GETATTR); + ++ intent_init(&nd.intent.open, IT_GETATTR); error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; @@ -29,15 +28,15 @@ Index: linux-2.6/fs/open.c if (error) goto out; inode = nd.dentry->d_inode; -@@ -495,6 +495,7 @@ asmlinkage long sys_faccessat(int dfd, c +@@ -495,6 +496,7 @@ asmlinkage long sys_faccessat(int dfd, c int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ return -EINVAL; -@@ -519,7 +520,7 @@ asmlinkage long sys_faccessat(int dfd, c +@@ -519,7 +521,7 @@ asmlinkage long sys_faccessat(int dfd, c else current->cap_effective = current->cap_permitted; @@ -46,29 +45,29 @@ Index: linux-2.6/fs/open.c if (!res) { res = vfs_permission(&nd, mode); /* SuS v2 requires we report a read only fs too */ -@@ -545,8 +546,9 @@ asmlinkage long sys_chdir(const char __u +@@ -545,8 +547,9 @@ asmlinkage long sys_chdir(const char __u { struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); if (error) goto out; -@@ -596,8 +598,9 @@ asmlinkage long sys_chroot(const char __ +@@ -596,8 +599,9 @@ asmlinkage long sys_chroot(const char __ { struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); if (error) goto out; -@@ -823,6 +826,7 @@ static struct file *__dentry_open(struct +@@ -823,6 +827,7 @@ static struct file *__dentry_open(struct error = open(inode, f); if (error) goto cleanup_all; @@ -76,7 +75,7 @@ Index: linux-2.6/fs/open.c } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -@@ -849,6 +853,7 @@ cleanup_all: +@@ -849,6 +854,7 @@ cleanup_all: f->f_dentry = NULL; f->f_vfsmnt = NULL; cleanup_file: @@ -84,67 +83,39 @@ Index: linux-2.6/fs/open.c put_filp(f); dput(dentry); mntput(mnt); -@@ -874,6 +879,7 @@ static struct file *do_filp_open(int dfd +@@ -874,6 +880,7 @@ static struct file *do_filp_open(int dfd { int namei_flags, error; struct nameidata nd; -+ intent_init(&nd.intent, IT_OPEN); ++ intent_init(&nd.intent.open, IT_OPEN); namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) -@@ -914,19 
+920,19 @@ EXPORT_SYMBOL(filp_open); - struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) - { -- if (IS_ERR(nd->intent.open.file)) -+ if (IS_ERR(nd->intent.file)) - goto out; - if (IS_ERR(dentry)) - goto out_err; -- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), -- nd->intent.open.flags - 1, -- nd->intent.open.file, -+ nd->intent.file = __dentry_open(dget(dentry), mntget(nd->mnt), -+ nd->intent.flags - 1, -+ nd->intent.file, - open); - out: -- return nd->intent.open.file; -+ return nd->intent.file; - out_err: - release_open_intent(nd); -- nd->intent.open.file = (struct file *)dentry; -+ nd->intent.file = (struct file *)dentry; - goto out; - } - EXPORT_SYMBOL_GPL(lookup_instantiate_filp); -@@ -943,7 +949,8 @@ struct file *nameidata_to_filp(struct na - struct file *filp; +@@ -944,6 +951,7 @@ struct file *nameidata_to_filp(struct na /* Pick up the filp from the open intent */ -- filp = nd->intent.open.file; -+ filp = nd->intent.file; -+ filp->f_it = &nd->intent; + filp = nd->intent.open.file; ++ filp->f_it = &nd->intent.open; /* Has the filesystem initialised the file for us? 
*/ if (filp->f_dentry == NULL) filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); Index: linux-2.6/fs/nfsctl.c =================================================================== ---- linux-2.6.orig/fs/nfsctl.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/nfsctl.c 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/fs/nfsctl.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/nfsctl.c 2006-08-31 11:23:48.000000000 +0800 @@ -25,6 +25,7 @@ static struct file *do_open(char *name, struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_OPEN); ++ intent_init(&nd.intent.open, IT_OPEN); nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); if (IS_ERR(nd.mnt)) Index: linux-2.6/fs/namei.c =================================================================== ---- linux-2.6.orig/fs/namei.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/namei.c 2006-07-15 21:04:36.000000000 +0800 -@@ -337,8 +337,19 @@ int deny_write_access(struct file * file +--- linux-2.6.orig/fs/namei.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/namei.c 2006-08-31 11:59:09.000000000 +0800 +@@ -344,8 +344,19 @@ int deny_write_access(struct file * file return 0; } @@ -164,21 +135,7 @@ Index: linux-2.6/fs/namei.c dput(nd->dentry); mntput(nd->mnt); } -@@ -359,10 +370,10 @@ void path_release_on_umount(struct namei - */ - void release_open_intent(struct nameidata *nd) - { -- if (nd->intent.open.file->f_dentry == NULL) -- put_filp(nd->intent.open.file); -+ if (nd->intent.file->f_dentry == NULL) -+ put_filp(nd->intent.file); - else -- fput(nd->intent.open.file); -+ fput(nd->intent.file); - } - - /* -@@ -440,8 +451,12 @@ static struct dentry * real_lookup(struc +@@ -447,8 +458,12 @@ static struct dentry * real_lookup(struc { struct dentry * result; struct inode *dir = parent->d_inode; @@ -191,7 +148,7 @@ Index: linux-2.6/fs/namei.c /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. 
-@@ -475,13 +490,16 @@ static struct dentry * real_lookup(struc +@@ -482,13 +497,16 @@ static struct dentry * real_lookup(struc * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ @@ -210,28 +167,28 @@ Index: linux-2.6/fs/namei.c return result; } -@@ -509,7 +527,9 @@ walk_init_root(const char *name, struct +@@ -516,7 +534,9 @@ walk_init_root(const char *name, struct static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { int res = 0; -+ struct lookup_intent it = nd->intent; ++ struct lookup_intent it = nd->intent.open; char *name; + if (IS_ERR(link)) goto fail; -@@ -519,6 +539,10 @@ static __always_inline int __vfs_follow_ +@@ -526,6 +546,10 @@ static __always_inline int __vfs_follow_ /* weird __emul_prefix() stuff did it */ goto out; } -+ intent_init(&nd->intent, it.it_op); -+ nd->intent.it_flags = it.it_flags; -+ nd->intent.it_create_mode = it.it_create_mode; -+ nd->intent.file = it.file; ++ intent_init(&nd->intent.open, it.it_op); ++ nd->intent.open.it_flags = it.it_flags; ++ nd->intent.open.it_create_mode = it.it_create_mode; ++ nd->intent.open.file = it.file; res = link_path_walk(link, nd); out: if (nd->depth || res || nd->last_type!=LAST_NORM) -@@ -771,6 +795,33 @@ fail: +@@ -778,6 +802,33 @@ fail: return PTR_ERR(dentry); } @@ -265,7 +222,7 @@ Index: linux-2.6/fs/namei.c /* * Name resolution. 
* This is the basic name resolution function, turning a pathname into -@@ -867,7 +918,11 @@ static fastcall int __link_path_walk(con +@@ -874,7 +925,11 @@ static fastcall int __link_path_walk(con goto out_dput; if (inode->i_op->follow_link) { @@ -277,7 +234,7 @@ Index: linux-2.6/fs/namei.c if (err) goto return_err; err = -ENOENT; -@@ -902,6 +957,23 @@ last_component: +@@ -909,6 +964,23 @@ last_component: inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -301,7 +258,7 @@ Index: linux-2.6/fs/namei.c goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { -@@ -909,7 +981,9 @@ last_component: +@@ -916,7 +988,9 @@ last_component: if (err < 0) break; } @@ -311,26 +268,7 @@ Index: linux-2.6/fs/namei.c if (err) break; inode = next.dentry->d_inode; -@@ -1152,13 +1226,13 @@ static int __path_lookup_intent_open(int - - if (filp == NULL) - return -ENFILE; -- nd->intent.open.file = filp; -- nd->intent.open.flags = open_flags; -- nd->intent.open.create_mode = create_mode; -+ nd->intent.file = filp; -+ nd->intent.flags = open_flags; -+ nd->intent.create_mode = create_mode; - err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); -- if (IS_ERR(nd->intent.open.file)) { -+ if (IS_ERR(nd->intent.file)) { - if (err == 0) { -- err = PTR_ERR(nd->intent.open.file); -+ err = PTR_ERR(nd->intent.file); - path_release(nd); - } - } else if (err != 0) -@@ -1261,7 +1335,7 @@ static struct dentry *lookup_hash(struct +@@ -1268,7 +1342,7 @@ static struct dentry *lookup_hash(struct } /* SMP-safe */ @@ -339,7 +277,7 @@ Index: linux-2.6/fs/namei.c { unsigned long hash; struct qstr this; -@@ -1281,11 +1355,17 @@ struct dentry * lookup_one_len(const cha +@@ -1288,11 +1362,17 @@ struct dentry * lookup_one_len(const cha } this.hash = end_name_hash(hash); @@ -358,7 +296,7 @@ Index: linux-2.6/fs/namei.c /* * namei() * -@@ -1297,8 +1377,9 @@ access: +@@ -1304,8 +1384,9 @@ access: * that namei follows links, while lnamei does not. 
* SMP-safe */ @@ -370,14 +308,14 @@ Index: linux-2.6/fs/namei.c { char *tmp = getname(name); int err = PTR_ERR(tmp); -@@ -1310,9 +1391,22 @@ int fastcall __user_walk_fd(int dfd, con +@@ -1317,9 +1398,22 @@ int fastcall __user_walk_fd(int dfd, con return err; } +int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags, + struct nameidata *nd) +{ -+ intent_init(&nd->intent, IT_LOOKUP); ++ intent_init(&nd->intent.open, IT_LOOKUP); + return __user_walk_fd_it(dfd, name, flags, nd); +} + @@ -389,29 +327,29 @@ Index: linux-2.6/fs/namei.c int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) { - return __user_walk_fd(AT_FDCWD, name, flags, nd); -+ intent_init(&nd->intent, IT_LOOKUP); ++ intent_init(&nd->intent.open, IT_LOOKUP); + return __user_walk_it(name, flags, nd); } /* -@@ -1593,6 +1687,8 @@ int open_namei(int dfd, const char *path +@@ -1600,6 +1694,8 @@ int open_namei(int dfd, const char *path if (flag & O_APPEND) acc_mode |= MAY_APPEND; -+ nd->intent.it_flags = flag; -+ nd->intent.it_create_mode = mode; ++ nd->intent.open.it_flags = flag; ++ nd->intent.open.it_create_mode = mode; /* * The simplest case - just a plain lookup. */ -@@ -1607,6 +1703,7 @@ int open_namei(int dfd, const char *path +@@ -1614,6 +1710,7 @@ int open_namei(int dfd, const char *path /* * Create - we need to know the parent. 
*/ -+ nd->intent.it_op |= IT_CREAT; ++ nd->intent.open.it_op |= IT_CREAT; error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); if (error) return error; -@@ -1623,7 +1720,9 @@ int open_namei(int dfd, const char *path +@@ -1630,7 +1727,9 @@ int open_namei(int dfd, const char *path dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); @@ -421,28 +359,7 @@ Index: linux-2.6/fs/namei.c path.mnt = nd->mnt; do_last: -@@ -1633,9 +1732,9 @@ do_last: - goto exit; - } - -- if (IS_ERR(nd->intent.open.file)) { -+ if (IS_ERR(nd->intent.file)) { - mutex_unlock(&dir->d_inode->i_mutex); -- error = PTR_ERR(nd->intent.open.file); -+ error = PTR_ERR(nd->intent.file); - goto exit_dput; - } - -@@ -1688,7 +1787,7 @@ ok: - exit_dput: - dput_path(&path, nd); - exit: -- if (!IS_ERR(nd->intent.open.file)) -+ if (!IS_ERR(nd->intent.file)) - release_open_intent(nd); - path_release(nd); - return error; -@@ -1731,7 +1830,9 @@ do_link: +@@ -1746,7 +1845,9 @@ do_link: } dir = nd->dentry; mutex_lock(&dir->d_inode->i_mutex); @@ -452,17 +369,17 @@ Index: linux-2.6/fs/namei.c path.mnt = nd->mnt; __putname(nd->last.name); goto do_last; -@@ -2243,6 +2344,9 @@ asmlinkage long sys_linkat(int olddfd, c +@@ -2260,6 +2361,9 @@ asmlinkage long sys_linkat(int olddfd, c int error; char * to; -+ intent_init(&nd.intent, IT_LOOKUP); -+ intent_init(&old_nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent.open, IT_LOOKUP); ++ intent_init(&old_nd.intent.open, IT_LOOKUP); + if ((flags & ~AT_SYMLINK_FOLLOW) != 0) return -EINVAL; -@@ -2250,7 +2354,7 @@ asmlinkage long sys_linkat(int olddfd, c +@@ -2267,7 +2371,7 @@ asmlinkage long sys_linkat(int olddfd, c if (IS_ERR(to)) return PTR_ERR(to); @@ -473,8 +390,8 @@ Index: linux-2.6/fs/namei.c if (error) Index: linux-2.6/fs/stat.c =================================================================== ---- linux-2.6.orig/fs/stat.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/stat.c 2006-07-15 21:04:08.000000000 +0800 +--- 
linux-2.6.orig/fs/stat.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/stat.c 2006-08-31 11:23:48.000000000 +0800 @@ -37,7 +37,7 @@ void generic_fillattr(struct inode *inod EXPORT_SYMBOL(generic_fillattr); @@ -510,11 +427,11 @@ Index: linux-2.6/fs/stat.c int error; - error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); + error = __user_walk_fd_it(dfd, name, LOOKUP_FOLLOW, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat); path_release(&nd); } return error; @@ -523,11 +440,11 @@ Index: linux-2.6/fs/stat.c int error; - error = __user_walk_fd(dfd, name, 0, &nd); -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); + error = __user_walk_fd_it(dfd, name, 0, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat); path_release(&nd); } return error; @@ -536,19 +453,19 @@ Index: linux-2.6/fs/stat.c struct file *f = fget(fd); int error = -EBADF; + struct nameidata nd; -+ intent_init(&nd.intent, IT_GETATTR); ++ intent_init(&nd.intent.open, IT_GETATTR); if (f) { - error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); -+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); -+ intent_release(&nd.intent); ++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent.open, stat); ++ intent_release(&nd.intent.open); fput(f); } return error; Index: linux-2.6/fs/namespace.c =================================================================== ---- linux-2.6.orig/fs/namespace.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/namespace.c 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/fs/namespace.c 2006-08-31 11:17:39.000000000 +0800 ++++ 
linux-2.6/fs/namespace.c 2006-08-31 11:59:07.000000000 +0800 @@ -73,6 +73,7 @@ struct vfsmount *alloc_vfsmnt(const char INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); @@ -588,7 +505,7 @@ Index: linux-2.6/fs/namespace.c return err; if (!old_name || !*old_name) return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent.open, IT_LOOKUP); err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; @@ -596,7 +513,7 @@ Index: linux-2.6/fs/namespace.c return -EPERM; if (!old_name || !*old_name) return -EINVAL; -+ intent_init(&old_nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent.open, IT_LOOKUP); err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; @@ -604,19 +521,19 @@ Index: linux-2.6/fs/namespace.c int retval = 0; int mnt_flags = 0; -+ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent.open, IT_LOOKUP); /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; Index: linux-2.6/fs/exec.c =================================================================== ---- linux-2.6.orig/fs/exec.c 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/fs/exec.c 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/fs/exec.c 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/fs/exec.c 2006-08-31 11:59:09.000000000 +0800 @@ -127,6 +127,7 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; -+ intent_init(&nd.intent, IT_OPEN); ++ intent_init(&nd.intent.open, IT_OPEN); error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); if (error) goto out; @@ -624,26 +541,19 @@ Index: linux-2.6/fs/exec.c int err; struct file *file; -+ intent_init(&nd.intent, IT_OPEN); ++ intent_init(&nd.intent.open, IT_OPEN); err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); file = ERR_PTR(err); Index: linux-2.6/include/linux/dcache.h =================================================================== ---- 
linux-2.6.orig/include/linux/dcache.h 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/include/linux/dcache.h 2006-07-15 21:04:08.000000000 +0800 -@@ -4,6 +4,7 @@ - #ifdef __KERNEL__ - - #include -+#include - #include - #include - #include -@@ -36,6 +37,8 @@ struct qstr { +--- linux-2.6.orig/include/linux/dcache.h 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/include/linux/dcache.h 2006-08-31 12:00:07.000000000 +0800 +@@ -36,6 +36,9 @@ struct qstr { const unsigned char *name; }; ++struct inode; +#include + struct dentry_stat_t { @@ -651,8 +561,8 @@ Index: linux-2.6/include/linux/dcache.h int nr_unused; Index: linux-2.6/include/linux/fs.h =================================================================== ---- linux-2.6.orig/include/linux/fs.h 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/include/linux/fs.h 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/include/linux/fs.h 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/include/linux/fs.h 2006-08-31 11:59:09.000000000 +0800 @@ -280,6 +280,8 @@ typedef void (dio_iodone_t)(struct kiocb #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 @@ -670,7 +580,7 @@ Index: linux-2.6/include/linux/fs.h __u32 i_generation; -@@ -699,6 +702,7 @@ struct file { +@@ -700,6 +703,7 @@ struct file { spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; @@ -678,7 +588,7 @@ Index: linux-2.6/include/linux/fs.h }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); -@@ -1099,7 +1103,9 @@ struct inode_operations { +@@ -1100,7 +1104,9 @@ struct inode_operations { void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -688,7 +598,7 @@ Index: linux-2.6/include/linux/fs.h int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); 
-@@ -1140,6 +1146,7 @@ struct super_operations { +@@ -1141,6 +1147,7 @@ struct super_operations { int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct vfsmount *, int); @@ -696,7 +606,7 @@ Index: linux-2.6/include/linux/fs.h int (*show_options)(struct seq_file *, struct vfsmount *); int (*show_stats)(struct seq_file *, struct vfsmount *); -@@ -1362,6 +1369,7 @@ extern int may_umount_tree(struct vfsmou +@@ -1363,6 +1370,7 @@ extern int may_umount_tree(struct vfsmou extern int may_umount(struct vfsmount *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern void release_mounts(struct list_head *); @@ -704,7 +614,7 @@ Index: linux-2.6/include/linux/fs.h extern long do_mount(char *, char *, char *, unsigned long, void *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, -@@ -1423,6 +1431,7 @@ extern long do_sys_open(int fdf, const c +@@ -1424,6 +1432,7 @@ extern long do_sys_open(int fdf, const c int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); @@ -714,8 +624,8 @@ Index: linux-2.6/include/linux/fs.h Index: linux-2.6/include/linux/namei.h =================================================================== ---- linux-2.6.orig/include/linux/namei.h 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/include/linux/namei.h 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/include/linux/namei.h 2006-08-31 11:17:39.000000000 +0800 ++++ linux-2.6/include/linux/namei.h 2006-08-31 11:23:48.000000000 +0800 @@ -5,10 +5,39 @@ struct vfsmount; @@ -758,16 +668,9 @@ Index: linux-2.6/include/linux/namei.h + } d; }; - enum { MAX_NESTED_LINKS = 5 }; -@@ -22,12 +51,16 @@ struct nameidata { - unsigned depth; - char *saved_names[MAX_NESTED_LINKS + 1]; - -- /* Intent data */ -- union { -- struct open_intent 
open; -- } intent; -+ struct lookup_intent intent; + enum { MAX_NESTED_LINKS = 8 }; +@@ -28,6 +57,13 @@ struct nameidata { + } intent; }; +static inline void intent_init(struct lookup_intent *it, int op) @@ -780,7 +683,7 @@ Index: linux-2.6/include/linux/namei.h /* * Type of the last component on LOOKUP_PARENT */ -@@ -48,6 +81,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -48,6 +84,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 #define LOOKUP_REVAL 64 @@ -789,7 +692,7 @@ Index: linux-2.6/include/linux/namei.h /* * Intent data */ -@@ -57,10 +92,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA +@@ -57,10 +95,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); @@ -811,8 +714,8 @@ Index: linux-2.6/include/linux/namei.h extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); Index: linux-2.6/include/linux/mount.h =================================================================== ---- linux-2.6.orig/include/linux/mount.h 2006-07-15 21:04:02.000000000 +0800 -+++ linux-2.6/include/linux/mount.h 2006-07-15 21:04:08.000000000 +0800 +--- linux-2.6.orig/include/linux/mount.h 2006-08-31 11:17:40.000000000 +0800 ++++ linux-2.6/include/linux/mount.h 2006-08-31 11:23:48.000000000 +0800 @@ -53,6 +53,8 @@ struct vfsmount { struct list_head mnt_slave; /* slave list entry */ struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ diff --git a/lustre/kernel_patches/patches/vfs_races-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/vfs_races-2.6.18-vanilla.patch index 36b2e65..5898ff7 100644 --- a/lustre/kernel_patches/patches/vfs_races-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/vfs_races-2.6.18-vanilla.patch @@ -1,7 +1,7 @@ Index: linux-2.6/fs/dcache.c 
=================================================================== ---- linux-2.6.orig/fs/dcache.c 2006-07-15 16:08:36.000000000 +0800 -+++ linux-2.6/fs/dcache.c 2006-07-15 16:10:41.000000000 +0800 +--- linux-2.6.orig/fs/dcache.c 2006-08-31 11:59:09.000000000 +0800 ++++ linux-2.6/fs/dcache.c 2006-09-06 14:01:37.000000000 +0800 @@ -226,6 +226,13 @@ int d_invalidate(struct dentry * dentry) spin_unlock(&dcache_lock); return 0; @@ -48,14 +48,13 @@ Index: linux-2.6/fs/dcache.c x = y; y = __tmp; } while (0) Index: linux-2.6/include/linux/dcache.h =================================================================== ---- linux-2.6.orig/include/linux/dcache.h 2006-07-15 16:10:33.000000000 +0800 -+++ linux-2.6/include/linux/dcache.h 2006-07-15 16:10:41.000000000 +0800 -@@ -176,6 +176,8 @@ d_iput: no no no yes +--- linux-2.6.orig/include/linux/dcache.h 2006-08-31 12:00:23.000000000 +0800 ++++ linux-2.6/include/linux/dcache.h 2006-09-06 14:02:36.000000000 +0800 +@@ -176,6 +176,7 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ #define DCACHE_UNHASHED 0x0010 -+#define DCACHE_LUSTRE_INVALID 0x0020 /* Lustre invalidated */ -+ ++#define DCACHE_LUSTRE_INVALID 0x0040 /* Lustre invalidated */ #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ diff --git a/lustre/kernel_patches/series/2.6-fc3.series b/lustre/kernel_patches/series/2.6-fc3.series index 90ada9a..b2090f0 100644 --- a/lustre/kernel_patches/series/2.6-fc3.series +++ b/lustre/kernel_patches/series/2.6-fc3.series @@ -21,3 +21,4 @@ ext3-patch-fuzz-fixup-fc3.patch uml-exprt-clearuser.patch fsprivate-2.6.patch linux-2.6.9-ext3-sub-second-timestamp.patch +bitops_ext2_find_next_le_bit-2.6.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4-titech.series b/lustre/kernel_patches/series/2.6-rhel4-titech.series index 4d881fe..c9c7c23 100644 --- a/lustre/kernel_patches/series/2.6-rhel4-titech.series +++ b/lustre/kernel_patches/series/2.6-rhel4-titech.series @@ -28,3 +28,4 @@ raid5-stripe-by-stripe-handling.patch raid5-optimize-memcpy.patch raid5-merge-ios.patch raid5-serialize-ovelapping-reqs.patch +bitops_ext2_find_next_le_bit-2.6.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4.series b/lustre/kernel_patches/series/2.6-rhel4.series index 1c9aeb7..8c7d33c 100644 --- a/lustre/kernel_patches/series/2.6-rhel4.series +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -27,3 +27,5 @@ raid5-large-io.patch raid5-stripe-by-stripe-handling.patch raid5-merge-ios.patch raid5-serialize-ovelapping-reqs.patch +jbd-stats-2.6.9.patch +bitops_ext2_find_next_le_bit-2.6.patch diff --git a/lustre/kernel_patches/series/2.6-sles10.series b/lustre/kernel_patches/series/2.6-sles10.series index 0b06087..6ecff57 100644 --- a/lustre/kernel_patches/series/2.6-sles10.series +++ b/lustre/kernel_patches/series/2.6-sles10.series @@ -1,7 +1,7 @@ lustre_version.patch vfs_intent-2.6-sles10.patch vfs_nointent-2.6-sles10.patch -vfs_races-2.6.12.patch +vfs_races-2.6.18-vanilla.patch ext3-wantedi-misc-2.6-suse.patch jbd-2.6.10-jcberr.patch 
nfs-cifs-intent-2.6-fc5.patch @@ -11,7 +11,6 @@ export_symbols-2.6.12.patch dev_read_only-2.6-fc5.patch export-2.6-fc5.patch lookup_bdev_init_intent.patch -8kstack-2.6.12.patch remove-suid-2.6-suse.patch export-show_task-2.6-fc5.patch sd_iostats-2.6-rhel4.patch diff --git a/lustre/kernel_patches/series/2.6-suse-newer.series b/lustre/kernel_patches/series/2.6-suse-newer.series index 4068bed..f2b2c7b 100644 --- a/lustre/kernel_patches/series/2.6-suse-newer.series +++ b/lustre/kernel_patches/series/2.6-suse-newer.series @@ -9,3 +9,5 @@ fsprivate-2.6.patch dcache-qstr-api-fix-2.6-suse.patch iallocsem_consistency.patch tcp-zero-copy-2.6.5-7.244.patch +jbd-stats-2.6.5.patch +bitops_ext2_find_next_le_bit-2.6.patch diff --git a/lustre/kernel_patches/series/2.6.18-vanilla.series b/lustre/kernel_patches/series/2.6.18-vanilla.series index 0ac21df..64a1e82 100644 --- a/lustre/kernel_patches/series/2.6.18-vanilla.series +++ b/lustre/kernel_patches/series/2.6.18-vanilla.series @@ -4,17 +4,16 @@ vfs_nointent-2.6.18-vanilla.patch vfs_races-2.6.18-vanilla.patch ext3-wantedi-misc-2.6.18-vanilla.patch jbd-jcberr-2.6.18-vanilla.patch -nfs-cifs-intent-2.6.18-vanilla.patch iopen-misc-2.6.18-vanilla.patch export-truncate-2.6.18-vanilla.patch export_symbols-2.6.18-vanilla.patch dev_read_only-2.6.18-vanilla.patch export-2.6.18-vanilla.patch -lookup_bdev_init_intent.patch +lookup_bdev_init_intent-2.6.18-vanilla.patch 8kstack-2.6.12.patch remove-suid-2.6-suse.patch export-show_task-2.6.18-vanilla.patch sd_iostats-2.6-rhel4.patch export_symbol_numa-2.6-fc5.patch tcp-zero-copy-2.6.18-vanilla.patch -vfs_intent-2.6-fc5-fix.patch +export-do_kern_mount.patch diff --git a/lustre/kernel_patches/targets/2.6-suse.target.in b/lustre/kernel_patches/targets/2.6-suse.target.in index e4d8f5c..22a8b5c 100644 --- a/lustre/kernel_patches/targets/2.6-suse.target.in +++ b/lustre/kernel_patches/targets/2.6-suse.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.5" -lnxrel="7.267" +lnxrel="7.276" 
KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2 # they include our patches diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 5ccc999..1bdaff4 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -324,7 +324,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */ cli->cl_max_mds_easize = sizeof(struct lov_mds_md); cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie); - cli->cl_sandev = to_kdev_t(0); if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) { @@ -1033,14 +1032,13 @@ void target_abort_recovery(void *data) target_cancel_recovery_timer(obd); spin_unlock_bh(&obd->obd_processing_task_lock); - CERROR("%s: recovery period over; disconnecting unfinished clients.\n", - obd->obd_name); + LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished " + "clients.\n", obd->obd_name); class_disconnect_stale_exports(obd); abort_recovery_queue(obd); target_finish_recovery(obd); - - ptlrpc_run_recovery_over_upcall(obd); + CDEBUG(D_HA, "%s: recovery complete\n", obd_uuid2str(&obd->obd_uuid)); EXIT; } @@ -1345,7 +1343,8 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) spin_unlock_bh(&obd->obd_processing_task_lock); target_finish_recovery(obd); - ptlrpc_run_recovery_over_upcall(obd); + CDEBUG(D_HA, "%s: recovery complete\n", + obd_uuid2str(&obd->obd_uuid)); } else { CWARN("%s: %d recoverable clients remain\n", obd->obd_name, obd->obd_recoverable_clients); @@ -1510,7 +1509,9 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct obd_device *master_obd; struct lustre_quota_ctxt *qctxt; - struct qunit_data *qdata, *rep; + struct qunit_data *qdata; + void* rep; + struct qunit_data_old *qdata_old; int rc = 0; int repsize[2] = { sizeof(struct ptlrpc_body), sizeof(struct qunit_data) }; @@ -1521,11 +1522,27 @@ int 
target_handle_dqacq_callback(struct ptlrpc_request *req) CERROR("packing reply failed!: rc = %d\n", rc); RETURN(rc); } - rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*rep)); - LASSERT(rep); - - qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), - lustre_swab_qdata); + LASSERT(req->rq_export); + + /* fixed for bug10707 */ + if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && + !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { + CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); + rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data)); + LASSERT(rep); + qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), + lustre_swab_qdata); + } else { + CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); + rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data_old)); + LASSERT(rep); + qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old), + lustre_swab_qdata_old); + qdata = lustre_quota_old_to_new(qdata_old); + } + if (qdata == NULL) { CERROR("Can't unpack qunit_data\n"); RETURN(-EPROTO); @@ -1544,7 +1561,13 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) "dqacq failed! 
(rc:%d)\n", rc); /* the qd_count might be changed in lqc_handler */ - memcpy(rep, qdata, sizeof(*rep)); + if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && + !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { + memcpy(rep,qdata,sizeof(*qdata)); + } else { + qdata_old = lustre_quota_new_to_old(qdata); + memcpy(rep,qdata_old,sizeof(*qdata_old)); + } req->rq_status = rc; rc = ptlrpc_reply(req); diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index fa4657b..16b65fb 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1401,3 +1401,134 @@ void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh) LDLM_LOCK_PUT(lock); } + +void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask, + const char *file, const char *func, const int line, + const char *fmt, va_list args); +void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask, + const char *file, const char *func, const int line, + const char *fmt, ...); + +void +ldlm_lock_debug(cfs_debug_limit_state_t *cdls, + __u32 level, struct ldlm_lock *lock, + const char *file, const char *func, const int line, + char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + cdebug_va(cdls, level, file, func, line, fmt, args); + va_end(args); + + if (lock->l_resource == NULL) { + cdebug(cdls, level, file, func, line, + " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " + LPX64" expref: %d pid: %u\n", lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? 
+ atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + return; + } + + switch (lock->l_resource->lr_type) { + case LDLM_EXTENT: + cdebug(cdls, level, file, func, line, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 + "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 + " expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_policy_data.l_extent.start, + lock->l_policy_data.l_extent.end, + lock->l_req_extent.start, lock->l_req_extent.end, + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + break; + + case LDLM_FLOCK: + cdebug(cdls, level, file, func, line, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " + "["LPU64"->"LPU64"] flags: %x remote: "LPX64 + " expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, lock, + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_policy_data.l_flock.pid, + lock->l_policy_data.l_flock.start, + lock->l_policy_data.l_flock.end, + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? 
+ atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + break; + + case LDLM_IBITS: + cdebug(cdls, level, file, func, line, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " + "flags: %x remote: "LPX64" expref: %d " + "pid %u\n", + lock->l_resource->lr_namespace->ns_name, + lock, lock->l_handle.h_cookie, + atomic_read (&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + lock->l_policy_data.l_inodebits.bits, + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? + atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + break; + + default: + cdebug(cdls, level, file, func, line, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " + "remote: "LPX64" expref: %d pid: %u\n", + lock->l_resource->lr_namespace->ns_name, + lock, lock->l_handle.h_cookie, + atomic_read (&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + lock->l_resource->lr_name.name[0], + lock->l_resource->lr_name.name[1], + atomic_read(&lock->l_resource->lr_refcount), + ldlm_typename[lock->l_resource->lr_type], + lock->l_flags, lock->l_remote_handle.cookie, + lock->l_export ? 
+ atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_pid); + break; + } +} +EXPORT_SYMBOL(ldlm_lock_debug); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b12a4bf..e683de7 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -118,6 +118,7 @@ static int expired_lock_main(void *arg) { struct list_head *expired = &expired_lock_thread.elt_expired_locks; struct l_wait_info lwi = { 0 }; + int do_dump; ENTRY; cfs_daemonize("ldlm_elt"); @@ -145,6 +146,8 @@ static int expired_lock_main(void *arg) expired_lock_thread.elt_dump = 0; } + do_dump = 0; + while (!list_empty(expired)) { struct obd_export *export; struct ldlm_lock *lock; @@ -169,12 +172,18 @@ static int expired_lock_main(void *arg) export = class_export_get(lock->l_export); spin_unlock_bh(&waiting_locks_spinlock); + do_dump++; class_fail_export(export); class_export_put(export); spin_lock_bh(&waiting_locks_spinlock); } spin_unlock_bh(&waiting_locks_spinlock); + if (do_dump && obd_dump_on_eviction) { + CERROR("dump the log upon eviction\n"); + libcfs_debug_dumplog(); + } + if (expired_lock_thread.elt_state == ELT_TERMINATE) break; } diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 9cc6806..9edfc92 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -894,6 +894,7 @@ static int pages_io(int xfer, loff_t pos) } } + unlink(path); t_touch(path); fd = t_open(path); @@ -972,7 +973,8 @@ int t50(char *name) while (np <= _npages) { printf("%3d per xfer(total %d)...\t", np, _npages); fflush(stdout); - pages_io(np, offset); + if (pages_io(np, offset) != 0) + return 1; np += np; } LEAVE(); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index ca33b76..9c40c36 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -192,7 +192,6 @@ int ll_drop_dentry(struct dentry *dentry) if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { #else if (!d_unhashed(dentry)) { - struct inode *inode = 
dentry->d_inode; #endif CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p " "inode %p refc %d\n", dentry->d_name.len, @@ -203,19 +202,19 @@ int ll_drop_dentry(struct dentry *dentry) * sys_getcwd() could return -ENOENT -bzzz */ #ifdef LUSTRE_KERNEL_VERSION dentry->d_flags |= DCACHE_LUSTRE_INVALID; -#else - if (!inode || !S_ISDIR(inode->i_mode)) - __d_drop(dentry); -#endif - #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) __d_drop(dentry); - if (inode) { + if (dentry->d_inode) { /* Put positive dentries to orphan list */ - hlist_add_head(&dentry->d_hash, - &ll_i2sbi(inode)->ll_orphan_dentry_list); + list_add(&dentry->d_hash, + &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list); } #endif +#else + if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) + __d_drop(dentry); +#endif + } unlock_dentry(dentry); return 0; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 21052ba..e9e07c0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -648,7 +648,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, ldlm_policy_data_t tmpex; unsigned long start, end, count, skip, i, j; struct page *page; - int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA; + int rc, rc2, l_flags, discard = lock->l_flags & LDLM_FL_DISCARD_DATA; struct lustre_handle lockh; ENTRY; @@ -709,7 +709,6 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, * batching writeback under the lock explicitly. 
*/ for (i = start, j = start % count; i <= end; j++, i++, tmpex.l_extent.start += PAGE_CACHE_SIZE) { - int l_flags; if (j == count) { CDEBUG(D_PAGE, "skip index %lu to %lu\n", i, i + skip); i += skip; @@ -747,17 +746,13 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, lock_page(page); } - l_flags = LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING | - LDLM_FL_TEST_LOCK; - tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1; - /* check to see if another DLM lock covers this page */ + l_flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; + /* check to see if another DLM lock covers this page b=2765 */ rc2 = obd_match(ll_s2dtexp(inode->i_sb), lsm, LDLM_EXTENT, &tmpex, LCK_PR | LCK_PW, &l_flags, inode, &lockh); - /* rc2 < 0 means some error occured, e.g. export was down. - * rc2 == 0 means nothing was matched */ - if (rc2 <= 0 && page->mapping != NULL) { + if (rc2 == 0 && page->mapping != NULL) { struct ll_async_page *llap = llap_cast_private(page); // checking again to account for writeback's lock_page() LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n"); @@ -1290,6 +1285,7 @@ repeat: /* BUG: 5972 */ file_accessed(file); retval = generic_file_read(file, buf, chunk, ppos); + ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 0); ll_tree_unlock(&tree); @@ -1398,6 +1394,7 @@ repeat: CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", inode->i_ino, chunk, *ppos); retval = generic_file_write(file, buf, chunk, ppos); + ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 1); out: ll_tree_unlock(&tree); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 523ac02..db2094b 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -27,15 +27,7 @@ #endif #define LL_IT2STR(it) ((it) ? 
ldlm_it2str((it)->it_op) : "0") -#if !defined(LUSTRE_KERNEL_VERSION) || (LUSTRE_KERNEL_VERSION < 46) #define LUSTRE_FPRIVATE(file) ((file)->private_data) -#else -#if (LUSTRE_KERNEL_VERSION < 46) -#define LUSTRE_FPRIVATE(file) ((file)->private_data) -#else -#define LUSTRE_FPRIVATE(file) ((file)->fs_private) -#endif -#endif #ifdef LUSTRE_KERNEL_VERSION static inline struct lookup_intent *ll_nd2it(struct nameidata *nd) @@ -165,6 +157,34 @@ struct ll_ra_info { unsigned long ra_stats[_NR_RA_STAT]; }; +/* LL_HIST_MAX=32 causes an overflow */ +#define LL_HIST_MAX 28 +#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */ +#define LL_PROCESS_HIST_MAX 10 +struct per_process_info { + pid_t pid; + struct obd_histogram pp_r_hist; + struct obd_histogram pp_w_hist; +}; + +/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */ +struct ll_rw_extents_info { + struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1]; +}; + +#define LL_OFFSET_HIST_MAX 100 +struct ll_rw_process_info { + pid_t rw_pid; + int rw_op; + loff_t rw_range_start; + loff_t rw_range_end; + loff_t rw_last_file_pos; + loff_t rw_offset; + size_t rw_smallest_extent; + size_t rw_largest_extent; + struct file *rw_last_file; +}; + /* flags for sbi->ll_flags */ #define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ #define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ @@ -188,7 +208,7 @@ struct ll_sb_info { struct list_head ll_conn_chain; /* per-conn chain of SBs */ struct lustre_client_ocd ll_lco; - struct hlist_head ll_orphan_dentry_list; /*please don't ask -p*/ + struct list_head ll_orphan_dentry_list; /*please don't ask -p*/ struct ll_close_queue *ll_lcq; struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ @@ -207,6 +227,12 @@ struct ll_sb_info { /* =0 - hold lock over whole read/write * >0 - max. 
chunk to be read/written w/o lock re-acquiring */ unsigned long ll_max_rw_chunk; + struct ll_rw_extents_info ll_rw_extents_info; + int ll_extent_process_count; + struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX]; + unsigned int ll_offset_process_count; + struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX]; + unsigned int ll_rw_offset_entry_count; }; #define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024) @@ -442,6 +468,8 @@ int ll_release_openhandle(struct dentry *, struct lookup_intent *); int ll_md_close(struct obd_export *md_exp, struct inode *inode, struct file *file); int ll_md_real_close(struct inode *inode, int flags); +extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file + *file, size_t count, int rw); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index d36139f..f4faaab 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -54,6 +54,7 @@ static struct ll_sb_info *ll_init_sbi(void) { struct ll_sb_info *sbi = NULL; class_uuid_t uuid; + int i; ENTRY; OBD_ALLOC(sbi, sizeof(*sbi)); @@ -63,7 +64,6 @@ static struct ll_sb_info *ll_init_sbi(void) spin_lock_init(&sbi->ll_lock); spin_lock_init(&sbi->ll_lco.lco_lock); INIT_LIST_HEAD(&sbi->ll_pglist); - sbi->ll_pglist_gen = 0; if (num_physpages >> (20 - PAGE_SHIFT) < 512) sbi->ll_async_page_max = num_physpages / 2; else @@ -74,7 +74,7 @@ static struct ll_sb_info *ll_init_sbi(void) SBI_DEFAULT_READAHEAD_WHOLE_MAX; INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list); + INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); class_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); @@ -86,6 +86,11 @@ static struct ll_sb_info *ll_init_sbi(void) INIT_LIST_HEAD(&sbi->ll_deathrow); spin_lock_init(&sbi->ll_deathrow_lock); + for (i = 0; i < 
LL_PROCESS_HIST_MAX; i++) { + spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock); + spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock); + } + RETURN(sbi); } @@ -143,7 +148,8 @@ static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) RETURN(rc); } -int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) +static int client_common_fill_super(struct super_block *sb, + char *md, char *dt) { struct inode *root = 0; struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -151,16 +157,16 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) struct lu_fid rootfid; struct obd_statfs osfs; struct ptlrpc_request *request = NULL; - struct lustre_handle osc_conn = {0, }; + struct lustre_handle dt_conn = {0, }; struct lustre_handle md_conn = {0, }; struct obd_connect_data *data = NULL; - struct lustre_md md; + struct lustre_md lmd; int err; ENTRY; - obd = class_name2obd(mdc); + obd = class_name2obd(md); if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); + CERROR("MD %s: not setup or attached\n", md); RETURN(-EINVAL); } @@ -170,7 +176,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) if (proc_lustre_fs_root) { err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - osc, mdc); + dt, md); if (err < 0) CERROR("could not register mount in /proc/lustre"); } @@ -197,19 +203,20 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { - CERROR("An MDT (mdc %s) is performing recovery, of which this" - " client is not a part. Please wait for recovery to " - "complete, abort, or time out.\n", mdc); + LCONSOLE_ERROR("An MDT (md %s) is performing recovery, of " + "which this client is not a part. 
Please wait " + "for recovery to complete, abort, or " + "time out.\n", md); GOTO(out, err); } else if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); + CERROR("cannot connect to %s: rc = %d\n", md, err); GOTO(out, err); } sbi->ll_md_exp = class_conn2export(&md_conn); err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ); if (err) - GOTO(out_mdc, err); + GOTO(out_md, err); LASSERT(osfs.os_bsize); sb->s_blocksize = osfs.os_bsize; @@ -243,6 +250,8 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) * on all clients. */ /* s_dev is also used in lt_compare() to compare two fs, but that is * only a node-local comparison. */ + + /* XXX: this will not work with LMV */ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid, strlen(sbi2mdc(sbi)->cl_target_uuid.uuid)); #endif @@ -251,12 +260,12 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) err = ll_fid_md_init(sbi); if (err) { CERROR("can't init FIDs framework, rc %d\n", err); - GOTO(out_mdc, err); + GOTO(out_md, err); } - obd = class_name2obd(osc); + obd = class_name2obd(dt); if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); + CERROR("DT %s: not setup or attached\n", dt); GOTO(out_md_fid, err = -ENODEV); } @@ -271,19 +280,18 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) obd->obd_upcall.onu_upcall = ll_ocd_update; data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << PAGE_SHIFT; - - err = obd_connect(NULL, &osc_conn, obd, &sbi->ll_sb_uuid, data); + err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { - CERROR("An OST (osc %s) is performing recovery, of which this" - " client is not a part. Please wait for recovery to " - "complete, abort, or time out.\n", osc); + LCONSOLE_ERROR("An OST (dt %s) is performing recovery, of which this" + " client is not a part. 
Please wait for recovery to " + "complete, abort, or time out.\n", dt); GOTO(out, err); } else if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, err); + CERROR("cannot connect to %s: rc = %d\n", dt, err); + GOTO(out_md, err); } - sbi->ll_dt_exp = class_conn2export(&osc_conn); + sbi->ll_dt_exp = class_conn2export(&dt_conn); spin_lock(&sbi->ll_lco.lco_lock); sbi->ll_lco.lco_flags = data->ocd_connect_flags; @@ -297,7 +305,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) LCONSOLE_ERROR("There are no OST's in this filesystem. " "There must be at least one active OST for " "a client to start.\n"); - GOTO(out_osc, err); + GOTO(out_dt, err); } if (!ll_async_page_slab) { @@ -307,14 +315,14 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) ll_async_page_slab_size, 0, 0, NULL, NULL); if (!ll_async_page_slab) - GOTO(out_osc, -ENOMEM); + GOTO(out_dt, -ENOMEM); } /* init FIDs framework */ err = ll_fid_dt_init(sbi); if (err) { CERROR("can't init FIDs framework, rc %d\n", err); - GOTO(out_osc, err); + GOTO(out_dt, err); } err = md_getstatus(sbi->ll_md_exp, &rootfid); @@ -338,24 +346,24 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) 0, &request); if (err) { CERROR("md_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, err); + GOTO(out_dt, err); } err = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp, - &md); + &lmd); if (err) { CERROR("failed to understand root inode md: rc = %d\n", err); ptlrpc_req_finished (request); - GOTO(out_osc, err); + GOTO(out_dt, err); } LASSERT(fid_is_sane(&sbi->ll_root_fid)); - root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &md); + root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd); ptlrpc_req_finished(request); if (root == NULL || is_bad_inode(root)) { - md_free_lustre_md(sbi->ll_dt_exp, &md); + md_free_lustre_md(sbi->ll_dt_exp, &lmd); CERROR("lustre_lite: 
bad iget4 for root\n"); GOTO(out_root, err = -EBADF); } @@ -386,12 +394,12 @@ out_root: iput(root); out_dt_fid: obd_fid_fini(sbi->ll_dt_exp); -out_osc: +out_dt: obd_disconnect(sbi->ll_dt_exp); sbi->ll_dt_exp = NULL; out_md_fid: obd_fid_fini(sbi->ll_md_exp); -out_mdc: +out_md: obd_disconnect(sbi->ll_md_exp); sbi->ll_md_exp = NULL; out: @@ -461,12 +469,12 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) void lustre_throw_orphan_dentries(struct super_block *sb) { - struct hlist_node *tmp, *next; + struct dentry *dentry, *next; struct ll_sb_info *sbi = ll_s2sbi(sb); /* Do this to get rid of orphaned dentries. That is not really trw. */ - hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { - struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); + list_for_each_entry_safe(dentry, next, &sbi->ll_orphan_dentry_list, + d_hash) { CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping " "before and after shrink_dcache_parent\n", dentry->d_name.len, dentry->d_name.name, dentry, next); @@ -901,10 +909,14 @@ int ll_fill_super(struct super_block *sb) CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); + cfs_module_get(); + /* client additional sb info */ lsi->lsi_llsbi = sbi = ll_init_sbi(); - if (!sbi) + if (!sbi) { + cfs_module_put(); RETURN(-ENOMEM); + } err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags); if (err) @@ -1042,6 +1054,9 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); LCONSOLE_WARN("client %s umount complete\n", ll_instance); + + cfs_module_put(); + EXIT; } /* client_put_super */ @@ -1082,15 +1097,11 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) inode = igrab(lock->l_ast_data); } else { inode = lock->l_ast_data; - if (inode->i_state & I_FREEING) - __LDLM_DEBUG(D_INFO, lock, - "l_ast_data %p is bogus: magic %08x", - lock->l_ast_data, lli->lli_inode_magic); - else - __LDLM_DEBUG(D_WARNING, lock, - "l_ast_data %p is bogus: magic %08x", - 
lock->l_ast_data, lli->lli_inode_magic); - + ldlm_lock_debug(NULL, inode->i_state & I_FREEING ? + D_INFO : D_WARNING, + lock, __FILE__, __func__, __LINE__, + "l_ast_data %p is bogus: magic %08x", + lock->l_ast_data, lli->lli_inode_magic); inode = NULL; } } diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 4cc1777..8b03321 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -35,6 +35,9 @@ struct proc_dir_entry *proc_lustre_fs_root; /* /proc/lustre/llite mount point registration */ struct file_operations llite_dump_pgcache_fops; struct file_operations ll_ra_stats_fops; +struct file_operations ll_rw_extents_stats_fops; +struct file_operations ll_rw_extents_stats_pp_fops; +struct file_operations ll_rw_offset_stats_fops; static int ll_rd_blksize(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -483,12 +486,31 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, entry->proc_fops = &llite_dump_pgcache_fops; entry->data = sbi; - entry = create_proc_entry("read_ahead_stats", 0444, sbi->ll_proc_root); + entry = create_proc_entry("read_ahead_stats", 0644, sbi->ll_proc_root); if (entry == NULL) GOTO(out, err = -ENOMEM); entry->proc_fops = &ll_ra_stats_fops; entry->data = sbi; + entry = create_proc_entry("extents_stats", 0644, sbi->ll_proc_root); + if (entry == NULL) + GOTO(out, err = -ENOMEM); + entry->proc_fops = &ll_rw_extents_stats_fops; + entry->data = sbi; + + entry = create_proc_entry("extents_stats_per_process", 0644, + sbi->ll_proc_root); + if (entry == NULL) + GOTO(out, err = -ENOMEM); + entry->proc_fops = &ll_rw_extents_stats_pp_fops; + entry->data = sbi; + + entry = create_proc_entry("offset_stats", 0644, sbi->ll_proc_root); + if (entry == NULL) + GOTO(out, err = -ENOMEM); + entry->proc_fops = &ll_rw_offset_stats_fops; + entry->data = sbi; + svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES); if (svc_stats == NULL) { err = -ENOMEM; @@ -687,13 +709,13 @@ static void 
*llite_dump_pgcache_seq_next(struct seq_file *seq, void *v, return dummy_llap; } -static void llite_dump_pgcache_seq_stop(struct seq_file *seq, void *v) +static void null_stop(struct seq_file *seq, void *v) { } struct seq_operations llite_dump_pgcache_seq_sops = { .start = llite_dump_pgcache_seq_start, - .stop = llite_dump_pgcache_seq_stop, + .stop = null_stop, .next = llite_dump_pgcache_seq_next, .show = llite_dump_pgcache_seq_show, }; @@ -801,24 +823,22 @@ static int ll_ra_stats_seq_show(struct seq_file *seq, void *v) return 0; } -static void *ll_ra_stats_seq_start(struct seq_file *p, loff_t *pos) +static void *ll_stats_seq_start(struct seq_file *p, loff_t *pos) { if (*pos == 0) return (void *)1; return NULL; } -static void *ll_ra_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) +static void *ll_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) { ++*pos; return NULL; } -static void ll_ra_stats_seq_stop(struct seq_file *p, void *v) -{ -} + struct seq_operations ll_ra_stats_seq_sops = { - .start = ll_ra_stats_seq_start, - .stop = ll_ra_stats_seq_stop, - .next = ll_ra_stats_seq_next, + .start = ll_stats_seq_start, + .stop = null_stop, + .next = ll_stats_seq_next, .show = ll_ra_stats_seq_show, }; @@ -859,5 +879,393 @@ struct file_operations ll_ra_stats_fops = { .release = seq_release, }; +#define pct(a,b) (b ? 
a * 100 / b : 0) + +static void ll_display_extents_info(struct ll_rw_extents_info *io_extents, + struct seq_file *seq, int which) +{ + unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; + unsigned long start, end, r, w; + char *unitp = "KMGTPEZY"; + int i, units = 10; + struct per_process_info *pp_info = &io_extents->pp_extents[which]; + + read_cum = 0; + write_cum = 0; + start = 0; + + for(i = 0; i < LL_HIST_MAX; i++) { + read_tot += pp_info->pp_r_hist.oh_buckets[i]; + write_tot += pp_info->pp_w_hist.oh_buckets[i]; + } + + for(i = 0; i < LL_HIST_MAX; i++) { + r = pp_info->pp_r_hist.oh_buckets[i]; + w = pp_info->pp_w_hist.oh_buckets[i]; + read_cum += r; + write_cum += w; + end = 1 << (i + LL_HIST_START - units); + seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | " + "%14lu %4lu %4lu\n", start, *unitp, end, *unitp, + (i == LL_HIST_MAX - 1) ? '+' : ' ', + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); + start = end; + if (start == 1<<10) { + start = 1; + units += 10; + unitp++; + } + if (read_cum == read_tot && write_cum == write_tot) + break; + } +} + +static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) +{ + struct timeval now; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + int k; + + do_gettimeofday(&now); + + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); + seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", + "extents", "calls", "%", "cum%", + "calls", "%", "cum%"); + + spin_lock(&sbi->ll_lock); + for(k = 0; k < LL_PROCESS_HIST_MAX; k++) { + if(io_extents->pp_extents[k].pid != 0) { + seq_printf(seq, "\nPID: %d\n", + io_extents->pp_extents[k].pid); + ll_display_extents_info(io_extents, seq, k); + } + } + spin_unlock(&sbi->ll_lock); + + return 0; +} + +struct seq_operations ll_rw_extents_stats_pp_seq_sops = { + 
.start = ll_stats_seq_start, + .stop = null_stop, + .next = ll_stats_seq_next, + .show = ll_rw_extents_stats_pp_seq_show, +}; + +static int ll_rw_extents_stats_pp_seq_open(struct inode *inode, + struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc; + + rc = seq_open(file, &ll_rw_extents_stats_pp_seq_sops); + if(rc) + return rc; + seq = file->private_data; + seq->private = dp->data; + return 0; +} + +static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file, + const char *buf, size_t len, + loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + int i; + + spin_lock(&sbi->ll_lock); + for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { + io_extents->pp_extents[i].pid = 0; + lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); + lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); + } + spin_unlock(&sbi->ll_lock); + return len; +} + +struct file_operations ll_rw_extents_stats_pp_fops = { + .owner = THIS_MODULE, + .open = ll_rw_extents_stats_pp_seq_open, + .read = seq_read, + .write = ll_rw_extents_stats_pp_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) +{ + struct timeval now; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + + do_gettimeofday(&now); + + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + + seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); + seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", + "extents", "calls", "%", "cum%", + "calls", "%", "cum%"); + + spin_lock(&sbi->ll_lock); + ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX); + spin_unlock(&sbi->ll_lock); + + return 0; +} + +struct seq_operations ll_rw_extents_stats_seq_sops = { + .start = ll_stats_seq_start, + .stop = 
null_stop, + .next = ll_stats_seq_next, + .show = ll_rw_extents_stats_seq_show, +}; + +static int ll_rw_extents_stats_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc; + + rc = seq_open(file, &ll_rw_extents_stats_seq_sops); + if(rc) + return rc; + seq = file->private_data; + seq->private = dp->data; + return 0; +} + +static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf, + size_t len, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + + lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist); + lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist); + + return len; +} + +struct file_operations ll_rw_extents_stats_fops = { + .owner = THIS_MODULE, + .open = ll_rw_extents_stats_seq_open, + .read = seq_read, + .write = ll_rw_extents_stats_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file + *file, size_t count, int rw) +{ + int i, cur = -1; + struct ll_rw_process_info *process; + struct ll_rw_process_info *offset; + int *off_count = &sbi->ll_rw_offset_entry_count; + int *process_count = &sbi->ll_offset_process_count; + struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + + process = sbi->ll_rw_process_info; + offset = sbi->ll_rw_offset_info; + + spin_lock(&sbi->ll_lock); + /* Extent statistics */ + for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { + if(io_extents->pp_extents[i].pid == pid) { + cur = i; + break; + } + } + + if (cur == -1) { + /* new process */ + sbi->ll_extent_process_count = + (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX; + cur = sbi->ll_extent_process_count; + io_extents->pp_extents[cur].pid = pid; + lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist); + 
lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist); + } + + for(i = 0; (count >= (1 << LL_HIST_START << i)) && + (i < (LL_HIST_MAX - 1)); i++); + if (rw == 0) { + io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++; + io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++; + } else { + io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; + io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; + } + + /* Offset statistics */ + for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { + if (process[i].rw_pid == pid) { + if (process[i].rw_last_file != file) { + process[i].rw_range_start = file->f_pos; + process[i].rw_last_file_pos = + file->f_pos + count; + process[i].rw_smallest_extent = count; + process[i].rw_largest_extent = count; + process[i].rw_offset = 0; + process[i].rw_last_file = file; + spin_unlock(&sbi->ll_lock); + return; + } + if (process[i].rw_last_file_pos != file->f_pos) { + *off_count = + (*off_count + 1) % LL_OFFSET_HIST_MAX; + offset[*off_count].rw_op = process[i].rw_op; + offset[*off_count].rw_pid = pid; + offset[*off_count].rw_range_start = + process[i].rw_range_start; + offset[*off_count].rw_range_end = + process[i].rw_last_file_pos; + offset[*off_count].rw_smallest_extent = + process[i].rw_smallest_extent; + offset[*off_count].rw_largest_extent = + process[i].rw_largest_extent; + offset[*off_count].rw_offset = + process[i].rw_offset; + process[i].rw_op = rw; + process[i].rw_range_start = file->f_pos; + process[i].rw_smallest_extent = count; + process[i].rw_largest_extent = count; + process[i].rw_offset = file->f_pos - + process[i].rw_last_file_pos; + } + if(process[i].rw_smallest_extent > count) + process[i].rw_smallest_extent = count; + if(process[i].rw_largest_extent < count) + process[i].rw_largest_extent = count; + process[i].rw_last_file_pos = file->f_pos + count; + spin_unlock(&sbi->ll_lock); + return; + } + } + *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX; + process[*process_count].rw_pid = pid; 
+ process[*process_count].rw_op = rw; + process[*process_count].rw_range_start = file->f_pos; + process[*process_count].rw_last_file_pos = file->f_pos + count; + process[*process_count].rw_smallest_extent = count; + process[*process_count].rw_largest_extent = count; + process[*process_count].rw_offset = 0; + process[*process_count].rw_last_file = file; + spin_unlock(&sbi->ll_lock); +} + +char lpszt[] = LPSZ; + +static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) +{ + struct timeval now; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_process_info *offset = sbi->ll_rw_offset_info; + struct ll_rw_process_info *process = sbi->ll_rw_process_info; + char format[50]; + int i; + + do_gettimeofday(&now); + + spin_lock(&sbi->ll_lock); + + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n", + "R/W", "PID", "RANGE START", "RANGE END", + "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET"); + sprintf(format, "%s%s%s%s%s\n", + "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1, " %14Ld"); + /* We stored the discontiguous offsets here; print them first */ + for(i = 0; i < LL_OFFSET_HIST_MAX; i++) { + if (offset[i].rw_pid != 0) + /* Is there a way to snip the '%' off of LPSZ? */ + seq_printf(seq, format, + offset[i].rw_op ? 'W' : 'R', + offset[i].rw_pid, + offset[i].rw_range_start, + offset[i].rw_range_end, + offset[i].rw_smallest_extent, + offset[i].rw_largest_extent, + offset[i].rw_offset); + } + /* Then print the current offsets for each process */ + for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { + if (process[i].rw_pid != 0) + seq_printf(seq, format, + process[i].rw_op ? 
'W' : 'R', + process[i].rw_pid, + process[i].rw_range_start, + process[i].rw_last_file_pos, + process[i].rw_smallest_extent, + process[i].rw_largest_extent, + process[i].rw_offset); + } + spin_unlock(&sbi->ll_lock); + + return 0; +} + +struct seq_operations ll_rw_offset_stats_seq_sops = { + .start = ll_stats_seq_start, + .stop = null_stop, + .next = ll_stats_seq_next, + .show = ll_rw_offset_stats_seq_show, +}; + +static int ll_rw_offset_stats_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc; + + rc = seq_open(file, &ll_rw_offset_stats_seq_sops); + if (rc) + return rc; + seq = file->private_data; + seq->private = dp->data; + return 0; +} + +static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf, + size_t len, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct ll_sb_info *sbi = seq->private; + struct ll_rw_process_info *process_info = sbi->ll_rw_process_info; + struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info; + + spin_lock(&sbi->ll_lock); + sbi->ll_offset_process_count = 0; + sbi->ll_rw_offset_entry_count = 0; + memset(process_info, 0, sizeof(struct ll_rw_process_info) * + LL_PROCESS_HIST_MAX); + memset(offset_info, 0, sizeof(struct ll_rw_process_info) * + LL_OFFSET_HIST_MAX); + spin_unlock(&sbi->ll_lock); + + return len; +} + +struct file_operations ll_rw_offset_stats_fops = { + .owner = THIS_MODULE, + .open = ll_rw_offset_stats_seq_open, + .read = seq_read, + .write = ll_rw_offset_stats_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + LPROCFS_INIT_VARS(llite, NULL, lprocfs_obd_vars) #endif /* LPROCFS */ diff --git a/lustre/llite/super.c b/lustre/llite/super.c index a5ca4b2..1b1351c 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -88,11 +88,10 @@ static int __init init_lustre_lite(void) static void __exit exit_lustre_lite(void) { int rc; - int rc; lustre_register_client_fill_super(NULL); 
lustre_register_client_process_config(NULL); - + ll_unregister_cache(&ll_cache_definition); rc = kmem_cache_destroy(ll_file_data_slab); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 0765586..e44dcbf 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -159,8 +159,9 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) rc = vfs_follow_link(nd, symname); ptlrpc_req_finished(request); + EXIT; out: - RETURN(rc); + return rc; } struct inode_operations ll_fast_symlink_inode_operations = { diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 63fe107..1f4b2dc 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -120,6 +120,10 @@ int ll_setxattr_common(struct inode *inode, const char *name, if (rc) RETURN(rc); + /* b10667: ignore lustre special xattr for now */ + if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) + RETURN(0); + rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, name, value, size, 0, flags, &req); if (rc) { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 5d2763e..4e3763e 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -487,7 +487,7 @@ int lmv_add_target(struct obd_device *obd, struct obd_uuid *tgt_uuid) RETURN(-EINVAL); } - rc = obd_llog_init(obd, mdc_obd, 0, NULL); + rc = obd_llog_init(obd, mdc_obd, 0, NULL, tgt_uuid); if (rc) { lmv_init_unlock(lmv); CERROR("lmv failed to setup llogging subsystems\n"); @@ -2065,7 +2065,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, } static int lmv_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 4a6a951..5ab5297 100755 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -70,7 +70,7 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int 
lmm_bytes, } if (lmm->lmm_stripe_size == 0 || - (__u64)le32_to_cpu(lmm->lmm_stripe_size) * stripe_count > ~0UL) { + (__u64)le32_to_cpu(lmm->lmm_stripe_size)*stripe_count > 0xffffffff){ CERROR("bad stripe size %u\n", le32_to_cpu(lmm->lmm_stripe_size)); lov_dump_lmm_v1(D_WARNING, lmm); @@ -93,7 +93,7 @@ lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno, obd_off *lov_off, unsigned long *swidth) { if (swidth) - *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count; + *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count; } static void @@ -101,7 +101,7 @@ lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno, obd_off *lov_off, unsigned long *swidth) { if (swidth) - *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count; + *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count; } static obd_off @@ -111,6 +111,13 @@ lsm_stripe_offset_by_index_plain(struct lov_stripe_md *lsm, return 0; } +static obd_off +lsm_stripe_offset_by_offset_plain(struct lov_stripe_md *lsm, + obd_off lov_off) +{ + return 0; +} + static int lsm_stripe_index_by_offset_plain(struct lov_stripe_md *lsm, obd_off lov_off) @@ -193,8 +200,9 @@ struct lsm_operations lsm_plain_ops = { .lsm_stripe_by_index = lsm_stripe_by_index_plain, .lsm_stripe_by_offset = lsm_stripe_by_offset_plain, .lsm_revalidate = lsm_revalidate_plain, - .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain, - .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain, + .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain, + .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain, + .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain, .lsm_lmm_verify = lsm_lmm_verify_plain, .lsm_unpackmd = lsm_unpackmd_plain, }; @@ -281,7 +289,7 @@ lsm_stripe_by_index_join(struct lov_stripe_md *lsm, int *stripeno, *stripeno -= le->le_loi_idx; if (swidth) - *swidth = lsm->lsm_stripe_size * le->le_stripe_count; + *swidth = (ulong)lsm->lsm_stripe_size * 
le->le_stripe_count; if (lov_off) { struct lov_extent *lov_le = lovea_off2le(lsm, *lov_off); @@ -314,7 +322,7 @@ lsm_stripe_by_offset_join(struct lov_stripe_md *lsm, int *stripeno, *stripeno -= le->le_loi_idx; if (swidth) - *swidth = lsm->lsm_stripe_size * le->le_stripe_count; + *swidth = (ulong)lsm->lsm_stripe_size * le->le_stripe_count; } static obd_off @@ -328,6 +336,17 @@ lsm_stripe_offset_by_index_join(struct lov_stripe_md *lsm, return le ? le->le_start : 0; } +static obd_off +lsm_stripe_offset_by_offset_join(struct lov_stripe_md *lsm, + obd_off lov_off) +{ + struct lov_extent *le; + + le = lovea_off2le(lsm, lov_off); + + return le ? le->le_start : 0; +} + static int lsm_stripe_index_by_offset_join(struct lov_stripe_md *lsm, obd_off lov_off) @@ -541,8 +560,9 @@ struct lsm_operations lsm_join_ops = { .lsm_stripe_by_index = lsm_stripe_by_index_join, .lsm_stripe_by_offset = lsm_stripe_by_offset_join, .lsm_revalidate = lsm_revalidate_join, - .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join, - .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join, + .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join, + .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_join, + .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join, .lsm_lmm_verify = lsm_lmm_verify_join, .lsm_unpackmd = lsm_unpackmd_join, }; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 1b6f31b..5c0fa75 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -215,13 +215,14 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs, int lov_fini_statfs_set(struct lov_request_set *set); /* lov_obd.c */ +void lov_fix_desc(struct lov_desc *desc); int lov_get_stripecnt(struct lov_obd *lov, int stripe_count); void lov_getref(struct obd_device *obd); void lov_putref(struct obd_device *obd); /* lov_log.c */ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid); + int 
count, struct llog_catid *logid, struct obd_uuid *uuid); int lov_llog_finish(struct obd_device *obd, int count); /* lov_pack.c */ diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 5b07c30..4ce722c 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -104,14 +104,9 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count, { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; - int i, rc = 0; + int i, rc = 0, err = 0; ENTRY; - /* We might have added an osc and not told the mds yet */ - if (count != lov->desc.ld_tgt_count) - CERROR("Origin connect mds cnt %d != lov cnt %d\n", count, - lov->desc.ld_tgt_count); - lov_getref(obd); for (i = 0; i < count; i++) { struct obd_device *child; @@ -119,20 +114,21 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count, if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; - child = lov->lov_tgts[i]->ltd_exp->exp_obd; - cctxt = llog_get_context(child, ctxt->loc_idx); if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid)) continue; - + CDEBUG(D_CONFIG, "connect %d/%d\n", i, count); + child = lov->lov_tgts[i]->ltd_exp->exp_obd; + cctxt = llog_get_context(child, ctxt->loc_idx); rc = llog_connect(cctxt, 1, logid, gen, uuid); if (rc) { CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc); - break; + if (!err) + err = rc; } } lov_putref(obd); - RETURN(rc); + RETURN(err); } /* the replicators commit callback */ @@ -181,7 +177,7 @@ static struct llog_operations lov_size_repl_logops = { }; int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, struct obd_uuid *uuid) { struct lov_obd *lov = &obd->u.lov; struct obd_device *child; @@ -198,23 +194,17 @@ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc) RETURN(rc); - CDEBUG(D_CONFIG, "llog init with %d/%d targets\n", - count, lov->desc.ld_tgt_count); - /* count may not match 
ld_tgt_count during dynamic ost add */ - lov_getref(obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { + /* count may not match lov->desc.ld_tgt_count during dynamic ost add */ + for (i = 0; i < count; i++) { if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) continue; + if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid)) + continue; + CDEBUG(D_CONFIG, "init %d/%d\n", i, count); LASSERT(lov->lov_tgts[i]->ltd_exp); child = lov->lov_tgts[i]->ltd_exp->exp_obd; - if (lov->lov_tgts[i]->ltd_exp->exp_imp_reverse) { - CERROR("NZR: idx %d import state %s\n", i, - ptlrpc_import_state_name(lov->lov_tgts[i]->ltd_exp->exp_imp_reverse->imp_state)); - } else { - CERROR("NZR: idx %d no import\n", i); - } - rc = obd_llog_init(child, tgt, 1, logid + i); + rc = obd_llog_init(child, tgt, 1, logid + i, uuid); if (rc) { CERROR("error osc_llog_init idx %d osc '%s' tgt '%s' " "(rc=%d)\n", i, child->obd_name, tgt->obd_name, diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 48b7c326..c1f4c5d 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -643,18 +643,18 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index) } } -static void lov_fix_desc(struct lov_desc *desc) +void lov_fix_desc(struct lov_desc *desc) { if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) { - CWARN("Increasing default_stripe_size "LPU64" to %u\n", - desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE); + LCONSOLE_WARN("Increasing default stripe size to min %u\n", + PTLRPC_MAX_BRW_SIZE); desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE; } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) { - CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n", - desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE); desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1); - CWARN("changing to "LPU64"\n", desc->ld_default_stripe_size); - } + LCONSOLE_WARN("Changing default stripe size to "LPU64" (a " + "multiple of %u)\n", + 
desc->ld_default_stripe_size,LOV_MIN_STRIPE_SIZE); + } if (desc->ld_default_stripe_count == 0) desc->ld_default_stripe_count = 1; @@ -662,8 +662,7 @@ static void lov_fix_desc(struct lov_desc *desc) /* from lov_setstripe */ if ((desc->ld_pattern != 0) && (desc->ld_pattern != LOV_PATTERN_RAID0)) { - CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n", - desc->ld_pattern); + LCONSOLE_WARN("Unknown stripe pattern: %#x\n",desc->ld_pattern); desc->ld_pattern = 0; } } @@ -708,9 +707,9 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) * of 4GB or larger on 32-bit CPUs. */ count = desc->ld_default_stripe_count; if ((count > 0 ? count : desc->ld_tgt_count) * - desc->ld_default_stripe_size > ~0UL) { - CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n", - desc->ld_default_stripe_size, count, ~0UL); + desc->ld_default_stripe_size > 0xffffffff) { + CERROR("LOV: stripe width "LPU64"x%u > 4294967295 bytes\n", + desc->ld_default_stripe_size, count); RETURN(-EINVAL); } @@ -836,53 +835,15 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf) } case LCFG_PARAM: { struct lprocfs_static_vars lvars; - struct lov_obd *lov = &obd->u.lov; - struct lov_desc *desc = &(lov->desc); - int i; + struct lov_desc *desc = &(obd->u.lov.desc); if (!desc) GOTO(out, rc = -EINVAL); lprocfs_init_vars(lov, &lvars); - /* setparam 0:lov_mdsA 1:default_stripe_size=1048576 - 2:default_stripe_pattern=0 3:default_stripe_offset=0 */ - for (i = 1; i < lcfg->lcfg_bufcount; i++) { - char *key, *sval; - long val; - key = lustre_cfg_buf(lcfg, i); - sval = strchr(key, '='); - if (!sval || (*(sval + 1) == 0)) { - CERROR("Can't parse param %s\n", key); - rc = -EINVAL; - /* continue parsing other params */ - continue; - } - val = simple_strtol(sval + 1, NULL, 0); - rc = 0; - /* LOV_STRIPE_* aren't settable in proc */ - if (class_match_param(key, - PARAM_LOV_STRIPE_SIZE,0) == 0) - desc->ld_default_stripe_size = val; - else if (class_match_param(key, - 
PARAM_LOV_STRIPE_COUNT, 0) == 0) - desc->ld_default_stripe_count = val; - else if (class_match_param(key, - PARAM_LOV_STRIPE_OFFSET, 0) == 0) - desc->ld_default_stripe_offset = val; - else if (class_match_param(key, - PARAM_LOV_STRIPE_PATTERN, 0) == 0) - desc->ld_pattern = val; - else - rc = class_process_proc_param(PARAM_LOV, - lvars.obd_vars, - lcfg, obd); - if (rc >= 0) { - LCONSOLE_INFO("set %s to %ld\n", key, val); - rc = 0; - } - } - lov_fix_desc(desc); + rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, + lcfg, obd); GOTO(out, rc); } default: { diff --git a/lustre/lov/lov_offset.c b/lustre/lov/lov_offset.c index 22af87e..87597b2 100644 --- a/lustre/lov/lov_offset.c +++ b/lustre/lov/lov_offset.c @@ -119,6 +119,7 @@ int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, { unsigned long ssize = lsm->lsm_stripe_size; unsigned long swidth, stripe_off, this_stripe; + uint64_t l_off, s_off; int magic = lsm->lsm_magic; int ret = 0; @@ -128,6 +129,23 @@ int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, } LASSERT(lsm_op_find(magic) != NULL); + /*It will check whether the lov_off and stripeno + *are in the same extent. 
+ *1) lov_off extent < stripeno extent, ret = -1, obd_off = 0 + *2) lov_off extent > stripeno extent, ret = 1, + * obd_off = lov_off extent offset*/ + l_off = lsm_op_find(magic)->lsm_stripe_offset_by_index(lsm, stripeno); + s_off = lsm_op_find(magic)->lsm_stripe_offset_by_offset(lsm, lov_off); + if (s_off < l_off) { + ret = -1; + *obd_off = 0; + return ret; + } else if (s_off > l_off) { + ret = 1; + *obd_off = s_off; + return ret; + } + /*If they are in the same extent, original logic*/ lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off, &swidth); diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 2107483..29f3746 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -687,7 +687,7 @@ int lov_update_create_set(struct lov_request_set *set, loi->loi_id = req->rq_oi.oi_oa->o_id; loi->loi_gr = req->rq_oi.oi_oa->o_gr; loi->loi_ost_idx = req->rq_idx; - CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPX64" at idx %d\n", + CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n", lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx); loi_init(loi); @@ -1426,8 +1426,8 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success) do_div(osfs->os_ffree, expected_stripes); spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(osfs)); - obd->obd_osfs_age = cfs_time_current_64(); + memcpy(&obd->obd_osfs, osfs, sizeof(*osfs)); + obd->obd_osfs_age = get_jiffies_64(); spin_unlock(&obd->obd_osfs_lock); RETURN(0); } @@ -1458,8 +1458,8 @@ void lov_update_statfs(struct obd_device *obd, struct obd_statfs *osfs, struct obd_statfs *lov_sfs, int success) { spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, lov_sfs, sizeof(osfs)); - obd->obd_osfs_age = cfs_time_current_64(); + memcpy(&obd->obd_osfs, lov_sfs, sizeof(*lov_sfs)); + obd->obd_osfs_age = get_jiffies_64(); spin_unlock(&obd->obd_osfs_lock); if (success == 0) { diff --git a/lustre/lov/lproc_lov.c 
b/lustre/lov/lproc_lov.c index 8930167..2fca4b1 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -31,6 +31,7 @@ #include #include #include +#include "lov_internal.h" #ifdef LPROCFS static int lov_rd_stripesize(char *page, char **start, off_t off, int count, @@ -45,6 +46,25 @@ static int lov_rd_stripesize(char *page, char **start, off_t off, int count, return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size); } +static int lov_wr_stripesize(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct lov_desc *desc; + __u64 val; + int rc; + + LASSERT(dev != NULL); + desc = &dev->u.lov.desc; + rc = lprocfs_write_u64_helper(buffer, count, &val); + if (rc) + return rc; + + desc->ld_default_stripe_size = val; + lov_fix_desc(desc); + return count; +} + static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -57,6 +77,25 @@ static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count, return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset); } +static int lov_wr_stripeoffset(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct lov_desc *desc; + __u64 val; + int rc; + + LASSERT(dev != NULL); + desc = &dev->u.lov.desc; + rc = lprocfs_write_u64_helper(buffer, count, &val); + if (rc) + return rc; + + desc->ld_default_stripe_offset = val; + lov_fix_desc(desc); + return count; +} + static int lov_rd_stripetype(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -69,6 +108,24 @@ static int lov_rd_stripetype(char *page, char **start, off_t off, int count, return snprintf(page, count, "%u\n", desc->ld_pattern); } +static int lov_wr_stripetype(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct 
lov_desc *desc; + int val, rc; + + LASSERT(dev != NULL); + desc = &dev->u.lov.desc; + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + desc->ld_pattern = val; + lov_fix_desc(desc); + return count; +} + static int lov_rd_stripecount(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -81,6 +138,24 @@ static int lov_rd_stripecount(char *page, char **start, off_t off, int count, return snprintf(page, count, "%u\n", desc->ld_default_stripe_count); } +static int lov_wr_stripecount(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct lov_desc *desc; + int val, rc; + + LASSERT(dev != NULL); + desc = &dev->u.lov.desc; + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + desc->ld_default_stripe_count = val; + lov_fix_desc(desc); + return count; +} + static int lov_rd_numobd(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -238,24 +313,22 @@ static int lov_target_seq_open(struct inode *inode, struct file *file) struct lprocfs_vars lprocfs_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, - /* If you change the stripe* names, - make sure lustre_param.h is updated */ - { "stripesize", lov_rd_stripesize, 0, 0 }, - { "stripeoffset", lov_rd_stripeoffset, 0, 0 }, - { "stripecount", lov_rd_stripecount, 0, 0 }, - { "stripetype", lov_rd_stripetype, 0, 0 }, + { "stripesize", lov_rd_stripesize, lov_wr_stripesize, 0 }, + { "stripeoffset", lov_rd_stripeoffset, lov_wr_stripeoffset, 0 }, + { "stripecount", lov_rd_stripecount, lov_wr_stripecount, 0 }, + { "stripetype", lov_rd_stripetype, lov_wr_stripetype, 0 }, { "numobd", lov_rd_numobd, 0, 0 }, { "activeobd", lov_rd_activeobd, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, - /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/ + /*{ "filegroups", lprocfs_rd_filegroups, 0, 0 },*/ { "blocksize", 
lprocfs_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "desc_uuid", lov_rd_desc_uuid, 0, 0 }, - { "qos_prio_free", lov_rd_qos_priofree, lov_wr_qos_priofree, 0 }, - { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 }, + { "qos_prio_free",lov_rd_qos_priofree, lov_wr_qos_priofree, 0 }, + { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 }, { 0 } }; diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 06296b8..760bb78 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -1433,8 +1433,10 @@ struct chk_dqblk{ __u32 dqb_valid; /* flag for above fields */ }; -static inline unsigned int const -chkquot_hash(qid_t id, int type) +static inline unsigned int chkquot_hash(qid_t id, int type) + __attribute__((__const__)); + +static inline unsigned int chkquot_hash(qid_t id, int type) { return (id * (MAXQUOTAS - type)) % NR_DQHASH; } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index a4d3811..171bd18 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1274,7 +1274,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) lprocfs_init_vars(mdc, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); - rc = obd_llog_init(obd, obd, 0, NULL); + rc = obd_llog_init(obd, obd, 0, NULL, NULL); if (rc) { mdc_cleanup(obd); CERROR("failed to setup llogging subsystems\n"); @@ -1350,7 +1350,8 @@ static int mdc_cleanup(struct obd_device *obd) static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, + struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; @@ -1457,7 +1458,8 @@ int __init mdc_init(void) int rc; struct lprocfs_static_vars lvars; lprocfs_init_vars(mdc, &lvars); - + + request_module("lquota"); quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface); 
init_obd_quota_ops(quota_interface, &mdc_obd_ops); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 1196a49..b0ff285 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -261,7 +261,7 @@ static int mds_connect_internal(struct obd_export *exp, { struct obd_device *obd = exp->exp_obd; if (data != NULL) { - data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED; + data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; data->ocd_ibits_known &= MDS_INODELOCK_FULL; /* If no known bits (which should not happen, probably, @@ -982,6 +982,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) int rc = 0; ENTRY; + OBD_COUNTER_INCREMENT(obd, getattr); + body = lustre_swab_reqbuf(req, offset, sizeof(*body), lustre_swab_mds_body); if (body == NULL) @@ -1043,6 +1045,7 @@ static int mds_statfs(struct ptlrpc_request *req) /* This will trigger a watchdog timeout */ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP, (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1); + OBD_COUNTER_INCREMENT(obd, statfs); rc = lustre_pack_reply(req, 2, size, NULL); if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { @@ -1981,7 +1984,31 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg) GOTO(err_qctxt, rc); lprocfs_init_vars(mds, &lvars); - lprocfs_obd_setup(obd, lvars.obd_vars); + if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && + lprocfs_alloc_obd_stats(obd, LPROC_MDS_LAST) == 0) { + /* Init private stats here */ + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_OPEN, + /*LPROCFS_CNTR_AVGMINMAX*/0, + "open", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_CLOSE, + 0, "close", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKNOD, + 0, "mknod", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_LINK, + 0, "link", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_UNLINK, + 0, "unlink", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKDIR, + 0, "mkdir", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RMDIR, 
+ 0, "rmdir", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RENAME, + 0, "rename", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_GETXATTR, + 0, "getxattr", "reqs"); + lprocfs_counter_init(obd->obd_stats, LPROC_MDS_SETXATTR, + 0, "setxattr", "reqs"); + } uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb); if (uuid_ptr != NULL) { @@ -2217,6 +2244,7 @@ static int mds_cleanup(struct obd_device *obd) we just need to drop our ref */ class_export_put(mds->mds_osc_exp); + lprocfs_free_obd_stats(obd); lprocfs_obd_cleanup(obd); lquota_cleanup(quota_interface, obd); @@ -2390,6 +2418,8 @@ static int mds_intent_policy(struct ldlm_namespace *ns, switch ((long)it->opc) { case IT_OPEN: case IT_CREAT|IT_OPEN: + lprocfs_counter_incr(req->rq_export->exp_obd->obd_stats, + LPROC_MDS_OPEN); fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock, NULL, &lockh); /* XXX swab here to assert that an mds_open reint @@ -2416,6 +2446,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, getattr_part = MDS_INODELOCK_LOOKUP; case IT_GETATTR: getattr_part |= MDS_INODELOCK_LOOKUP; + OBD_COUNTER_INCREMENT(req->rq_export->exp_obd, getattr); case IT_READDIR: fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock, &new_lock, &lockh); @@ -2714,6 +2745,7 @@ static __attribute__((unused)) int __init mds_init(void) int rc; struct lprocfs_static_vars lvars; + request_module("lquota"); quota_interface = PORTAL_SYMBOL_GET(mds_quota_interface); rc = lquota_init(quota_interface); if (rc) { diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 308102a..ab74030 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -177,7 +177,7 @@ int mds_cleanup_pending(struct obd_device *obd); /* mds/mds_log.c */ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, - struct llog_catid *logid); + struct llog_catid *logid, struct obd_uuid *uuid); int mds_llog_finish(struct obd_device *obd, int count); /* mds/mds_lov.c */ @@ -256,4 +256,19 @@ 
static inline int mds_fid2str(char *str, __u64 id, __u32 generation) return sprintf(str, "%llx:%08x", (unsigned long long)id, generation); } +/* mds/lproc_mds.c */ +enum { + LPROC_MDS_OPEN = 0, + LPROC_MDS_CLOSE, + LPROC_MDS_MKNOD, + LPROC_MDS_LINK, + LPROC_MDS_UNLINK, + LPROC_MDS_MKDIR, + LPROC_MDS_RMDIR, + LPROC_MDS_RENAME, + LPROC_MDS_GETXATTR, + LPROC_MDS_SETXATTR, + LPROC_MDS_LAST, +}; + #endif /* _MDS_INTERNAL_H */ diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index a0f0a7a..c9d33f4 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -363,7 +363,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, LASSERT(body != NULL); /* previously verified & swabbed by caller */ -#if CRAY_XT3 +#ifdef CRAY_XT3 if (req->rq_uid != LNET_UID_ANY) { /* Non-root local cluster client */ LASSERT (req->rq_uid != 0); @@ -386,7 +386,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, return rc; } -#if CRAY_XT3 +#ifdef CRAY_XT3 if (ucred->luc_uce) ucred->luc_fsgid = ucred->luc_uce->ue_primary; #endif diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index 67403eb..fa31b5f 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -178,7 +178,7 @@ static struct llog_operations mds_size_repl_logops = { }; int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, struct obd_uuid *uuid) { struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; int rc; @@ -194,9 +194,9 @@ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, if (rc) RETURN(rc); - rc = obd_llog_init(lov_obd, tgt, count, logid); + rc = obd_llog_init(lov_obd, tgt, count, logid, uuid); if (rc) - CERROR("error lov_llog_init\n"); + CERROR("lov_llog_init err %d\n", rc); RETURN(rc); } diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index cc92186..eb2845d 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -70,8 +70,8 @@ static int 
mds_lov_read_objids(struct obd_device *obd) LASSERT(!mds->mds_lov_objids_size); LASSERT(!mds->mds_lov_objids_dirty); - /* Read everything in the file, even if our current lov desc - has fewer targets. Old targets not in the lov descriptor + /* Read everything in the file, even if our current lov desc + has fewer targets. Old targets not in the lov descriptor during mds setup may still have valid objids. */ size = mds->mds_lov_objid_filp->f_dentry->d_inode->i_size; if (size == 0) @@ -88,9 +88,9 @@ static int mds_lov_read_objids(struct obd_device *obd) CERROR("Error reading objids %d\n", rc); RETURN(rc); } - - mds->mds_lov_objids_in_file = size / sizeof(*ids); - + + mds->mds_lov_objids_in_file = size / sizeof(*ids); + for (i = 0; i < mds->mds_lov_objids_in_file; i++) { CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n", mds->mds_lov_objids[i], i); @@ -102,7 +102,7 @@ int mds_lov_write_objids(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; loff_t off = 0; - int i, rc, tgts; + int i, rc, tgts; ENTRY; if (!mds->mds_lov_objids_dirty) @@ -170,9 +170,9 @@ int mds_lov_set_nextid(struct obd_device *obd) KEY_NEXT_ID, mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids, NULL); - - if (rc) - CERROR ("%s: mds_lov_set_nextid failed (%d)\n", + + if (rc) + CERROR ("%s: mds_lov_set_nextid failed (%d)\n", obd->obd_name, rc); RETURN(rc); @@ -182,7 +182,7 @@ int mds_lov_set_nextid(struct obd_device *obd) static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) { struct mds_obd *mds = &obd->u.mds; - struct lov_desc *ld; + struct lov_desc *ld; __u32 size, stripes, valsize = sizeof(mds->mds_lov_desc); int rc = 0; ENTRY; @@ -198,13 +198,13 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) /* The size of the LOV target table may have increased. 
*/ size = ld->ld_tgt_count * sizeof(obd_id); - if ((mds->mds_lov_objids_size == 0) || + if ((mds->mds_lov_objids_size == 0) || (size > mds->mds_lov_objids_size)) { obd_id *ids; - + /* add room by powers of 2 */ size = 1; - while (size < ld->ld_tgt_count) + while (size < ld->ld_tgt_count) size = size << 1; size = size * sizeof(obd_id); @@ -214,7 +214,7 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) memset(ids, 0, size); if (mds->mds_lov_objids_size) { obd_id *old_ids = mds->mds_lov_objids; - memcpy(ids, mds->mds_lov_objids, + memcpy(ids, mds->mds_lov_objids, mds->mds_lov_objids_size); mds->mds_lov_objids = ids; OBD_FREE(old_ids, mds->mds_lov_objids_size); @@ -229,9 +229,9 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n", mds->mds_lov_desc.ld_tgt_count); - stripes = min((__u32)LOV_MAX_STRIPE_COUNT, - max(mds->mds_lov_desc.ld_tgt_count, - mds->mds_lov_objids_in_file)); + stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT, + max(mds->mds_lov_desc.ld_tgt_count, + mds->mds_lov_objids_in_file)); mds->mds_max_mdsize = lov_mds_md_size(stripes); mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie); CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize: %d/%d\n", @@ -246,9 +246,9 @@ out: #define MDSLOV_NO_INDEX -1 /* Inform MDS about new/updated target */ -static int mds_lov_update_mds(struct obd_device *obd, - struct obd_device *watched, - __u32 idx) +static int mds_lov_update_mds(struct obd_device *obd, + struct obd_device *watched, + __u32 idx, struct obd_uuid *uuid) { struct mds_obd *mds = &obd->u.mds; int old_count; @@ -261,23 +261,23 @@ static int mds_lov_update_mds(struct obd_device *obd, RETURN(rc); CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n", - idx, obd->obd_recovering, obd->obd_async_recov, old_count, + idx, obd->obd_recovering, obd->obd_async_recov, old_count, mds->mds_lov_desc.ld_tgt_count); /* idx is set as data from lov_notify. 
*/ if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) { if (idx >= mds->mds_lov_desc.ld_tgt_count) { - CERROR("index %d > count %d!\n", idx, + CERROR("index %d > count %d!\n", idx, mds->mds_lov_desc.ld_tgt_count); RETURN(-EINVAL); } - + if (idx >= mds->mds_lov_objids_in_file) { /* We never read this lastid; ask the osc */ obd_id lastid; __u32 size = sizeof(lastid); rc = obd_get_info(watched->obd_self_export, - strlen("last_id"), + strlen("last_id"), "last_id", &size, &lastid); if (rc) RETURN(rc); @@ -286,10 +286,10 @@ static int mds_lov_update_mds(struct obd_device *obd, mds_lov_write_objids(obd); } else { /* We have read this lastid from disk; tell the osc. - Don't call this during recovery. */ + Don't call this during recovery. */ rc = mds_lov_set_nextid(obd); } - + CDEBUG(D_CONFIG, "last object "LPU64" from OST %d\n", mds->mds_lov_objids[idx], idx); } @@ -298,7 +298,9 @@ static int mds_lov_update_mds(struct obd_device *obd, /* We only _need_ to do this at first add (idx), or the first time after recovery. However, it should now be safe to call anytime. */ CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); + mutex_down(&obd->obd_dev_sem); + llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, uuid); + mutex_up(&obd->obd_dev_sem); RETURN(rc); } @@ -329,7 +331,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) if (data == NULL) RETURN(-ENOMEM); data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX | - OBD_CONNECT_REQPORTAL; + OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64; data->ocd_version = LUSTRE_VERSION_CODE; data->ocd_group = mds->mds_id + FILTER_GROUP_MDS0; /* NB: lov_connect() needs to fill in .ocd_index for each OST */ @@ -360,7 +362,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) GOTO(err_reg, rc); /* tgt_count may be 0! 
*/ - rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); + rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); if (rc) { CERROR("failed to initialize catalog %d\n", rc); GOTO(err_reg, rc); @@ -589,8 +591,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); - + llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL); group = FILTER_GROUP_MDS0 + mds->mds_id; rc2 = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN), KEY_MDS_CONN, @@ -631,9 +632,9 @@ struct mds_lov_sync_info { }; /* We only sync one osc at a time, so that we don't have to hold - any kind of lock on the whole mds_lov_desc, which may change + any kind of lock on the whole mds_lov_desc, which may change (grow) as a result of mds_lov_add_ost. This also avoids any - kind of mismatch between the lov_desc and the mds_lov_desc, + kind of mismatch between the lov_desc and the mds_lov_desc, which are not in lock-step during lov_add_obd */ static int __mds_lov_synchronize(void *data) { @@ -654,7 +655,7 @@ static int __mds_lov_synchronize(void *data) uuid = &watched->u.cli.cl_target_uuid; LASSERT(uuid); - rc = mds_lov_update_mds(obd, watched, idx); + rc = mds_lov_update_mds(obd, watched, idx, uuid); if (rc != 0) GOTO(out, rc); group = FILTER_GROUP_MDS0 + mds->mds_id; @@ -666,7 +667,7 @@ static int __mds_lov_synchronize(void *data) rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), mds->mds_lov_desc.ld_tgt_count, NULL, NULL, uuid); - + if (rc != 0) { CERROR("%s: failed at llog_origin_connect: %d\n", obd->obd_name, rc); @@ -705,7 +706,7 @@ int mds_lov_synchronize(void *data) char name[20]; if (mlsi->mlsi_index == MDSLOV_NO_INDEX) - /* There is still a watched target, + /* There is still a watched target, but we don't know 
its index */ sprintf(name, "ll_sync_tgt"); else @@ -715,7 +716,7 @@ int mds_lov_synchronize(void *data) RETURN(__mds_lov_synchronize(data)); } -int mds_lov_start_synchronize(struct obd_device *obd, +int mds_lov_start_synchronize(struct obd_device *obd, struct obd_device *watched, void *data, int nonblock) { @@ -732,7 +733,7 @@ int mds_lov_start_synchronize(struct obd_device *obd, mlsi->mlsi_obd = obd; mlsi->mlsi_watched = watched; - if (data) + if (data) mlsi->mlsi_index = *(__u32 *)data; else mlsi->mlsi_index = MDSLOV_NO_INDEX; @@ -794,9 +795,9 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, if (obd->obd_recovering) { CWARN("MDS %s: in recovery, not resetting orphans on %s\n", - obd->obd_name, + obd->obd_name, obd_uuid2str(&watched->u.cli.cl_target_uuid)); - /* We still have to fix the lov descriptor for ost's added + /* We still have to fix the lov descriptor for ost's added after the mdt in the config log. They didn't make it into mds_lov_connect. */ rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp); @@ -804,11 +805,11 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, } LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); - rc = mds_lov_start_synchronize(obd, watched, data, + rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); - + lquota_recovery(quota_interface, obd); - + RETURN(rc); } @@ -832,14 +833,14 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, int rc, err; ENTRY; - if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC || + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC || le32_to_cpu(lmm->lmm_magic == LOV_MAGIC_JOIN)) RETURN(0); CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n", inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic), LOV_MAGIC); - + rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size); if (rc < 0) GOTO(conv_end, rc); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index d7e7645..ec405c1 100644 --- 
a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -881,6 +881,7 @@ int mds_open(struct mds_update_record *rec, int offset, int lock_flags = 0; ENTRY; + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_OPEN); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE, (obd_timeout + 1) / 4); @@ -1202,6 +1203,9 @@ found_child: else ptlrpc_save_lock(req, &parent_lockh, parent_mode); } + /* trigger dqacq on the owner of child and parent */ + lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE); + /* If we have not taken the "open" lock, we may not return 0 here, because caller expects 0 to mean "lock is taken", and it needs nonzero return here for caller to return EDLM_LOCK_ABORTED to @@ -1211,8 +1215,6 @@ found_child: if ((cleanup_phase != 3) && !rc) rc = ENOLCK; - /* trigger dqacq on the owner of child and parent */ - lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE); RETURN(rc); } @@ -1436,6 +1438,7 @@ int mds_close(struct ptlrpc_request *req, int offset) CDEBUG(D_HA, "close req->rep_len %d mdsize %d cookiesize %d\n", req->rq_replen, obd->u.mds.mds_max_mdsize, obd->u.mds.mds_max_cookiesize); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_CLOSE); body = lustre_swab_reqbuf(req, offset, sizeof(*body), lustre_swab_mds_body); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 4e605a9..e8574054 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -501,6 +501,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, DEBUG_REQ(D_INODE, req, "setattr "LPU64"/%u %x", rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_iattr.ia_valid); + OBD_COUNTER_INCREMENT(obd, setattr); MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req)); @@ -810,6 +811,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = ll_vfs_create(dir, dchild, rec->ur_mode, NULL); + lprocfs_counter_incr(obd->obd_stats, 
LPROC_MDS_MKNOD); EXIT; break; } @@ -818,6 +820,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_mkdir(dir, dchild, rec->ur_mode); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKDIR); EXIT; break; } @@ -829,6 +832,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, rc = -EINVAL; /* -EPROTO? */ else rc = ll_vfs_symlink(dir, dchild, rec->ur_tgt, S_IALLUGO); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD); EXIT; break; } @@ -841,6 +845,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD); EXIT; break; } @@ -1634,6 +1639,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_rmdir(dparent->d_inode, dchild); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RMDIR); break; case S_IFREG: { struct lov_mds_md *lmm = lustre_msg_buf(req->rq_repmsg, @@ -1644,6 +1650,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_unlink(dparent->d_inode, dchild); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK); break; } case S_IFLNK: @@ -1656,6 +1663,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_unlink(dparent->d_inode, dchild); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK); break; default: CERROR("bad file type %o unlinking %s\n", rec->ur_mode, @@ -1769,6 +1777,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, DEBUG_REQ(D_INODE, req, "original "LPU64"/%u to "LPU64"/%u %s", rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_fid2->id, rec->ur_fid2->generation, 
rec->ur_name); + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_LINK); MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); @@ -2112,7 +2121,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u %s to "LPU64"/%u %s", rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name, rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_tgt); - + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RENAME); + MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); rc = mds_get_parents_children_locked(obd, mds, rec->ur_fid1, &de_srcdir, @@ -2301,7 +2311,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, int rc; ENTRY; -#if CRAY_XT3 +#ifdef CRAY_XT3 if (req->rq_uid != LNET_UID_ANY) { /* non-root local cluster client * NB root's creds are believed... */ @@ -2326,7 +2336,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, /* checked by unpacker */ LASSERT(rec->ur_opcode < REINT_MAX && reinters[rec->ur_opcode] != NULL); -#if CRAY_XT3 +#ifdef CRAY_XT3 if (rec->ur_uc.luc_uce) rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary; #endif diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index b60d429..ca46092 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -174,6 +174,8 @@ int mds_getxattr(struct ptlrpc_request *req) int rc = 0; ENTRY; + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_GETXATTR); + body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), lustre_swab_mds_body); if (body == NULL) @@ -334,6 +336,8 @@ int mds_setxattr(struct ptlrpc_request *req) int rc; ENTRY; + lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_SETXATTR); + body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), lustre_swab_mds_body); if (body == NULL) diff --git a/lustre/mgc/libmgc.c b/lustre/mgc/libmgc.c index a268422..40959c1 100644 --- a/lustre/mgc/libmgc.c +++ b/lustre/mgc/libmgc.c @@ -64,7 +64,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) if (rc) 
GOTO(err_decref, rc); - rc = obd_llog_init(obd, obd, 0, NULL); + rc = obd_llog_init(obd, obd, 0, NULL, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_cleanup, rc); @@ -80,7 +80,8 @@ err_decref: } static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, + struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index fe912bf..4af0211 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -390,7 +390,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) if (rc) GOTO(err_decref, rc); - rc = obd_llog_init(obd, obd, 0, NULL); + rc = obd_llog_init(obd, obd, 0, NULL, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_cleanup, rc); @@ -788,7 +788,8 @@ static int mgc_import_event(struct obd_device *obd, } static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, + struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 9148e3b..34c6459 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -417,7 +417,6 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) mti->mti_flags |= LDD_F_UPDATE; } - if (mti->mti_flags & LDD_F_UPDATE) { CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index dfca3ee..3a3477d 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -1626,11 +1626,19 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, /* We also have to update the other logs where this osc is part of the lov */ - /* Append ost info to mdt log */ if (mti->mti_flags & LDD_F_UPGRADE14) /* If we're upgrading, the old mdt log already 
has our entry. Let's do a fake one for fun. */ flags = CM_SKIP | CM_UPGRADE146; + + if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) { + /* If the update flag isn't set, don't update client/mdt + logs. */ + flags |= CM_SKIP; + LCONSOLE_WARN("Client log for %s was not updated; writeconf " + "the MDT first to regenerate it.\n", + mti->mti_svname); + } // for_all_existing_mdt for (i = 0; i < INDEX_MAP_SIZE * 8; i++){ @@ -1951,10 +1959,15 @@ int mgs_write_log_target(struct obd_device *obd, mti->mti_stripe_index, mti->mti_svname); /* FIXME mark old log sections as invalid, inc config ver #, add new log sections. - Make sure to update client and mds logs too + Make sure to update client and mdt logs too if needed */ - /* in the mean time, assume all logs were lost - (writeconf), and recreate this one */ + /* In the meantime, if we found the index in the + client log, we can't add it again. So recreate + the target log, but do _not_ update the client/mdt + logs. For "full" writeconf, the client log won't + have an entry for this target, so we won't get + here. */ + mti->mti_flags &= ~LDD_F_UPDATE; } } @@ -2089,12 +2102,12 @@ int mgs_erase_logs(struct obd_device *obd, char *fsname) RETURN(rc); } - /* Delete the fs db */ down(&mgs->mgs_sem); + + /* Delete the fs db */ fsdb = mgs_find_fsdb(obd, fsname); if (fsdb) mgs_free_fsdb(fsdb); - up(&mgs->mgs_sem); list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { list_del(&dirent->lld_list); @@ -2105,6 +2118,8 @@ int mgs_erase_logs(struct obd_device *obd, char *fsname) OBD_FREE(dirent, sizeof(*dirent)); } + up(&mgs->mgs_sem); + RETURN(rc); } diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 36f49d4..53b9644 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -62,10 +62,10 @@ int obd_memmax; /* The following are visible and mutable through /proc/sys/lustre/. 
*/ unsigned int obd_fail_loc; unsigned int obd_dump_on_timeout; +unsigned int obd_dump_on_eviction; unsigned int obd_timeout = 100; /* seconds */ unsigned int ldlm_timeout = 20; /* seconds */ unsigned int obd_health_check_timeout = 120; /* seconds */ -char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall */ cfs_waitq_t obd_race_waitq; int obd_race_state; @@ -383,10 +383,10 @@ EXPORT_SYMBOL(obd_print_fail_loc); EXPORT_SYMBOL(obd_race_waitq); EXPORT_SYMBOL(obd_race_state); EXPORT_SYMBOL(obd_dump_on_timeout); +EXPORT_SYMBOL(obd_dump_on_eviction); EXPORT_SYMBOL(obd_timeout); EXPORT_SYMBOL(ldlm_timeout); EXPORT_SYMBOL(obd_health_check_timeout); -EXPORT_SYMBOL(obd_lustre_upcall); EXPORT_SYMBOL(ptlrpc_put_connection_superhack); EXPORT_SYMBOL(proc_lustre_root); @@ -570,7 +570,9 @@ int init_obdclass(void) if (err) return err; err = class_procfs_init(); - lustre_register_fs(); + if (err) + return err; + err = lustre_register_fs(); #endif return err; diff --git a/lustre/obdclass/darwin/darwin-sysctl.c b/lustre/obdclass/darwin/darwin-sysctl.c index 59b7e45..3443d83 100644 --- a/lustre/obdclass/darwin/darwin-sysctl.c +++ b/lustre/obdclass/darwin/darwin-sysctl.c @@ -26,7 +26,6 @@ extern unsigned int obd_fail_loc; extern unsigned int obd_dump_on_timeout; extern unsigned int obd_timeout; extern unsigned int ldlm_timeout; -extern char obd_lustre_upcall[128]; extern unsigned int obd_sync_filter; extern atomic_t obd_memory; @@ -50,9 +49,6 @@ SYSCTL_PROC(_lustre, OID_AUTO, lustre_kernel_version, SYSCTL_INT(_lustre, OID_AUTO, dump_on_timeout, CTLTYPE_INT | CTLFLAG_RW, &obd_dump_on_timeout, 0, "lustre_dump_on_timeout"); -SYSCTL_STRING(_lustre, OID_AUTO, upcall, - CTLTYPE_STRING | CTLFLAG_RW, obd_lustre_upcall, - 128, "lustre_upcall"); SYSCTL_INT(_lustre, OID_AUTO, memused, CTLTYPE_INT | CTLFLAG_RW, (int *)&obd_memory.counter, 0, "lustre_memory_used"); diff --git a/lustre/obdclass/linux/linux-module.c b/lustre/obdclass/linux/linux-module.c index 6eb062b..09a24c9 
100644 --- a/lustre/obdclass/linux/linux-module.c +++ b/lustre/obdclass/linux/linux-module.c @@ -63,6 +63,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include #include @@ -218,18 +219,14 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { *eof = 1; - return snprintf(page, count, "%s\n", BUILD_VERSION); -} - -int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - *eof = 1; + return snprintf(page, count, "lustre: %s\nkernel: %u\nbuild: %s\n", + LUSTRE_VERSION_STRING, #ifdef LUSTRE_KERNEL_VERSION - return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION); + LUSTRE_KERNEL_VERSION, #else - return snprintf(page, count, "%u\n", "patchless"); + "patchless", #endif + BUILD_VERSION); } int obd_proc_read_pinger(char *page, char **start, off_t off, int count, @@ -312,7 +309,6 @@ struct proc_dir_entry *proc_lustre_root = NULL; struct lprocfs_vars lprocfs_base[] = { { "version", obd_proc_read_version, NULL, NULL }, - { "kernel_version", obd_proc_read_kernel_version, NULL, NULL }, { "pinger", obd_proc_read_pinger, NULL, NULL }, { "health_check", obd_proc_read_health, NULL, NULL }, { "health_check_timeout", obd_proc_rd_health_timeout, diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index 169aecb..fe5cd34 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -54,10 +54,10 @@ enum { OBD_FAIL_LOC = 1, /* control test failures instrumentation */ OBD_TIMEOUT, /* RPC timeout before recovery/intr */ OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */ - OBD_UPCALL, /* path to recovery upcall */ OBD_MEMUSED, /* bytes currently OBD_ALLOCated */ OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. 
*/ OBD_LDLM_TIMEOUT, /* LDLM timeout for ASTs before client eviction */ + OBD_DUMP_ON_EVICTION, /* dump kernel debug log upon eviction */ }; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) @@ -102,9 +102,8 @@ static ctl_table obd_table[] = { &proc_set_timeout}, {OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, - /* XXX need to lock so we avoid update races with recovery upcall! */ - {OBD_UPCALL, "upcall", obd_lustre_upcall, 128, 0644, NULL, - &proc_dostring, &sysctl_string }, + {OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction, + sizeof(int), 0644, NULL, &proc_dointvec}, {OBD_MEMUSED, "memused", (int *)&obd_memory.counter, sizeof(int), 0644, NULL, &proc_dointvec}, {OBD_LDLM_TIMEOUT, "ldlm_timeout", &ldlm_timeout, sizeof(int), 0644, diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 3912558..9cad6f4 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -313,22 +313,18 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt, } EXPORT_SYMBOL(llog_obd_origin_add); -int llog_cat_initialize(struct obd_device *obd, int count) +int llog_cat_initialize(struct obd_device *obd, int count, + struct obd_uuid *uuid) { + char name[32] = CATLIST; struct llog_catid *idarray; int size = sizeof(*idarray) * count; - char name[32] = CATLIST; int rc; ENTRY; - /* We don't want multiple mdt threads here at once */ - mutex_down(&obd->obd_dev_sem); - OBD_ALLOC(idarray, size); - if (!idarray) { - mutex_up(&obd->obd_dev_sem); + if (!idarray) RETURN(-ENOMEM); - } rc = llog_get_cat_list(obd, obd, name, count, idarray); if (rc) { @@ -336,7 +332,7 @@ int llog_cat_initialize(struct obd_device *obd, int count) GOTO(out, rc); } - rc = obd_llog_init(obd, obd, count, idarray); + rc = obd_llog_init(obd, obd, count, idarray, uuid); if (rc) { CERROR("rc: %d\n", rc); GOTO(out, rc); @@ -350,20 +346,19 @@ int llog_cat_initialize(struct obd_device *obd, int count) out: OBD_FREE(idarray, size); - 
mutex_up(&obd->obd_dev_sem); RETURN(rc); } EXPORT_SYMBOL(llog_cat_initialize); int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, struct obd_uuid *uuid) { int rc; ENTRY; OBD_CHECK_DT_OP(obd, llog_init, 0); OBD_COUNTER_INCREMENT(obd, llog_init); - rc = OBP(obd, llog_init)(obd, disk_obd, count, logid); + rc = OBP(obd, llog_init)(obd, disk_obd, count, logid, uuid); RETURN(rc); } EXPORT_SYMBOL(obd_llog_init); diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index 94edfc9..aedaa30 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -598,7 +598,8 @@ static int llog_run_tests(struct obd_device *obd) static int llog_test_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *logid, + struct obd_uuid *uuid) { int rc; ENTRY; @@ -651,7 +652,7 @@ static int llog_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(-EINVAL); } - rc = obd_llog_init(obd, tgt, 0, NULL); + rc = obd_llog_init(obd, tgt, 0, NULL, NULL); if (rc) RETURN(rc); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index f4d8a50..ce75703 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -384,6 +384,12 @@ static const char *obd_connect_names[] = { "join_file", "getattr_by_fid", "no_oh_for_devices", + "local_1.8_client", + "remote_1.8_client", + "max_byte_per_rpc", + "64bit_qdata", + "fid_capability", + "oss_capability", NULL }; @@ -640,7 +646,7 @@ int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, struct proc_dir_entry *entry; LASSERT(root != NULL); - entry = create_proc_entry(name, 0444, root); + entry = create_proc_entry(name, 0644, root); if (entry == NULL) return -ENOMEM; entry->proc_fops = &lprocfs_stats_seq_fops; @@ -745,7 +751,6 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, 
unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused); LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw); LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export); LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export); LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc); @@ -976,6 +981,7 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count, __u64 *val, int mult) { char kernbuf[22], *end, *pbuf; + __u64 whole, frac = 0, frac_d = 1, units; if (count > (sizeof(kernbuf) - 1) ) return -EINVAL; @@ -985,32 +991,42 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count, kernbuf[count] = '\0'; pbuf = kernbuf; - if (*pbuf == '-') { - mult = -mult; - pbuf++; - } + if (*pbuf == '-') + return -ERANGE; - *val = simple_strtoull(pbuf, &end, 10) * mult; + whole = simple_strtoull(pbuf, &end, 10); if (pbuf == end) return -EINVAL; if (end != NULL && *end == '.') { - int temp_val; - int i, pow = 1; - + int i; pbuf = end + 1; - if (strlen(pbuf) > 10) - pbuf[10] = '\0'; - - temp_val = (int)simple_strtoull(pbuf, &end, 10) * mult; - - if (pbuf < end) { - for (i = 0; i < (end - pbuf); i++) - pow *= 10; + frac = simple_strtoull(pbuf, &end, 10); + /* count decimal places */ + for (i = 0; i < (end - pbuf); i++) + frac_d *= 10; + } - *val += (__u64)(temp_val / pow); - } + units = 1; + switch(*end) { + case 'p': case 'P': + units <<= 10; + case 't': case 'T': + units <<= 10; + case 'g': case 'G': + units <<= 10; + case 'm': case 'M': + units <<= 10; + case 'k': case 'K': + units <<= 10; } + /* Specified units override the multiplier */ + if (units) + mult = units; + + frac = frac * mult; + do_div(frac, frac_d); + *val = whole * mult + frac; return 0; } diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index be6efef..6e9ff1b 100644 --- 
a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -40,6 +40,7 @@ struct uuid_nid_data { struct list_head un_list; lnet_nid_t un_nid; char *un_uuid; + int un_count; /* nid/uuid pair refcount */ }; /* FIXME: This should probably become more elegant than a global linked list */ @@ -85,8 +86,10 @@ int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index) LNET will choose the best one. */ int class_add_uuid(const char *uuid, __u64 nid) { - struct uuid_nid_data *data; + struct list_head *tmp, *n; + struct uuid_nid_data *data, *entry; int nob = strnlen (uuid, PAGE_SIZE) + 1; + int found = 0; LASSERT(nid != 0); /* valid newconfig NID is never zero */ @@ -103,16 +106,34 @@ int class_add_uuid(const char *uuid, __u64 nid) return -ENOMEM; } - CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid)); memcpy(data->un_uuid, uuid, nob); data->un_nid = nid; + data->un_count = 1; spin_lock (&g_uuid_lock); - list_add(&data->un_list, &g_uuid_list); + list_for_each_safe(tmp, n, &g_uuid_list) { + entry = list_entry(tmp, struct uuid_nid_data, un_list); + if (entry->un_nid == nid && + (strcmp(entry->un_uuid, uuid) == 0)) { + found++; + entry->un_count++; + break; + } + } + if (!found) + list_add(&data->un_list, &g_uuid_list); spin_unlock (&g_uuid_lock); + if (found) { + CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid, + libcfs_nid2str(nid), entry->un_count); + OBD_FREE(data->un_uuid, nob); + OBD_FREE(data, sizeof(*data)); + } else { + CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid)); + } return 0; } @@ -131,11 +152,16 @@ int class_del_uuid(const char *uuid) list_for_each_safe(tmp, n, &g_uuid_list) { data = list_entry(tmp, struct uuid_nid_data, un_list); - if (uuid == NULL || strcmp(data->un_uuid, uuid) == 0) { + if (uuid == NULL) { list_del (&data->un_list); list_add (&data->un_list, &deathrow); - if (uuid) - break; + } else if (strcmp(data->un_uuid, uuid) == 0) { + --data->un_count; + if (data->un_count <= 0) { + list_del 
(&data->un_list); + list_add (&data->un_list, &deathrow); + } + break; } } @@ -151,7 +177,8 @@ int class_del_uuid(const char *uuid) data = list_entry(deathrow.next, struct uuid_nid_data, un_list); list_del (&data->un_list); - CDEBUG(D_INFO, "del uuid %s\n", data->un_uuid); + CDEBUG(D_INFO, "del uuid %s %s\n", data->un_uuid, + libcfs_nid2str(data->un_nid)); OBD_FREE(data->un_uuid, strlen(data->un_uuid) + 1); OBD_FREE(data, sizeof(*data)); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index f0c4ad8..6e8bf9f 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -756,12 +756,8 @@ int class_process_config(struct lustre_cfg *lcfg) GOTO(out, err = 0); } case LCFG_SET_UPCALL: { - CDEBUG(D_IOCTL, "setting lustre ucpall to: %s\n", - lustre_cfg_string(lcfg, 1)); - if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof obd_lustre_upcall) - GOTO(out, err = -EINVAL); - strncpy(obd_lustre_upcall, lustre_cfg_string(lcfg, 1), - sizeof (obd_lustre_upcall)); + LCONSOLE_ERROR("recovery upcall is deprecated\n"); + /* COMPAT_146 Don't fail on old configs */ GOTO(out, err = 0); } case LCFG_MARKER: { @@ -774,7 +770,7 @@ int class_process_config(struct lustre_cfg *lcfg) case LCFG_PARAM: { /* llite has no obd */ if ((class_match_param(lustre_cfg_string(lcfg, 1), - PARAM_LLITE, 0) == 0) && + PARAM_LLITE, 0) == 0) && client_process_config) { err = (*client_process_config)(lcfg); GOTO(out, err); @@ -839,6 +835,7 @@ out: int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, struct lustre_cfg *lcfg, void *data) { +#ifdef __KERNEL__ struct lprocfs_vars *var; char *key, *sval; int i, vallen; @@ -875,9 +872,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, if (class_match_param(key, (char *)var->name, 0) == 0) { matched++; rc = -EROFS; - if (var->write_fptr) + if (var->write_fptr) { + mm_segment_t oldfs; + oldfs = get_fs(); + set_fs(KERNEL_DS); rc = (var->write_fptr)(NULL, sval, vallen, data); + set_fs(oldfs); + } if 
(rc < 0) CERROR("writing proc entry %s err %d\n", var->name, rc); @@ -898,6 +900,10 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, if (rc > 0) rc = 0; RETURN(rc); +#else + CDEBUG(D_CONFIG, "liblustre can't process params.\n"); + return -ENOSYS; +#endif } int class_config_dump_handler(struct llog_handle * handle, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 83062ac..4f24d10 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -214,15 +214,17 @@ int server_put_mount(const char *name, struct vfsmount *mnt) static void ldd_print(struct lustre_disk_data *ldd) { - PRINT_CMD(PRINT_MASK, " disk data:\n"); - PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver); - PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname); + PRINT_CMD(PRINT_MASK, " disk data:\n"); PRINT_CMD(PRINT_MASK, "server: %s\n", ldd->ldd_svname); + PRINT_CMD(PRINT_MASK, "uuid: %s\n", (char *)ldd->ldd_uuid); + PRINT_CMD(PRINT_MASK, "fs: %s\n", ldd->ldd_fsname); PRINT_CMD(PRINT_MASK, "index: %04x\n", ldd->ldd_svindex); + PRINT_CMD(PRINT_MASK, "config: %d\n", ldd->ldd_config_ver); PRINT_CMD(PRINT_MASK, "flags: %#x\n", ldd->ldd_flags); PRINT_CMD(PRINT_MASK, "diskfs: %s\n", MT_STR(ldd)); PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts); - PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params); + PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params); + PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata); } static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt, @@ -508,6 +510,8 @@ static int server_stop_mgs(struct super_block *sb) RETURN(rc); } +DECLARE_MUTEX(mgc_start_lock); + /* Set up a mgcobd to process startup logs */ static int lustre_start_mgc(struct super_block *sb) { @@ -562,6 +566,8 @@ static int lustre_start_mgc(struct super_block *sb) GOTO(out_free, rc = -ENOMEM); sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid)); + mutex_down(&mgc_start_lock); + obd = class_name2obd(mgcname); if 
(obd) { /* Re-using an existing MGC */ @@ -702,6 +708,8 @@ out: to the same mgc.*/ lsi->lsi_mgc = obd; out_free: + mutex_up(&mgc_start_lock); + if (mgcname) OBD_FREE(mgcname, len); if (niduuid) @@ -714,7 +722,7 @@ static int lustre_stop_mgc(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *obd; char *niduuid, *ptr = 0; - int i, rc, len; + int i, rc = 0, len; ENTRY; if (!lsi) @@ -724,12 +732,13 @@ static int lustre_stop_mgc(struct super_block *sb) RETURN(-ENOENT); lsi->lsi_mgc = NULL; + mutex_down(&mgc_start_lock); if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) { /* This is not fatal, every client that stops will call in here. */ CDEBUG(D_MOUNT, "mgc still has %d references.\n", atomic_read(&obd->u.cli.cl_mgc_refcount)); - RETURN(-EBUSY); + GOTO(out, rc = -EBUSY); } /* MGC must always stop */ @@ -753,7 +762,7 @@ static int lustre_stop_mgc(struct super_block *sb) rc = class_manual_cleanup(obd); if (rc) - RETURN(rc); + GOTO(out, rc); /* Clean the nid uuids */ if (!niduuid) @@ -769,7 +778,9 @@ static int lustre_stop_mgc(struct super_block *sb) OBD_FREE(niduuid, len); /* class_import_put will get rid of the additional connections */ - RETURN(0); +out: + mutex_up(&mgc_start_lock); + RETURN(rc); } /* Since there's only one mgc per node, we have to change it's fs to get @@ -806,6 +817,8 @@ static int server_mgc_clear_fs(struct obd_device *mgc) RETURN(rc); } +DECLARE_MUTEX(server_start_lock); + /* Stop MDS/OSS if nobody is using them */ static int server_stop_servers(int lddflags, int lsiflags) { @@ -814,8 +827,9 @@ static int server_stop_servers(int lddflags, int lsiflags) int rc = 0; ENTRY; - /* Either an MDT or an OST or neither */ + mutex_down(&server_start_lock); + /* Either an MDT or an OST or neither */ /* if this was an MDT, and there are no more MDT's, clean up the MDS */ if ((lddflags & LDD_F_SV_TYPE_MDT) && (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) { @@ -837,6 +851,8 @@ static int server_stop_servers(int lddflags, int 
lsiflags) rc = err; } + mutex_up(&server_start_lock); + RETURN(rc); } @@ -1053,6 +1069,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) /* If we're an MDT, make sure the global MDS is running */ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) { /* make sure the MDS is started */ + mutex_down(&server_start_lock); obd = class_name2obd(LUSTRE_MDS_OBDNAME); if (!obd) { rc = lustre_start_simple(LUSTRE_MDS_OBDNAME, @@ -1061,10 +1078,12 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) LUSTRE_MDS_OBDNAME"_uuid", 0, 0); if (rc) { + mutex_up(&server_start_lock); CERROR("failed to start MDS: %d\n", rc); RETURN(rc); } } + mutex_up(&server_start_lock); } /* If we're an MDT, make sure the global MDS is running */ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) { @@ -1087,6 +1106,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) /* If we're an OST, make sure the global OSS is running */ if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) { /* make sure OSS is started */ + mutex_down(&server_start_lock); obd = class_name2obd(LUSTRE_OSS_OBDNAME); if (!obd) { rc = lustre_start_simple(LUSTRE_OSS_OBDNAME, @@ -1094,10 +1114,12 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) LUSTRE_OSS_OBDNAME"_uuid", 0, 0); if (rc) { + mutex_up(&server_start_lock); CERROR("failed to start OSS: %d\n", rc); RETURN(rc); } } + mutex_up(&server_start_lock); } /* Set the mgc fs to our server disk. 
This allows the MGC @@ -1569,10 +1591,8 @@ static int server_fill_super(struct super_block *sb) /* start MGS before MGC */ if (IS_MGS(lsi->lsi_ldd)) { rc = server_start_mgs(sb); - if (rc) { - CERROR("ignoring Failed MGS start!!\n"); - //GOTO(out_mnt, rc); - } + if (rc) + GOTO(out_mnt, rc); } rc = lustre_start_mgc(sb); @@ -1712,11 +1732,16 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) { char *s1 = ptr, *s2; __u32 index, *exclude_list; - int rc = 0; + int rc = 0, devmax; ENTRY; + + /* The shortest an ost name can be is 8 chars: -OST0000. + We don't actually know the fsname at this time, so in fact + a user could specify any fsname. */ + devmax = strlen(ptr) / 8 + 1; /* temp storage until we figure out how many we have */ - OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES); + OBD_ALLOC(exclude_list, sizeof(index) * devmax); if (!exclude_list) RETURN(-ENOMEM); @@ -1735,8 +1760,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) s1 = s2; /* now we are pointing at ':' (next exclude) or ',' (end of excludes) */ - - if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES) + if (lmd->lmd_exclude_count >= devmax) break; } if (rc >= 0) /* non-err */ @@ -1754,7 +1778,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) lmd->lmd_exclude_count = 0; } } - OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES); + OBD_FREE(exclude_list, sizeof(index) * devmax); RETURN(rc); } diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index c1a170a..1429bdf 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -571,6 +571,8 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, #ifdef __KERNEL__ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include + static int echo_client_ubrw(struct obd_device *obd, int rw, struct obdo *oa, struct lov_stripe_md *lsm, obd_off offset, obd_size count, char *buffer, diff --git a/lustre/obdfilter/Makefile.in 
b/lustre/obdfilter/Makefile.in index 0f25c77..8305eb5 100644 --- a/lustre/obdfilter/Makefile.in +++ b/lustre/obdfilter/Makefile.in @@ -1,6 +1,6 @@ MODULES := obdfilter -obdfilter-objs := filter.o filter_io.o filter_log.o filter_san.o +obdfilter-objs := filter.o filter_io.o filter_log.o obdfilter-objs += lproc_obdfilter.o filter_lvb.o ifeq ($(PATCHLEVEL),4) diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index a380f19..12e8070 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1968,7 +1968,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); - rc = llog_cat_initialize(obd, 1); + rc = llog_cat_initialize(obd, 1, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_post, rc); @@ -2077,7 +2077,8 @@ static struct llog_operations filter_size_orig_logops = { }; static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *catid) + int count, struct llog_catid *catid, + struct obd_uuid *uuid) { struct llog_ctxt *ctxt; int rc; @@ -2222,6 +2223,9 @@ static int filter_connect_internal(struct obd_export *exp, data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; exp->exp_connect_flags = data->ocd_connect_flags; + if (exp->exp_imp_reverse) + exp->exp_imp_reverse->imp_connect_data.ocd_connect_flags + = data->ocd_connect_flags; data->ocd_version = LUSTRE_VERSION_CODE; if (exp->exp_connect_flags & OBD_CONNECT_GRANT) { @@ -3712,36 +3716,6 @@ static struct obd_ops filter_obd_ops = { .o_process_config = filter_process_config, }; -static struct obd_ops filter_sanobd_ops = { - .o_owner = THIS_MODULE, - .o_get_info = filter_get_info, - .o_set_info_async = filter_set_info_async, - .o_setup = filter_san_setup, - .o_precleanup = filter_precleanup, - .o_cleanup = filter_cleanup, - .o_connect = filter_connect, - .o_reconnect = filter_reconnect, - 
.o_disconnect = filter_disconnect, - .o_ping = filter_ping, - .o_init_export = filter_init_export, - .o_destroy_export = filter_destroy_export, - .o_statfs = filter_statfs, - .o_getattr = filter_getattr, - .o_unpackmd = filter_unpackmd, - .o_create = filter_create, - .o_setattr = filter_setattr, - .o_destroy = filter_destroy, - .o_brw = filter_brw, - .o_punch = filter_truncate, - .o_sync = filter_sync, - .o_preprw = filter_preprw, - .o_commitrw = filter_commitrw, - .o_san_preprw = filter_san_preprw, - .o_llog_init = filter_llog_init, - .o_llog_finish = filter_llog_finish, - .o_iocontrol = filter_iocontrol, -}; - quota_interface_t *quota_interface; extern quota_interface_t filter_quota_interface; @@ -3754,6 +3728,7 @@ static int __init obdfilter_init(void) lprocfs_init_vars(filter, &lvars); + request_module("lquota"); OBD_ALLOC(obdfilter_created_scratchpad, OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * sizeof(*obdfilter_created_scratchpad)); @@ -3768,20 +3743,12 @@ static int __init obdfilter_init(void) quota_interface = PORTAL_SYMBOL_GET(filter_quota_interface); init_obd_quota_ops(quota_interface, &filter_obd_ops); - init_obd_quota_ops(quota_interface, &filter_sanobd_ops); rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars, LUSTRE_OST_NAME, NULL); - if (rc) - GOTO(out_fmd, rc); - - rc = class_register_type(&filter_sanobd_ops, NULL, lvars.module_vars, - LUSTRE_OSTSAN_NAME, NULL); if (rc) { int err; - class_unregister_type(LUSTRE_OST_NAME); -out_fmd: err = kmem_cache_destroy(ll_fmd_cachep); LASSERTF(err == 0, "Cannot destroy ll_fmd_cachep: rc %d\n",err); ll_fmd_cachep = NULL; @@ -3808,7 +3775,6 @@ static void __exit obdfilter_exit(void) ll_fmd_cachep = NULL; } - class_unregister_type(LUSTRE_OSTSAN_NAME); class_unregister_type(LUSTRE_OST_NAME); OBD_FREE(obdfilter_created_scratchpad, OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index f8ff9d5..4a3516f 100644 --- 
a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -169,11 +169,6 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, int filter_recov_log_mds_ost_cb(struct llog_handle *llh, struct llog_rec_hdr *rec, void *data); -/* filter_san.c */ -int filter_san_setup(struct obd_device *obd, struct lustre_cfg *cfg); -int filter_san_preprw(int cmd, struct obd_export *, struct obdo *, int objcount, - struct obd_ioobj *, int niocount, struct niobuf_remote *); - #ifdef LPROCFS void filter_tally_write(struct filter_obd *filter, struct page **pages, int nr_pages, unsigned long *blocks, diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 6b4811a..b2be0d8 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -664,6 +664,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, iattr_from_obdo(&iattr, oa, i); if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) { + unsigned int save; + CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n", (unsigned long)oa->o_uid, (unsigned long)oa->o_gid); @@ -680,10 +682,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, /* To avoid problems with quotas, UID and GID must be set * in the inode before filter_direct_io() - see bug 10357. */ - if (iattr.ia_valid & ATTR_UID) - inode->i_uid = iattr.ia_uid; - if (iattr.ia_valid & ATTR_GID) - inode->i_gid = iattr.ia_gid; + save = iattr.ia_valid; + iattr.ia_valid &= (ATTR_UID | ATTR_GID); + rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0); + CDEBUG(D_QUOTA, "set uid(%u)/gid(%u) to ino(%lu). rc(%d)\n", + iattr.ia_uid, iattr.ia_gid, inode->i_ino, rc); + iattr.ia_valid = save & ~(ATTR_UID | ATTR_GID); } /* filter_direct_io drops i_mutex */ @@ -735,7 +739,7 @@ cleanup: err = lquota_adjust(quota_interface, obd, qcids, NULL, rc, FSFILT_OP_CREATE); CDEBUG(err ? D_ERROR : D_QUOTA, - "error filter adjust qunit! (rc:%d)\n", err); + "filter adjust qunit! 
(rc:%d)\n", err); RETURN(rc); } diff --git a/lustre/obdfilter/filter_san.c b/lustre/obdfilter/filter_san.c deleted file mode 100644 index c679b3e..0000000 --- a/lustre/obdfilter/filter_san.c +++ /dev/null @@ -1,129 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/fs/obdfilter/filter_san.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam - * Author: Andreas Dilger - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. 
- */ - -#define DEBUG_SUBSYSTEM S_FILTER - -#include -#include -#include // XXX kill me soon -#include - -#include -#include -#include "filter_internal.h" - -/* sanobd setup methods - use a specific mount option */ -int filter_san_setup(struct obd_device *obd, struct lustre_cfg* lcfg) -{ - unsigned long page; - int rc; - - if (lcfg->lcfg_bufcount < 3 || LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) - RETURN(-EINVAL); - - /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - RETURN(-ENOMEM); - - /* for ext3/ldiskfs filesystem, we must mount in 'writeback' mode */ - if (!strcmp(lustre_cfg_string(lcfg, 2), "ldiskfs")) - strcpy((void *)page, "data=writeback"); - else if (!strcmp(lustre_cfg_string(lcfg, 2), "ext3")) - strcpy((void *)page, "data=writeback,asyncdel"); - else - LBUG(); /* just a reminder */ - - rc = filter_common_setup(obd, lcfg, (void *)page); - free_page(page); - - return rc; -} - -int filter_san_preprw(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, int niocount, - struct niobuf_remote *nb) -{ - struct obd_ioobj *o = obj; - struct niobuf_remote *rnb = nb; - int rc = 0; - int i; - ENTRY; - LASSERT(objcount == 1); - - for (i = 0; i < objcount; i++, o++) { - struct dentry *dentry; - struct inode *inode; -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - sector_t (*fs_bmap)(struct address_space *, sector_t); -#else - int (*fs_bmap)(struct address_space *, long); -#endif - int j; - - dentry = filter_oa2dentry(exp->exp_obd, oa); - if (IS_ERR(dentry)) - GOTO(out, rc = PTR_ERR(dentry)); - - inode = dentry->d_inode; - fs_bmap = inode->i_mapping->a_ops->bmap; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++) { - long block; - - block = rnb->offset >> inode->i_blkbits; - - if (cmd == OBD_BRW_READ) { - block = fs_bmap(inode->i_mapping, block); - } else { - loff_t newsize = rnb->offset + rnb->len; - /* fs_prep_san_write will also update inode - * size for us: - * (1) new 
alloced block - * (2) existed block but size extented - */ - /* FIXME We could call fs_prep_san_write() - * only once for all the blocks allocation. - * Now call it once for each block, for - * simplicity. And if error happens, we - * probably need to release previous alloced - * block */ - rc = fs_prep_san_write(exp->exp_obd, inode, - &block, 1, newsize); - if (rc) - break; - } - - rnb->offset = block; - } - f_dput(dentry); - } -out: - RETURN(rc); -} - diff --git a/lustre/osc/Makefile.in b/lustre/osc/Makefile.in index 568a725..ce9107f 100644 --- a/lustre/osc/Makefile.in +++ b/lustre/osc/Makefile.in @@ -1,4 +1,4 @@ MODULES := osc -osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o +osc-objs := osc_request.o lproc_osc.o osc_create.o @INCLUDE_RULES@ diff --git a/lustre/osc/autoMakefile.am b/lustre/osc/autoMakefile.am index c9f2fbb..2b00785 100644 --- a/lustre/osc/autoMakefile.am +++ b/lustre/osc/autoMakefile.am @@ -5,7 +5,7 @@ if LIBLUSTRE noinst_LIBRARIES = libosc.a -libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_internal.h +libosc_a_SOURCES = osc_request.c osc_create.c osc_internal.h libosc_a_CPPFLAGS = $(LLCPPFLAGS) libosc_a_CFLAGS = $(LLCFLAGS) endif diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c deleted file mode 100644 index 39bd2f8..0000000 --- a/lustre/osc/osc_lib.c +++ /dev/null @@ -1,79 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. 
- * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_OSC - -#ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# include - -/* convert a pathname into a kdev_t */ -static kdev_t path2dev(char *path) -{ - struct dentry *dentry; - struct nameidata nd; - kdev_t dev = KDEVT_INIT(0); - - if (ll_path_lookup(path, LOOKUP_FOLLOW, &nd)) - return val_to_kdev(0); - - dentry = nd.dentry; - if (dentry->d_inode && !is_bad_inode(dentry->d_inode) && - S_ISBLK(dentry->d_inode->i_mode)) - dev = dentry->d_inode->i_rdev; - path_release(&nd); - - return dev; -} - -int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg) -{ - struct client_obd *cli = &obddev->u.cli; - ENTRY; - - if (lcfg->lcfg_bufcount < 4 || LUSTRE_CFG_BUFLEN(lcfg, 3) < 1) { - CERROR("setup requires a SAN device pathname\n"); - RETURN(-EINVAL); - } - - client_obd_setup(obddev, lcfg); - - cli->cl_sandev = path2dev(lustre_cfg_string(lcfg, 3)); - if (!kdev_t_to_nr(cli->cl_sandev)) { - CERROR("%s seems not a valid SAN device\n", - lustre_cfg_string(lcfg, 3)); - RETURN(-EINVAL); - } - - RETURN(0); -} -#endif diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 33a9710..13d7e03 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2484,298 +2484,6 @@ out: RETURN(rc); } -/* Note: caller will lock/unlock, and set uptodate on the pages */ -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < 
KERNEL_VERSION(2,5,0)) -static int sanosc_brw_read(struct obd_export *exp, struct obd_info *oinfo, - obd_count page_count, struct brw_page *pga) -{ - struct ptlrpc_request *req = NULL; - struct ost_body *body; - struct niobuf_remote *nioptr; - struct obd_ioobj *iooptr; - struct obd_import *imp = class_exp2cliimp(exp); - int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body)}; - int swab, mapped = 0, rc; - ENTRY; - - /* XXX does not handle 'new' brw protocol */ - - size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj); - size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr); - - req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SAN_READ, 4, size, NULL); - if (!req) - RETURN(-ENOMEM); - - /* FIXME bug 249 */ - /* See bug 7198 */ - if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) - req->rq_request_portal = OST_IO_PORTAL; - - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, - sizeof(*iooptr)); - nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, - sizeof(*nioptr) * page_count); - - memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa)); - - obdo_to_ioobj(oinfo->oi_oa, iooptr); - iooptr->ioo_bufcnt = page_count; - - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - LASSERT(PageLocked(pga[mapped].pg)); - LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off); - - nioptr->offset = pga[mapped].off; - nioptr->len = pga[mapped].count; - nioptr->flags = pga[mapped].flag; - } - - size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr); - ptlrpc_req_set_repsize(req, 3, size); - - rc = ptlrpc_queue_wait(req); - if (rc) - GOTO(out_req, rc); - - body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR("Can't unpack body\n"); - GOTO(out_req, rc = -EPROTO); - } - - memcpy(oinfo->oi_oa, &body->oa, sizeof(*oinfo->oi_oa)); - - swab = lustre_msg_swabbed(req->rq_repmsg); - LASSERT_REPSWAB(req, 
REPLY_REC_OFF + 1); - nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, - size[REPLY_REC_OFF + 1]); - if (!nioptr) { - /* nioptr missing or short */ - GOTO(out_req, rc = -EPROTO); - } - - /* actual read */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - struct page *page = pga[mapped].pg; - struct buffer_head *bh; - kdev_t dev; - - if (swab) - lustre_swab_niobuf_remote (nioptr); - - /* got san device associated */ - LASSERT(exp->exp_obd != NULL); - dev = exp->exp_obd->u.cli.cl_sandev; - - /* hole */ - if (!nioptr->offset) { - CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n", - page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, CFS_PAGE_SIZE); - continue; - } - - if (!page->buffers) { - create_empty_buffers(page, dev, CFS_PAGE_SIZE); - bh = page->buffers; - - clear_bit(BH_New, &bh->b_state); - set_bit(BH_Mapped, &bh->b_state); - bh->b_blocknr = (unsigned long)nioptr->offset; - - clear_bit(BH_Uptodate, &bh->b_state); - - ll_rw_block(READ, 1, &bh); - } else { - bh = page->buffers; - - /* if buffer already existed, it must be the - * one we mapped before, check it */ - LASSERT(!test_bit(BH_New, &bh->b_state)); - LASSERT(test_bit(BH_Mapped, &bh->b_state)); - LASSERT(bh->b_blocknr == (unsigned long)nioptr->offset); - - /* wait it's io completion */ - if (test_bit(BH_Lock, &bh->b_state)) - wait_on_buffer(bh); - - if (!test_bit(BH_Uptodate, &bh->b_state)) - ll_rw_block(READ, 1, &bh); - } - - - /* must do syncronous write here */ - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - /* I/O error */ - rc = -EIO; - goto out_req; - } - } - -out_req: - ptlrpc_req_finished(req); - RETURN(rc); -} - -static int sanosc_brw_write(struct obd_export *exp, struct obd_info *oinfo, - obd_count page_count, struct brw_page *pga) -{ - struct ptlrpc_request *req = NULL; - struct ost_body *body; - struct niobuf_remote *nioptr; - struct obd_ioobj *iooptr; - struct obd_import *imp = class_exp2cliimp(exp); - int size[4] = { sizeof(struct 
ptlrpc_body), sizeof(*body) }; - int swab, mapped = 0, rc; - ENTRY; - - size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj); - size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr); - - req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SAN_WRITE, 4, size, NULL, - imp->imp_rq_pool, NULL); - if (!req) - RETURN(-ENOMEM); - - /* FIXME bug 249 */ - /* See bug 7198 */ - if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) - req->rq_request_portal = OST_IO_PORTAL; - - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, - sizeof(*iooptr)); - nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, - sizeof(*nioptr) * page_count); - - memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa)); - - obdo_to_ioobj(oinfo->oi_oa, iooptr); - iooptr->ioo_bufcnt = page_count; - - /* pack request */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - LASSERT(PageLocked(pga[mapped].pg)); - LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off); - - nioptr->offset = pga[mapped].off; - nioptr->len = pga[mapped].count; - nioptr->flags = pga[mapped].flag; - } - - size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr); - ptlrpc_req_set_repsize(req, 3, size); - - rc = ptlrpc_queue_wait(req); - if (rc) - GOTO(out_req, rc); - - swab = lustre_msg_swabbed (req->rq_repmsg); - LASSERT_REPSWAB(req, REPLY_REC_OFF + 1); - nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, - size[REPLY_REC_OFF + 1]); - if (!nioptr) { - CERROR("absent/short niobuf array\n"); - GOTO(out_req, rc = -EPROTO); - } - - /* actual write */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - struct page *page = pga[mapped].pg; - struct buffer_head *bh; - kdev_t dev; - - if (swab) - lustre_swab_niobuf_remote (nioptr); - - /* got san device associated */ - LASSERT(exp->exp_obd != NULL); - dev = exp->exp_obd->u.cli.cl_sandev; - - if (!page->buffers) { - create_empty_buffers(page, dev, 
CFS_PAGE_SIZE); - } else { - /* checking */ - LASSERT(!test_bit(BH_New, &page->buffers->b_state)); - LASSERT(test_bit(BH_Mapped, &page->buffers->b_state)); - LASSERT(page->buffers->b_blocknr == - (unsigned long)nioptr->offset); - } - bh = page->buffers; - - LASSERT(bh); - - /* if buffer locked, wait it's io completion */ - if (test_bit(BH_Lock, &bh->b_state)) - wait_on_buffer(bh); - - clear_bit(BH_New, &bh->b_state); - set_bit(BH_Mapped, &bh->b_state); - - /* override the block nr */ - bh->b_blocknr = (unsigned long)nioptr->offset; - - /* we are about to write it, so set it - * uptodate/dirty - * page lock should garentee no race condition here */ - set_bit(BH_Uptodate, &bh->b_state); - set_bit(BH_Dirty, &bh->b_state); - - ll_rw_block(WRITE, 1, &bh); - - /* must do syncronous write here */ - wait_on_buffer(bh); - if (!buffer_uptodate(bh) || test_bit(BH_Dirty, &bh->b_state)) { - /* I/O error */ - rc = -EIO; - goto out_req; - } - } - -out_req: - ptlrpc_req_finished(req); - RETURN(rc); -} - -static int sanosc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, - obd_count page_count, struct brw_page *pga, - struct obd_trans_info *oti) -{ - struct obd_import *imp = class_exp2cliimp(exp); - struct client_obd *cli = &imp->imp_obd->u.cli; - ENTRY; - - while (page_count) { - obd_count pages_per_brw; - int rc; - - if (page_count > cli->cl_max_pages_per_rpc) - pages_per_brw = cli->cl_max_pages_per_rpc; - else - pages_per_brw = page_count; - - if (cmd & OBD_BRW_WRITE) - rc = sanosc_brw_write(exp, oinfo, pages_per_brw, pga); - else - rc = sanosc_brw_read(exp, oinfo, pages_per_brw, pga); - - if (rc != 0) - RETURN(rc); - - page_count -= pages_per_brw; - pga += pages_per_brw; - } - RETURN(0); -} -#endif - static void osc_set_data_with_check(struct lustre_handle *lockh, void *data, int flags) { @@ -2930,12 +2638,13 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, * lov_enqueue() */ } + /* We already have a lock, and it's referenced */ + 
oinfo->oi_cb_up(oinfo, ELDLM_OK); + /* For async requests, decref the lock. */ if (einfo->ei_rqset) ldlm_lock_decref(oinfo->oi_lockh, einfo->ei_mode); - /* We already have a lock, and it's referenced */ - oinfo->oi_cb_up(oinfo, ELDLM_OK); RETURN(ELDLM_OK); } @@ -2965,8 +2674,8 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, osc_set_data_with_check(oinfo->oi_lockh, einfo->ei_cbdata, einfo->ei_flags); - ldlm_lock_decref(oinfo->oi_lockh, LCK_PW); oinfo->oi_cb_up(oinfo, ELDLM_OK); + ldlm_lock_decref(oinfo->oi_lockh, LCK_PW); RETURN(ELDLM_OK); } } @@ -3509,16 +3218,21 @@ static struct llog_operations osc_size_repl_logops = { static struct llog_operations osc_mds_ost_orig_logops; static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *catid) + int count, struct llog_catid *catid, + struct obd_uuid *uuid) { int rc; ENTRY; - osc_mds_ost_orig_logops = llog_lvfs_ops; - osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup; - osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup; - osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add; - osc_mds_ost_orig_logops.lop_connect = llog_origin_connect; + spin_lock(&obd->obd_dev_lock); + if (osc_mds_ost_orig_logops.lop_setup != llog_obd_origin_setup) { + osc_mds_ost_orig_logops = llog_lvfs_ops; + osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup; + osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup; + osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add; + osc_mds_ost_orig_logops.lop_connect = llog_origin_connect; + } + spin_unlock(&obd->obd_dev_lock); rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count, &catid->lci_logid, &osc_mds_ost_orig_logops); @@ -3821,59 +3535,17 @@ struct obd_ops osc_obd_ops = { .o_process_config = osc_process_config, }; -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct obd_ops sanosc_obd_ops = { - .o_owner = THIS_MODULE, - .o_setup = client_sanobd_setup, - 
.o_precleanup = osc_precleanup, - .o_cleanup = osc_cleanup, - .o_add_conn = client_import_add_conn, - .o_del_conn = client_import_del_conn, - .o_connect = client_connect_import, - .o_reconnect = osc_reconnect, - .o_disconnect = client_disconnect_export, - .o_statfs = osc_statfs, - .o_statfs_async = osc_statfs_async, - .o_packmd = osc_packmd, - .o_unpackmd = osc_unpackmd, - .o_create = osc_real_create, - .o_destroy = osc_destroy, - .o_getattr = osc_getattr, - .o_getattr_async = osc_getattr_async, - .o_setattr = osc_setattr, - .o_setattr_async = osc_setattr_async, - .o_brw = sanosc_brw, - .o_punch = osc_punch, - .o_sync = osc_sync, - .o_enqueue = osc_enqueue, - .o_match = osc_match, - .o_change_cbdata = osc_change_cbdata, - .o_cancel = osc_cancel, - .o_cancel_unused = osc_cancel_unused, - .o_join_lru = osc_join_lru, - .o_iocontrol = osc_iocontrol, - .o_import_event = osc_import_event, - .o_llog_init = osc_llog_init, - .o_llog_finish = osc_llog_finish, -}; -#endif - extern quota_interface_t osc_quota_interface; int __init osc_init(void) { struct lprocfs_static_vars lvars; -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct lprocfs_static_vars sanlvars; -#endif int rc; ENTRY; lprocfs_init_vars(osc, &lvars); -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - lprocfs_init_vars(osc, &sanlvars); -#endif + request_module("lquota"); quota_interface = PORTAL_SYMBOL_GET(osc_quota_interface); lquota_init(quota_interface); init_obd_quota_ops(quota_interface, &osc_obd_ops); @@ -3886,17 +3558,6 @@ int __init osc_init(void) RETURN(rc); } -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - rc = class_register_type(&sanosc_obd_ops, NULL, sanlvars.module_vars, - LUSTRE_SANOSC_NAME, NULL); - if (rc) { - class_unregister_type(LUSTRE_OSC_NAME); - if (quota_interface) - PORTAL_SYMBOL_PUT(osc_quota_interface); - RETURN(rc); - } -#endif - RETURN(rc); } @@ -3907,9 +3568,6 @@ static void /*__exit*/ osc_exit(void) if 
(quota_interface) PORTAL_SYMBOL_PUT(osc_quota_interface); -#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - class_unregister_type(LUSTRE_SANOSC_NAME); -#endif class_unregister_type(LUSTRE_OSC_NAME); } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index c2183cf..d883f47 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1074,95 +1074,6 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) RETURN(rc); } -static int ost_san_brw(struct ptlrpc_request *req, int cmd) -{ - struct niobuf_remote *remote_nb, *res_nb, *pp_rnb = NULL; - struct obd_ioobj *ioo; - struct ost_body *body, *repbody; - int rc, i, objcount, niocount, npages, swab; - int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; - ENTRY; - - /* XXX not set to use latest protocol */ - - swab = lustre_msg_swabbed(req->rq_reqmsg); - body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR("Missing/short ost_body\n"); - GOTO(out, rc = -EFAULT); - } - - ioo = lustre_swab_reqbuf(req, REQ_REC_OFF + 1, sizeof(*ioo), - lustre_swab_obd_ioobj); - if (ioo == NULL) { - CERROR("Missing/short ioobj\n"); - GOTO(out, rc = -EFAULT); - } - objcount = lustre_msg_buflen(req->rq_reqmsg, REQ_REC_OFF + 1) / - sizeof(*ioo); - niocount = ioo[0].ioo_bufcnt; - for (i = 1; i < objcount; i++) { - if (swab) - lustre_swab_obd_ioobj (&ioo[i]); - niocount += ioo[i].ioo_bufcnt; - } - - remote_nb = lustre_swab_reqbuf(req, REQ_REC_OFF + 2, - niocount * sizeof(*remote_nb), - lustre_swab_niobuf_remote); - if (remote_nb == NULL) { - CERROR("Missing/short niobuf\n"); - GOTO(out, rc = -EFAULT); - } - if (swab) { /* swab the remaining niobufs */ - for (i = 1; i < niocount; i++) - lustre_swab_niobuf_remote (&remote_nb[i]); - } - - /* - * Per-thread array of struct niobuf_remote's was allocated by - * ost_thread_init(). 
- */ - pp_rnb = ost_tls(req)->remote; - - /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */ - npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb); - if (npages < 0) - GOTO (out, rc = npages); - - size[REPLY_REC_OFF + 1] = npages * sizeof(*pp_rnb); - rc = lustre_pack_reply(req, 3, size, NULL); - if (rc) - GOTO(out, rc); - - req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa, - objcount, ioo, npages, pp_rnb); - - if (req->rq_status) - GOTO(out, rc = 0); - - repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - - res_nb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, - size[REPLY_REC_OFF + 1]); - memcpy(res_nb, remote_nb, size[REPLY_REC_OFF + 1]); - rc = 0; -out: - target_committed_to_req(req); - if (rc) { - req->rq_status = rc; - ptlrpc_error(req); - } else { - ptlrpc_reply(req); - } - - return rc; -} - - static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req) { char *key, *val = NULL; @@ -1323,8 +1234,6 @@ int ost_msg_check_version(struct lustre_msg *msg) case OST_SETATTR: case OST_WRITE: case OST_READ: - case OST_SAN_READ: - case OST_SAN_WRITE: case OST_PUNCH: case OST_STATFS: case OST_SYNC: @@ -1477,18 +1386,6 @@ static int ost_handle(struct ptlrpc_request *req) LASSERT(current->journal_info == NULL); /* ost_brw_read sends its own replies */ RETURN(rc); - case OST_SAN_READ: - CDEBUG(D_INODE, "san read\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_san_brw(req, OBD_BRW_READ); - /* ost_san_brw sends its own replies */ - RETURN(rc); - case OST_SAN_WRITE: - CDEBUG(D_INODE, "san write\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_san_brw(req, OBD_BRW_WRITE); - /* ost_san_brw sends its own replies */ - RETURN(rc); case OST_PUNCH: CDEBUG(D_INODE, "punch\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 9b79a1e..bf5d9ba 100644 --- 
a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1035,6 +1035,9 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)", (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent); + if (imp != NULL) + LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer); + spin_lock(&req->rq_lock); req->rq_timedout = 1; req->rq_wait_ctx = 0; diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 8863dc2..e93e40b 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -206,7 +206,7 @@ void request_in_callback(lnet_event_t *ev) req->rq_self = ev->target.nid; req->rq_rqbd = rqbd; req->rq_phase = RQ_PHASE_NEW; -#if CRAY_XT3 +#ifdef CRAY_XT3 req->rq_uid = ev->uid; #endif diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 600b514..740450c 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -310,10 +310,12 @@ static int import_select_connection(struct obd_import *imp) class_export_put(dlmexp); if (imp->imp_conn_current != imp_conn) { - LCONSOLE_INFO("Changing connection for %s to %s/%s\n", - imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid, - libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); - imp->imp_conn_current = imp_conn; + if (imp->imp_conn_current) + LCONSOLE_INFO("Changing connection for %s to %s/%s\n", + imp->imp_obd->obd_name, + imp_conn->oic_uuid.uuid, + libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); + imp->imp_conn_current = imp_conn; } CDEBUG(D_HA, "%s: import %p using connection %s/%s\n", @@ -824,6 +826,11 @@ static int ptlrpc_invalidate_import_thread(void *data) ptlrpc_invalidate_import(imp); + if (obd_dump_on_eviction) { + CERROR("dump the log upon eviction\n"); + libcfs_debug_dumplog(); + } + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); ptlrpc_import_recovery_state_machine(imp); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 0753225..2911944 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ 
b/lustre/ptlrpc/lproc_ptlrpc.c @@ -51,8 +51,8 @@ struct ll_rpc_opcode { { OST_OPEN, "ost_open" }, { OST_CLOSE, "ost_close" }, { OST_STATFS, "ost_statfs" }, - { OST_SAN_READ, "ost_san_read" }, - { OST_SAN_WRITE, "ost_san_write" }, + { 14, NULL }, + { 15, NULL }, { OST_SYNC, "ost_sync" }, { OST_SET_INFO, "ost_set_info" }, { OST_QUOTACHECK, "ost_quotacheck" }, @@ -96,7 +96,7 @@ const char* ll_opcode2str(__u32 opcode) * is missing from the table above. * or 2) The opcode space was renumbered or rearranged, * and the opcode_offset() function in - * ptlrpc_internals.h needs to be modified. + * ptlrpc_internal.h needs to be modified. */ __u32 offset = opcode_offset(opcode); LASSERT(offset < LUSTRE_MAX_OPCODES); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 5724089..f88fa3f 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -828,7 +828,7 @@ static inline void lustre_msg_set_buflen_v1(void *msg, int n, int len) m->lm_buflens[n] = len; } -static inline int +static inline void lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len) { if (n >= m->lm_bufcount) @@ -2109,8 +2109,97 @@ int llog_log_swabbed(struct llog_log_hdr *hdr) void lustre_swab_qdata(struct qunit_data *d) { __swab32s (&d->qd_id); + __swab32s (&d->qd_flags); + __swab64s (&d->qd_count); +} + +void lustre_swab_qdata_old(struct qunit_data_old *d) +{ + __swab32s (&d->qd_id); __swab32s (&d->qd_type); __swab32s (&d->qd_count); __swab32s (&d->qd_isblk); } +#ifdef __KERNEL__ +struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d) +{ + struct qunit_data_old tmp; + struct qunit_data *ret; + ENTRY; + + if (!d) + return NULL; + + tmp = *d; + ret = (struct qunit_data *)d; + ret->qd_id = tmp.qd_id; + ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) | (tmp.qd_isblk ? 
QUOTA_IS_BLOCK : 0); + ret->qd_count = tmp.qd_count; + RETURN(ret); + +} +EXPORT_SYMBOL(lustre_quota_old_to_new); + +struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d) +{ + struct qunit_data tmp; + struct qunit_data_old *ret; + ENTRY; + + if (!d) + return NULL; + + LASSERT(d->qd_count <= MAX_QUOTA_COUNT32); + tmp = *d; + ret = (struct qunit_data_old *)d; + ret->qd_id = tmp.qd_id; + ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA); + ret->qd_count = (__u32)tmp.qd_count; + ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 1 : 0); + RETURN(ret); +} +EXPORT_SYMBOL(lustre_quota_new_to_old); +#endif /* __KERNEL__ */ + + +void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask, + const char *file, const char *func, const int line, + const char *fmt, va_list args); +void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask, + const char *file, const char *func, const int line, + const char *fmt, ...); + +void debug_req(cfs_debug_limit_state_t *cdls, + __u32 level, struct ptlrpc_request *req, + const char *file, const char *func, const int line, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + cdebug_va(cdls, level, file, func, line, fmt, args); + va_end(args); + + cdebug(cdls, level, file, func, line, + " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " + REQ_FLAGS_FMT"/%x/%x rc %d/%d\n", + req, req->rq_xid, req->rq_transno, + req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1, + req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : + req->rq_export ? + (char*)req->rq_export->exp_client_uuid.uuid : "", + req->rq_import ? + (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : + req->rq_export ? + (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "", + (req->rq_import && req->rq_import->imp_client) ? + req->rq_import->imp_client->cli_request_portal : -1, + req->rq_reqlen, req->rq_replen, atomic_read(&req->rq_refcount), + DEBUG_REQ_FLAGS(req), + req->rq_reqmsg ? 
lustre_msg_get_flags(req->rq_reqmsg) : 0, + req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0, + req->rq_status, + req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0); +} +EXPORT_SYMBOL(debug_req); diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index be470c9..b1c852b 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -243,6 +243,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc); EXPORT_SYMBOL(lustre_swab_ldlm_request); EXPORT_SYMBOL(lustre_swab_ldlm_reply); EXPORT_SYMBOL(lustre_swab_qdata); +EXPORT_SYMBOL(lustre_swab_qdata_old); EXPORT_SYMBOL(lustre_msg_get_flags); EXPORT_SYMBOL(lustre_msg_add_flags); EXPORT_SYMBOL(lustre_msg_set_flags); @@ -274,8 +275,6 @@ EXPORT_SYMBOL(lustre_swab_md_fld); EXPORT_SYMBOL(lustre_swab_generic_32s); /* recover.c */ -EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall); -EXPORT_SYMBOL(ptlrpc_run_failed_import_upcall); EXPORT_SYMBOL(ptlrpc_disconnect_import); EXPORT_SYMBOL(ptlrpc_resend); EXPORT_SYMBOL(ptlrpc_wake_delayed); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 1ca9a20..1849656 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -47,100 +47,12 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *, char *); -void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) -{ - char *argv[4]; - char *envp[3]; - int rc; - ENTRY; - - argv[0] = obd_lustre_upcall; - argv[1] = "RECOVERY_OVER"; - argv[2] = obd->obd_uuid.uuid; - argv[3] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking recovery upcall %s %s %s: %d; check " - "/proc/sys/lustre/upcall\n", - argv[0], argv[1], argv[2], rc); - - } else { - CWARN("Invoked upcall %s %s %s\n", - argv[0], argv[1], argv[2]); - } -} - -void ptlrpc_run_failed_import_upcall(struct obd_import* imp) -{ -#ifdef __KERNEL__ - char *argv[7]; - char *envp[3]; 
- int rc; - ENTRY; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_CLOSED) { - spin_unlock(&imp->imp_lock); - EXIT; - return; - } - spin_unlock(&imp->imp_lock); - - argv[0] = obd_lustre_upcall; - argv[1] = "FAILED_IMPORT"; - argv[2] = obd2cli_tgt(imp->imp_obd); - argv[3] = imp->imp_obd->obd_name; - argv[4] = imp->imp_connection->c_remote_uuid.uuid; - argv[5] = imp->imp_obd->obd_uuid.uuid; - argv[6] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking recovery upcall %s %s %s %s %s %s: %d; " - "check /proc/sys/lustre/lustre_upcall\n", - argv[0], argv[1], argv[2], argv[3], argv[4], argv[5],rc); - } else { - CWARN("Invoked upcall %s %s %s %s %s %s\n", - argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]); - } -#else - if (imp->imp_state == LUSTRE_IMP_CLOSED) { - EXIT; - return; - } - ptlrpc_recover_import(imp, NULL); -#endif -} - -/* This might block waiting for the upcall to start, so it should - * not be called from a thread that shouldn't block. 
(Like ptlrpcd) */ void ptlrpc_initiate_recovery(struct obd_import *imp) { ENTRY; - LASSERT (obd_lustre_upcall != NULL); - - if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) { - CDEBUG(D_HA, "%s: starting recovery without upcall\n", - obd2cli_tgt(imp->imp_obd)); - ptlrpc_connect_import(imp, NULL); - } else if (strcmp(obd_lustre_upcall, "NONE") == 0) { - CDEBUG(D_HA, "%s: recovery disabled\n", - obd2cli_tgt(imp->imp_obd)); - } else { - CDEBUG(D_HA, "%s: calling upcall to start recovery\n", - obd2cli_tgt(imp->imp_obd)); - ptlrpc_run_failed_import_upcall(imp); - } + CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd)); + ptlrpc_connect_import(imp, NULL); EXIT; } diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index b9dddba..474a7b3 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -933,11 +933,9 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) /* special security flags accoding to opcode */ switch (opcode) { case OST_READ: - case OST_SAN_READ: req->rq_bulk_read = 1; break; case OST_WRITE: - case OST_SAN_WRITE: req->rq_bulk_write = 1; break; case SEC_CTX_INIT: @@ -1473,9 +1471,9 @@ int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req) if (rc == SECSVC_OK) { __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); - if (opc == OST_WRITE || opc == OST_SAN_WRITE) + if (opc == OST_WRITE) req->rq_bulk_write = 1; - else if (opc == OST_READ || opc == OST_SAN_READ) + else if (opc == OST_READ) req->rq_bulk_read = 1; } diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 47506c1..bd69097 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -49,6 +49,38 @@ struct lustre_qunit { struct list_head lq_waiters; /* All write threads waiting for this qunit */ }; +int should_translate_quota (struct obd_import *imp) +{ + struct obd_device *obd; + struct obd_export *tmp; + ENTRY; + + LASSERT(imp); + if (imp->imp_connect_data.ocd_connect_flags){ + if (imp->imp_connect_data.ocd_connect_flags 
& OBD_CONNECT_QUOTA64) + return 0; + else + return 1; + } + + obd = imp->imp_obd; + spin_lock(&obd->obd_dev_lock); + list_for_each_entry(tmp,&obd->obd_exports,exp_obd_chain){ + if (tmp->exp_imp_reverse == imp){ + imp->imp_connect_data.ocd_connect_flags = tmp->exp_connect_flags; + spin_unlock(&obd->obd_dev_lock); + if (tmp->exp_connect_flags & OBD_CONNECT_QUOTA64) + return 0; + else + return 1; + } + } + spin_unlock(&obd->obd_dev_lock); + CDEBUG(D_QUOTA, "don't find the corresponding export!"); + + RETURN(0); +} + void qunit_cache_cleanup(void) { int i; @@ -87,11 +119,15 @@ int qunit_cache_init(void) RETURN(0); } -static inline int const +static inline int +qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) + __attribute__((__const__)); + +static inline int qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) { unsigned int id = qdata->qd_id; - unsigned int type = qdata->qd_type; + unsigned int type = qdata->qd_flags & QUOTA_IS_GRP; unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id; tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; @@ -110,8 +146,7 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent, list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) { tmp = &qunit->lq_data; if (qunit->lq_ctxt == qctxt && - qdata->qd_id == tmp->qd_id && qdata->qd_type == tmp->qd_type - && qdata->qd_isblk == tmp->qd_isblk) + qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags) return qunit; } return NULL; @@ -135,13 +170,15 @@ check_cur_qunit(struct obd_device *obd, __u64 usage, limit; struct obd_quotactl *qctl; int ret = 0; + __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; + __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; ENTRY; if (!sb_any_quota_enabled(sb)) RETURN(0); /* ignore root user */ - if (qdata->qd_id == 0 && qdata->qd_type == USRQUOTA) + if (qdata->qd_id == 0 && qdata_type == USRQUOTA) RETURN(0); OBD_ALLOC_PTR(qctl); @@ -151,7 +188,7 @@ check_cur_qunit(struct obd_device 
*obd, /* get fs quota usage & limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata->qd_type; + qctl->qc_type = qdata_type; ret = fsfilt_quotactl(obd, sb, qctl); if (ret) { if (ret == -ESRCH) /* no limit */ @@ -161,7 +198,7 @@ check_cur_qunit(struct obd_device *obd, GOTO(out, ret); } - if (qdata->qd_isblk) { + if (is_blk) { usage = qctl->qc_dqblk.dqb_curspace; limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; qunit_sz = qctxt->lqc_bunit_sz; @@ -180,8 +217,8 @@ check_cur_qunit(struct obd_device *obd, GOTO(out, ret = 0); /* we don't count the MIN_QLIMIT */ - if ((limit == MIN_QLIMIT && !qdata->qd_isblk) || - (toqb(limit) == MIN_QLIMIT && qdata->qd_isblk)) + if ((limit == MIN_QLIMIT && !is_blk) || + (toqb(limit) == MIN_QLIMIT && is_blk)) limit = 0; LASSERT(qdata->qd_count == 0); @@ -273,11 +310,6 @@ struct qunit_waiter { int qw_rc; }; -#define QDATA_DEBUG(qd, fmt, arg...) \ - CDEBUG(D_QUOTA, "id(%u) type(%u) count(%u) isblk(%u):" \ - fmt, qd->qd_id, qd->qd_type, qd->qd_count, qd->qd_isblk, \ - ## arg); \ - #define INC_QLIMIT(limit, count) (limit == MIN_QLIMIT) ? 
\ (limit = count) : (limit += count) @@ -294,6 +326,35 @@ static int schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int opc, int wait); +static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int opc, int wait) +{ + int rc = 0, ret; + struct qunit_data tmp_qdata; + ENTRY; + + LASSERT(qdata); + if (qctxt->lqc_import) + while (should_translate_quota(qctxt->lqc_import) && + qdata->qd_count > MAX_QUOTA_COUNT32) { + + tmp_qdata = *qdata; + tmp_qdata.qd_count = MAX_QUOTA_COUNT32; + qdata->qd_count -= tmp_qdata.qd_count; + ret = schedule_dqacq(obd, qctxt, &tmp_qdata, opc, wait); + if (!rc) + rc = ret; + } + + if (qdata->qd_count){ + ret = schedule_dqacq(obd, qctxt, qdata, opc, wait); + if (!rc) + rc = ret; + } + + RETURN(rc); +} + static int dqacq_completion(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, @@ -304,15 +365,18 @@ dqacq_completion(struct obd_device *obd, unsigned long qunit_sz; struct qunit_waiter *qw, *tmp; int err = 0; + __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; + __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; ENTRY; LASSERT(qdata); - qunit_sz = qdata->qd_isblk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz; - LASSERT(!(qdata->qd_count % qunit_sz)); + qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz; + /* now qdata->qd_count is 64bit, we can't do it */ + //LASSERT(!(qdata->qd_count % qunit_sz)); /* update local operational quota file */ if (rc == 0) { - __u32 count = QUSG(qdata->qd_count, qdata->qd_isblk); + __u32 count = QUSG(qdata->qd_count, is_blk); struct obd_quotactl *qctl; __u64 *hardlimit; @@ -325,14 +389,14 @@ dqacq_completion(struct obd_device *obd, * set fs quota limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata->qd_type; + qctl->qc_type = qdata_type; err = fsfilt_quotactl(obd, sb, qctl); if (err) { CERROR("error get quota fs limit! 
(rc:%d)\n", err); GOTO(out_mem, err); } - if (qdata->qd_isblk) { + if (is_blk) { qctl->qc_dqblk.dqb_valid = QIF_BLIMITS; hardlimit = &qctl->qc_dqblk.dqb_bhardlimit; } else { @@ -402,8 +466,8 @@ out: * - local dqacq/dqrel. * - local disk io failure. */ - if (err || (rc && rc != -EBUSY) || - is_master(obd, qctxt, qdata->qd_id, qdata->qd_type)) + if (err || (rc && rc != -EBUSY) || + is_master(obd, qctxt, qdata->qd_id, qdata_type)) RETURN(err); /* reschedule another dqacq/dqrel if needed */ @@ -412,7 +476,7 @@ out: if (rc > 0) { int opc; opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL; - rc = schedule_dqacq(obd, qctxt, qdata, opc, 0); + rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0); QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc); } RETURN(err); @@ -430,17 +494,28 @@ static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc) struct lustre_qunit *qunit = aa->aa_qunit; struct obd_device *obd = req->rq_import->imp_obd; struct qunit_data *qdata = NULL; + struct qunit_data_old *qdata_old = NULL; ENTRY; - qdata = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*qdata), - lustre_swab_qdata); + LASSERT(req); + LASSERT(req->rq_import); + if ((req->rq_import->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && + !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { + CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); + qdata = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(*qdata), lustre_swab_qdata); + } else { + CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); + qdata_old = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(struct qunit_data_old), + lustre_swab_qdata_old); + qdata = lustre_quota_old_to_new(qdata_old); + } if (qdata == NULL) { DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data\n"); RETURN(-EPROTO); } LASSERT(qdata->qd_id == qunit->lq_data.qd_id && - qdata->qd_type == qunit->lq_data.qd_type && + (qdata->qd_flags & QUOTA_IS_GRP) == (qunit->lq_data.qd_flags & QUOTA_IS_GRP) && (qdata->qd_count == qunit->lq_data.qd_count || qdata->qd_count == 0)); @@ 
-506,7 +581,7 @@ schedule_dqacq(struct obd_device *obd, LASSERT(qunit); /* master is going to dqacq/dqrel from itself */ - if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_type)) { + if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP)) { int rc2; QDATA_DEBUG(qdata, "local %s.\n", opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); @@ -524,9 +599,26 @@ schedule_dqacq(struct obd_device *obd, RETURN(-ENOMEM); } - reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, - sizeof(*reqdata)); - *reqdata = *qdata; + LASSERT(!should_translate_quota(qctxt->lqc_import) || + qdata->qd_count <= MAX_QUOTA_COUNT32); + if (should_translate_quota(qctxt->lqc_import) || + OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) + { + struct qunit_data_old *reqdata_old, *tmp; + + reqdata_old = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, + sizeof(*reqdata_old)); + tmp = lustre_quota_new_to_old(qdata); + *reqdata_old = *tmp; + size[1] = sizeof(*reqdata_old); + CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); + } else { + reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, + sizeof(*reqdata)); + *reqdata = *qdata; + size[1] = sizeof(*reqdata); + CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); + } ptlrpc_req_set_repsize(req, 2, size); CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); @@ -568,8 +660,9 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, for (i = 0; i < MAXQUOTAS; i++) { qdata[i].qd_id = id[i]; - qdata[i].qd_type = i; - qdata[i].qd_isblk = isblk; + qdata[i].qd_flags = 0; + qdata[i].qd_flags |= i; + qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0; qdata[i].qd_count = 0; ret = check_cur_qunit(obd, qctxt, &qdata[i]); @@ -577,7 +670,8 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, int opc; /* need acquire or release */ opc = ret == 1 ? 
QUOTA_DQACQ : QUOTA_DQREL; - ret = schedule_dqacq(obd, qctxt, &qdata[i], opc, wait); + ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i], + opc, wait); if (!rc) rc = ret; } @@ -601,8 +695,9 @@ qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, qw.qw_rc = 0; qdata.qd_id = id; - qdata.qd_type = type; - qdata.qd_isblk = isblk; + qdata.qd_flags = 0; + qdata.qd_flags |= type; + qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0; qdata.qd_count = 0; spin_lock(&qunit_hash_lock); @@ -735,15 +830,16 @@ static int qslave_recovery_main(void *arg) goto free; qdata.qd_id = dqid->di_id; - qdata.qd_type = type; - qdata.qd_isblk = 1; + qdata.qd_flags = 0; + qdata.qd_flags |= type; + qdata.qd_flags |= QUOTA_IS_BLOCK; qdata.qd_count = 0; ret = check_cur_qunit(obd, qctxt, &qdata); if (ret > 0) { int opc; opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL; - rc = schedule_dqacq(obd, qctxt, &qdata, opc, 0); + rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0); } else rc = 0; diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index b8a3516..69af027 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -412,9 +412,10 @@ static struct list_head qinfo_hash[NR_DQHASH]; /* SLAB cache for client quota context */ cfs_mem_cache_t *qinfo_cachep = NULL; -static inline int const hashfn(struct client_obd *cli, - unsigned long id, - int type) +static inline int hashfn(struct client_obd *cli, unsigned long id, int type) + __attribute__((__const__)); + +static inline int hashfn(struct client_obd *cli, unsigned long id, int type) { unsigned long tmp = ((unsigned long)cli>>6) ^ id; tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index 6e8438e..5756e88 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -44,6 +44,13 @@ qinfo->qi_info[0].dqi_free_entry, \ qinfo->qi_info[1].dqi_free_entry, ## arg); +#define QDATA_DEBUG(qd, 
fmt, arg...) \ + CDEBUG(D_QUOTA, "id(%u) type(%lu) count(%llu) isblk(%lu):" \ + fmt, qd->qd_id, qd->qd_flags & QUOTA_IS_GRP, qd->qd_count, \ + (qd->qd_flags & QUOTA_IS_BLOCK) >> 1, \ + ## arg); + + /* quota_context.c */ void qunit_cache_cleanup(void); int qunit_cache_init(void); diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 7332669..77d6699 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -77,8 +77,12 @@ void lustre_dquot_exit(void) EXIT; } -static inline int const dquot_hashfn(struct lustre_quota_info *info, - unsigned int id, int type) +static inline int +dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type) + __attribute__((__const__)); + +static inline int +dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type) { unsigned long tmp = ((unsigned long)info >> L1_CACHE_SHIFT) ^ id; tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; @@ -201,6 +205,8 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) struct lustre_dquot *dquot = NULL; __u64 *usage = NULL; __u32 hlimit = 0, slimit = 0; + __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; + __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; time_t *time = NULL; unsigned int grace = 0; int rc = 0; @@ -209,9 +215,9 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) OBD_FAIL_RETURN(OBD_FAIL_OBD_DQACQ, -EIO); /* slaves never acquires qunit for user root */ - LASSERT(qdata->qd_id || qdata->qd_type == GRPQUOTA); + LASSERT(qdata->qd_id || qdata_type); - dquot = lustre_dqget(obd, info, qdata->qd_id, qdata->qd_type); + dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type); if (IS_ERR(dquot)) RETURN(PTR_ERR(dquot)); @@ -226,14 +232,14 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) GOTO(out, rc = -EBUSY); } - if (qdata->qd_isblk) { - grace = info->qi_info[qdata->qd_type].dqi_bgrace; + if (is_blk) { + grace = 
info->qi_info[qdata_type].dqi_bgrace; usage = &dquot->dq_dqb.dqb_curspace; hlimit = dquot->dq_dqb.dqb_bhardlimit; slimit = dquot->dq_dqb.dqb_bsoftlimit; time = &dquot->dq_dqb.dqb_btime; } else { - grace = info->qi_info[qdata->qd_type].dqi_igrace; + grace = info->qi_info[qdata_type].dqi_igrace; usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes; hlimit = dquot->dq_dqb.dqb_ihardlimit; slimit = dquot->dq_dqb.dqb_isoftlimit; @@ -250,11 +256,11 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) switch (opc) { case QUOTA_DQACQ: if (hlimit && - QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > hlimit) + QUSG(*usage + qdata->qd_count, is_blk) > hlimit) GOTO(out, rc = -EDQUOT); if (slimit && - QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > slimit) { + QUSG(*usage + qdata->qd_count, is_blk) > slimit) { if (*time && cfs_time_current_sec() >= *time) GOTO(out, rc = -EDQUOT); else if (!*time) @@ -272,7 +278,7 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) *usage -= qdata->qd_count; /* (usage <= soft limit) but not (usage < soft limit) */ - if (!slimit || QUSG(*usage, qdata->qd_isblk) <= slimit) + if (!slimit || QUSG(*usage, is_blk) <= slimit) *time = 0; break; default: diff --git a/lustre/scripts/Makefile.am b/lustre/scripts/Makefile.am index 287691a..2588e6a 100644 --- a/lustre/scripts/Makefile.am +++ b/lustre/scripts/Makefile.am @@ -6,11 +6,11 @@ # These are scripts that are generated from .in files genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh -sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh +sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh lustre_rmmod.sh EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \ $(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \ - lustre_up14.sh + lustre_up14.sh lustre_rmmod.sh scriptlibdir = $(libdir)/@PACKAGE@ scriptlib_DATA = lc_common.sh diff --git 
a/lustre/scripts/lmc2csv.pl b/lustre/scripts/lmc2csv.pl index 14a690d..86959bb 100644 --- a/lustre/scripts/lmc2csv.pl +++ b/lustre/scripts/lmc2csv.pl @@ -173,13 +173,13 @@ foreach my $mds (@{$objs{"mds"}}) { my $lov = $mds->{"lov"}; my $mkfs_options=""; if (defined($lov->{"stripe_sz"})) { - $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . " "; + $mkfs_options .= "lov.stripesize=" . $lov->{"stripe_sz"} . " "; } if (defined($lov->{"stripe_cnt"})) { - $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . " "; + $mkfs_options .= "lov.stripecount=" . $lov->{"stripe_cnt"} . " "; } if (defined($lov->{"stripe_pattern"})) { - $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . " "; + $mkfs_options .= "lov.stripetype=" . $lov->{"stripe_pattern"} . " "; } chop($mkfs_options); if ($mkfs_options ne "") { diff --git a/lustre/scripts/lustre_rmmod.sh b/lustre/scripts/lustre_rmmod.sh new file mode 100755 index 0000000..2f6b6c2 --- /dev/null +++ b/lustre/scripts/lustre_rmmod.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# +# remove all lustre modules. Won't succeed if they're in use, or if you +# manually did a 'lctl network up'. 
+############################################################################### + +SRCDIR=`dirname $0` +PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH + +lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 +# do it again, in case we tried to unload the lnd's too early +lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 0f8567f..ecd2c7c 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -21,7 +21,7 @@ ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP 23" SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH -PTLDEBUG=${PTLDEBUG:-1} +PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} @@ -82,7 +82,7 @@ stop_ost2() { mount_client() { local MOUNTPATH=$1 - echo "mount lustre on ${MOUNTPATH}....." + echo "mount $FSNAME on ${MOUNTPATH}....." zconf_mount `hostname` $MOUNTPATH || return 96 } @@ -856,6 +856,53 @@ test_23() { } run_test 23 "interrupt client during recovery mount delay" +test_24a() { + local fs2mds_HOST=$mds_HOST + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --nomgs --mgsnode=$MGSNID --reformat ${MDSDEV}_2 || exit 10 + + local fs2ost_HOST=$ost_HOST + local fs2ostdev=$(ostdevname 1)_2 + add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME}2 --reformat $fs2ostdev || exit 10 + + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS + start fs2ost $fs2ostdev $OST_MOUNT_OPTS + mkdir -p $MOUNT2 + mount -t lustre $MGSNID:/${FSNAME}2 $MOUNT2 || return 1 + check_mount || return 2 + sleep 5 + cp /etc/passwd $MOUNT2/b || return 3 + rm $MOUNT2/b || return 4 + grep $MOUNT2' ' /proc/mounts > /dev/null || return 5 + df + stop_mds + umount $MOUNT2 + stop fs2mds -f + stop fs2ost -f + cleanup || return 6 +} +run_test 24a "Multiple MDTs on a single node" + +test_24b() { + local fs2mds_HOST=$mds_HOST + add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 
--mgs --reformat ${MDSDEV}_2 || exit 10 + setup + start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS && return 2 + cleanup || return 6 +} +run_test 24b "Multiple MGSs on a single node (should return err)" + +test_25() { + setup + check_mount || return 2 + local MODULES=$($LCTL modules | awk '{ print $2 }') + rmmod $MODULES 2>/dev/null || true + cleanup || return 6 +} +run_test 25 "Verify modules are referenced" + + + umount_client $MOUNT cleanup_nocli cleanup_krb5_env diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c index fb9c99b..7e96ae6 100644 --- a/lustre/tests/directio.c +++ b/lustre/tests/directio.c @@ -81,7 +81,8 @@ int main(int argc, char **argv) rc = write(fd, wbuf, len); if (rc != len) { - printf("Write error %s (rc = %d)\n",strerror(errno),rc); + printf("Write error %s (rc = %d, len = %ld)\n", + strerror(errno), rc, len); return 1; } } diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index 83f6564..460c8f1 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -14,7 +14,6 @@ ALWAYS_EXCEPT="10 $INSANITY_EXCEPT" SETUP=${SETUP:-"setup"} CLEANUP=${CLEANUP:-"cleanup"} -UPCALL=${UPCALL:-DEFAULT} build_test_filter diff --git a/lustre/tests/lfscktest.sh b/lustre/tests/lfscktest.sh index 35078e0..aae9161 100755 --- a/lustre/tests/lfscktest.sh +++ b/lustre/tests/lfscktest.sh @@ -49,6 +49,16 @@ if [ "$WAS_MOUNTED" ]; then MAX_ERR=4 # max expected error from e2fsck fi +get_mnt_devs() { + DEVS=`cat /proc/fs/lustre/$1/*/mntdev` + for DEV in $DEVS; do + case $DEV in + *loop*) losetup $DEV | sed -e "s/.*(//" -e "s/).*//" ;; + *) echo $DEV ;; + esac + done +} + if [ "$LFSCK_SETUP" != "no" ]; then #Create test directory rm -rf $DIR @@ -112,7 +122,9 @@ if [ "$LFSCK_SETUP" != "no" ]; then done MDS_REMOVE=`echo $MDS_REMOVE | sed "s#$MOUNT/##g"` - OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev` + MDTDEVS=`get_mnt_devs mds` + OSTDEVS=`get_mnt_devs obdfilter` + OSTCOUNT=`echo $OSTDEVS | wc -w` sh llmountcleanup.sh || exit 40 # Remove objects 
associated with files @@ -153,8 +165,9 @@ if [ "$LFSCK_SETUP" != "no" ]; then do_umount else - OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev` - OSTCOUNT=`$LFIND $MOUNT | grep -c "^[0-9]*: "` + MDTDEVS=`get_mnt_devs mds` + OSTDEVS=`get_mnt_devs obdfilter` + OSTCOUNT=`echo $OSTDEVS | wc -w` fi # LFSCK_SETUP # Run e2fsck to get mds and ost info diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index 85685a6..a09f023 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -203,7 +203,8 @@ int main(int argc, char **argv) } fname = strrchr(argv[2], '/'); - fname++; + fname = (fname == NULL ? argv[2] : fname + 1); + strncpy((char *)lum_file1, fname, lum_size); rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file1); if (rc) { @@ -221,7 +222,7 @@ int main(int argc, char **argv) } fname = strrchr(argv[3], '/'); - fname++; + fname = (fname == NULL ? argv[3] : fname + 1); strncpy((char *)lum_file2, fname, lum_size); rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file2); if (rc) { diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh index a30a6b6..a345f4f 100644 --- a/lustre/tests/llecho.sh +++ b/lustre/tests/llecho.sh @@ -12,7 +12,7 @@ if [ "$LUSTRE" ]; then lustre_opt="--lustre=$LUSTRE" fi -sh -x $mkconfig $config || exit 1 +[ -f $config ] || sh -x $mkconfig $config || exit 1 $LCONF $lustre_opt --reformat $@ $OPTS $config || exit 4 diff --git a/lustre/tests/llmount-upcall.sh b/lustre/tests/llmount-upcall.sh deleted file mode 100755 index f5a1ef7..0000000 --- a/lustre/tests/llmount-upcall.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -LUSTRE=`dirname $0`/.. 
-exec >> /tmp/recovery-`hostname`.log -exec 2>&1 - -$LUSTRE/utils/lconf --recover --verbose --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $LUSTRE/tests/local.xml diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index 8c3c625..c23a845 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -129,11 +129,11 @@ try_to_cleanup drop_request "statone /mnt/lustre/2" & wait_for_timeout try_to_cleanup -do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf" -drop_request "cat /mnt/lustre/resolv.conf > /dev/null" & wait_for_timeout +do_client "cp /etc/inittab /mnt/lustre/inittab" +drop_request "cat /mnt/lustre/inittab > /dev/null" & wait_for_timeout try_to_cleanup -drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" & wait_for_timeout +drop_request "mv /mnt/lustre/inittab /mnt/lustre/renamed" & wait_for_timeout try_to_cleanup drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" & wait_for_timeout diff --git a/lustre/tests/recovery-small-upcall.sh b/lustre/tests/recovery-small-upcall.sh deleted file mode 100755 index b1ad60c..0000000 --- a/lustre/tests/recovery-small-upcall.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -LUSTRE=`dirname $0`/.. -PATH=$LUSTRE/utils:$PATH -lctl --device %$3 recover || logger -p kern.info recovery failed: $@ diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 9f9b8e4..0f73e10 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2,10 +2,10 @@ set -e -# bug 5494 7288 -ALWAYS_EXCEPT="24 27 $RECOVERY_SMALL_EXCEPT" +# bug 5494 7288 5493 +ALWAYS_EXCEPT="24 27 52 $RECOVERY_SMALL_EXCEPT" -PTLDEBUG=${PTLDEBUG:-1} +PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} . 
$LUSTRE/tests/test-framework.sh init_test_env $@ @@ -61,14 +61,14 @@ test_3() { run_test 3 "stat: drop req, drop rep" test_4() { - do_facet client "cp /etc/passwd $MOUNT/passwd" || return 1 - drop_request "cat $MOUNT/passwd > /dev/null" || return 2 - drop_reply "cat $MOUNT/passwd > /dev/null" || return 3 + do_facet client "cp /etc/inittab $MOUNT/inittab" || return 1 + drop_request "cat $MOUNT/inittab > /dev/null" || return 2 + drop_reply "cat $MOUNT/inittab > /dev/null" || return 3 } run_test 4 "open: drop req, drop rep" test_5() { - drop_request "mv $MOUNT/passwd $MOUNT/renamed" || return 1 + drop_request "mv $MOUNT/inittab $MOUNT/renamed" || return 1 drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2 do_facet client "checkstat -v $MOUNT/renamed-again" || return 3 } diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index ea56524..3ec7485 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -2,10 +2,10 @@ set -e -# bug number: 6088 10124 -ALWAYS_EXCEPT="8 15c $REPLAY_DUAL_EXCEPT" +# bug number: 6088 10124 10800 +ALWAYS_EXCEPT="8 15c 17 $REPLAY_DUAL_EXCEPT" -PTLDEBUG=${PTLDEBUG:-1} +PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} . $LUSTRE/tests/test-framework.sh diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 1c40ea2..0caba0b 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -2,7 +2,7 @@ set -e -PTLDEBUG=${PTLDEBUG:-1} +PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} . 
$LUSTRE/tests/test-framework.sh init_test_env $@ @@ -11,6 +11,10 @@ init_test_env $@ ostfailover_HOST=${ostfailover_HOST:-$ost_HOST} #failover= must be defined in OST_MKFS_OPTIONS if ostfailover_HOST != ost_HOST +# Tests that fail on uml +CPU=`awk '/model/ {print $4}' /proc/cpuinfo` +[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 6" + # Skip these tests # BUG NUMBER: ALWAYS_EXCEPT="$REPLAY_OST_SINGLE_EXCEPT" diff --git a/lustre/tests/replay-ost-upcall.sh b/lustre/tests/replay-ost-upcall.sh deleted file mode 100755 index 9f9efbf..0000000 --- a/lustre/tests/replay-ost-upcall.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - - -TESTDIR=`dirname $0` -LUSTRE=$TESTDIR/.. - -exec >> $TESTDIR/recovery-`hostname`.log -exec 2>&1 - -set -xv - -failed_import() { -# $LUSTRE/utils/lctl --device %$3 recover || -# logger -p kern.info recovery failed: $@ - - if [ -f $LUSTRE/tests/ostactive ] ; then - source $LUSTRE/tests/ostactive - else - ostactive=ost - fi - - $LUSTRE/utils/lconf --verbose --recover --node client_facet \ - --select ost1=${ostactive}_facet\ - --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $TESTDIR/replay-ost-single.xml - -} - -recovery_over() { - logger -p kern.info upcall: $@ -} - - -case "$1" in -FAILED_IMPORT) failed_import $@ - ;; -RECOVERY_OVER) recovery_over $@ - ;; -esac diff --git a/lustre/tests/replay-single-upcall.sh b/lustre/tests/replay-single-upcall.sh deleted file mode 100755 index 59c1371..0000000 --- a/lustre/tests/replay-single-upcall.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - - -TESTDIR=`dirname $0` -LUSTRE=$TESTDIR/.. - -mkdir -p $TESTDIR/logs - -exec >> $TESTDIR/logs/recovery-`hostname`.log -exec 2>&1 - -echo ========================================== -echo "start upcall: `date`" -echo "command line: $0 $*" - -set -xv - -failed_import() { - if [ -f $TESTDIR/XMLCONFIG ] ; then - source $TESTDIR/XMLCONFIG - if [ ! 
-f $TESTDIR/XMLCONFIG ]; then - echo "config file not found: $XMLCONFIG" - exit 1 - fi - else - echo "$TESTDIR/XMLCONFIG: not found" - exit 1 - fi - - if [ -f $TESTDIR/mdsactive ] ; then - source $TESTDIR/mdsactive - MDSSELECT="--select mds_svc=${mdsactive}_facet" - fi - - if [ -f $TESTDIR/ostactive ] ; then - source $TESTDIR/ostactive - OSTSELECT="--select ost_svc=${ostactive}_facet" - fi - - $LUSTRE/utils/lconf --verbose --recover --node client_facet \ - $MDSSELECT $OSTSELECT \ - --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $XMLCONFIG - -} - -recovery_over() { - logger -p kern.info upcall: $@ -} - - -case "$1" in -FAILED_IMPORT) failed_import $@ - ;; -RECOVERY_OVER) recovery_over $@ - ;; -esac diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 35c4961..443222f 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -14,8 +14,8 @@ init_test_env $@ # Skip these tests -# bug number: 2766 -ALWAYS_EXCEPT="0b $REPLAY_SINGLE_EXCEPT" +# bug number: 2766 4176 +ALWAYS_EXCEPT="0b 39 $REPLAY_SINGLE_EXCEPT" build_test_filter @@ -442,6 +442,26 @@ test_20() { } run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)" +test_20b() { + BEFORESPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'` + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 & + pid=$! 
+ usleep 60 # give dd a chance to start + lfs getstripe $DIR/$tfile || return 1 + rm -f $DIR/$tfile || return 2 # make it an orphan + mds_evict_client + df -P $DIR || df -P $DIR || true # reconnect + + fail mds # start orphan recovery + df -P $DIR || df -P $DIR || true # reconnect + sleep 2 + AFTERSPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'` + [ $AFTERSPACE -lt $((BEFORESPACE - 20)) ] && \ + error "after $AFTERSPACE < before $BEFORESPACE" && return 5 + return 0 +} +run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)" + test_21() { replay_barrier mds multiop $DIR/$tfile O_tSc & @@ -881,11 +901,10 @@ test_44() { mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do - echo iteration $i - #define OBD_FAIL_TGT_CONN_RACE 0x701 - do_facet mds "sysctl -w lustre.fail_loc=0x80000701" - $LCTL --device $mdcdev recover - df $MOUNT + #define OBD_FAIL_TGT_CONN_RACE 0x701 + do_facet mds "sysctl -w lustre.fail_loc=0x80000701" + $LCTL --device $mdcdev recover + df $MOUNT done do_facet mds "sysctl -w lustre.fail_loc=0" return 0 @@ -896,11 +915,10 @@ test_44b() { mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do - echo iteration $i - #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 - do_facet mds "sysctl -w lustre.fail_loc=0x80000704" - $LCTL --device $mdcdev recover - df $MOUNT + #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 + do_facet mds "sysctl -w lustre.fail_loc=0x80000704" + $LCTL --device $mdcdev recover + df $MOUNT done do_facet mds "sysctl -w lustre.fail_loc=0" return 0 diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index ef1467a..c4fe5f4 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -70,7 +70,7 @@ build_test_filter() { done # turn on/off quota tests must be included eval ONLY_0=true - eval ONLY_9=true + eval ONLY_99=true } _basetest() { @@ -164,7 +164,7 @@ set_blk_tunesz() { echo $(($1 * $BLK_SZ)) > $i done # 
set btune size on mds - for i in `ls /proc/fs/lustre/mds/mds*/quota_btune_sz`; do + for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_btune_sz`; do echo $(($1 * $BLK_SZ)) > $i done } @@ -173,7 +173,7 @@ set_blk_unitsz() { for i in `ls /proc/fs/lustre/obdfilter/*/quota_bunit_sz`; do echo $(($1 * $BLK_SZ)) > $i done - for i in `ls /proc/fs/lustre/mds/mds*/quota_bunit_sz`; do + for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_bunit_sz`; do echo $(($1 * $BLK_SZ)) > $i done } @@ -184,7 +184,7 @@ set_file_tunesz() { echo $1 > $i done # set iunit and itune size on mds - for i in `ls /proc/fs/lustre/mds/mds*/quota_itune_sz`; do + for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_itune_sz`; do echo $1 > $i done @@ -195,7 +195,7 @@ set_file_unitsz() { for i in `ls /proc/fs/lustre/obdfilter/*/quota_iunit_sz`; do echo $1 > $i done; - for i in `ls /proc/fs/lustre/mds/mds*/quota_iunit_sz`; do + for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_iunit_sz`; do echo $1 > $i done } @@ -210,7 +210,7 @@ pre_test() { # set block tunables set_blk_tunesz $BTUNE_SZ set_blk_unitsz $BUNIT_SZ - # set file tunaables + # set file tunables set_file_tunesz $ITUNE_SZ set_file_unitsz $IUNIT_SZ fi @@ -361,7 +361,6 @@ test_block_soft() { echo " Write before timer goes off" $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$BUNIT_SZ >/dev/null 2>&1 || error "write failure, but expect success" - sync; sleep 1; sync; echo " Done" echo " Sleep $GRACE seconds ..." @@ -369,6 +368,7 @@ test_block_soft() { echo " Write after timer goes off" # maybe cache write, ignore. + sync; sleep 1; sync; $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null sync; sleep 1; sync; $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=1 seek=$(($BUNIT_SZ * 3)) >/dev/null 2>&1 && error "write success, but expect EDQUOT" @@ -538,9 +538,9 @@ test_6() { echo " Exceed quota limit ..." 
$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write fileb failure, but expect success" - sync; sleep 1; sync; + #sync; sleep 1; sync; $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ seek=$LIMIT count=$BUNIT_SZ >/dev/null 2>&1 && error "write fileb success, but expect EDQUOT" - sync; sleep 1; sync; + #sync; sleep 1; sync; echo " Write to OST0 return EDQUOT" # this write maybe cache write, ignore it's failure $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null @@ -589,7 +589,7 @@ test_7() echo 0 > /proc/sys/lustre/fail_loc echo " Trigger recovery..." - OSC0_UUID="`$LCTL dl | awk '/.* *-osc-* / { print $1 }'`" + OSC0_UUID="`$LCTL dl | awk '$3 ~ /osc/ { print $1 }'`" for i in $OSC0_UUID; do $LCTL --device $i activate > /dev/null 2>&1 || error "activate osc failed!" done @@ -603,8 +603,8 @@ test_7() [ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!" echo " total limits = $TOTAL_LIMIT" - OST0_UUID=`$LCTL dl | awk '/.*OST_[^ ]+_UUID.* / { print $5 }'` - [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '/.*ost1_[^ ]*UUID.* / { print $5 }'` + OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'| head -n1` + [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'|head -n1` OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $MOUNT | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`" [ $OST0_LIMIT -eq $BUNIT_SZ ] || error "high limits not released!" echo " limits on $OST0_UUID = $OST0_LIMIT" @@ -644,13 +644,143 @@ test_8() { } run_test 8 "Run dbench with quota enabled ===========" +# run for fixing bug10707, it needs a big room. test for 64bit +test_9() { + lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'` + size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT)) + echo "lustrefs_size:$lustrefs_size size_file:$size_file" + if [ $lustrefs_size -lt $size_file ]; then + echo "WARN: too few capacity, skip this test." 
+ return 0; + fi + + # set the D_QUOTA flag + debug_flag=`cat /proc/sys/lnet/debug` + D_QUOTA_FLAG=67108864 + set_flag=0 + if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then + echo $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug + set_flag=1 + fi + + TESTFILE="$TSTDIR/quota_tst90" + + echo " Set block limit $LIMIT bytes to $TSTUSR.$TSTUSR" + BLK_LIMIT=$((100 * 1024 * 1024)) # 100G + FILE_LIMIT=1000000 + + echo " Set enough high limit for user: $TSTUSR" + $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT + echo " Set enough high limit for group: $TSTUSR" + $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT + + echo " Set stripe" + [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT + touch $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE + + echo " Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $((9 / 2 * $OSTCOUNT))G file failure, but expect success" + + echo " delete the big file of $(($OSTCOUNT * 9 / 2))G..." + $RUNAS rm -f $TESTFILE >/dev/null 2>&1 + + echo " write the big file of 2G..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write $((9 / 2 * $OSTCOUNT))G file failure, but expect seccess" + + echo " delete the big file of 2G..." + $RUNAS rm -f $TESTFILE >/dev/null 2>&1 + + RC=$? + + # clear the flage + if [ $set_flag -eq 1 ]; then + echo $debug_flag > /proc/sys/lnet/debug + fi + + return $RC +} +run_test 9 "run for fixing bug10707(64bit) ===========" + +# run for fixing bug10707, it need a big room. test for 32bit +test_10() { + lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'` + size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT)) + echo "lustrefs_size:$lustrefs_size size_file:$size_file" + if [ $lustrefs_size -lt $size_file ]; then + echo "WARN: too few capacity, skip this test." + return 0; + fi + + if [ ! -d /proc/fs/lustre/ost/ -o ! 
-d /proc/fs/lustre/mds ]; then + echo "WARN: mds or ost isn't on the local machine, skip this test." + return 0; + fi + + sync; sleep 10; sync; + + # set the D_QUOTA flag + debug_flag=`cat /proc/sys/lnet/debug` + D_QUOTA_FLAG=67108864 + set_flag=0 + if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then + echo $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug + set_flag=1 + fi + + # make qd_count 32 bit + sysctl -w lustre.fail_loc=2560 + + TESTFILE="$TSTDIR/quota_tst100" + + echo " Set block limit $LIMIT bytes to $TSTUSR.$TSTUSR" + BLK_LIMIT=$((100 * 1024 * 1024)) # 100G + FILE_LIMIT=1000000 + + echo " Set enough high limit for user: $TSTUSR" + $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT + echo " Set enough high limit for group: $TSTUSR" + $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT + + echo " Set stripe" + [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT + touch $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE + + echo " Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $((9 / 2 * $OSTCOUNT))G file failure, but expect success" + + echo " delete the big file of $(($OSTCOUNT * 9 / 2))G..." + $RUNAS rm -f $TESTFILE >/dev/null 2>&1 + + echo " write the big file of 2G..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write $((9 / 2 * $OSTCOUNT))G file failure, but expect success" + + echo " delete the big file of 2G..." + $RUNAS rm -f $TESTFILE >/dev/null 2>&1 + + RC=$? 
+ + # clear the flage + if [ $set_flag -eq 1 ]; then + echo $debug_flag > /proc/sys/lnet/debug + fi + + # make qd_count 64 bit + sysctl -w lustre.fail_loc=0 + + return $RC +} +run_test 10 "run for fixing bug10707(32bit) ===========" + + # turn off quota -test_9() +test_99() { $LFS quotaoff $MOUNT return 0 } -run_test 9 "Quota off ===============================" +run_test 99 "Quota off ===============================" log "cleanup: ======================================================" diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b320fc4..f3b74c2 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -114,10 +114,10 @@ trace() { TRACE=${TRACE:-""} check_kernel_version() { - VERSION_FILE=$LPROC/kernel_version + VERSION_FILE=$LPROC/version WANT_VER=$1 [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1 - GOT_VER=`cat $VERSION_FILE` + GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE) [ $GOT_VER -ge $WANT_VER ] && return 0 log "test needs at least kernel version $WANT_VER, running $GOT_VER" return 1 @@ -1105,7 +1105,25 @@ test_27r() { reset_enospc } -run_test 27r "stripe file with some full OSTs (shouldn't LBUG) ===" +run_test 27r "stripe file with some full OSTs (shouldn't LBUG) =" + +test_27s() { + mkdir -p $DIR/$tdir + $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \ + error "stripe width >= 2^32 succeeded" || true +} +run_test 27s "lsm_xfersize overflow (should error) (bug 10725)" + +test_27t() { # bug 10864 + WDIR=`pwd` + WLFS=`which lfs` + cd $DIR + touch $tfile + $WLFS getstripe $tfile + cd $WDIR +} +run_test 27t "check that utils parse path correctly" + test_28() { mkdir $DIR/d28 @@ -2541,11 +2559,11 @@ run_test 65i "set non-default striping on root directory (bug 6367)=" test_65j() { # bug6367 return # if we aren't already remounting for each test, do so for this test - if [ "$CLEANUP" = ":" ]; then + if [ "$CLEANUP" = ":" -a "$I_MOUNTED" = "yes" ]; then cleanup -f || error "failed to unmount" - 
setup || error "failed to remount" + setup fi - $SETSTRIPE -d $MOUNT || true + $SETSTRIPE -d $MOUNT } run_test 65j "set default striping on root directory (bug 6367)=" @@ -2751,71 +2769,132 @@ test_74() { # bug 6149, 6184 run_test 74 "ldlm_enqueue freed-export error path (shouldn't LBUG)" JOIN=${JOIN:-"lfs join"} -test_75() { +F75=$DIR/f75 +F128k=${F75}_128k +FHEAD=${F75}_head +FTAIL=${F75}_tail +export T75_PREP=no +test75_prep() { + [ $T75_PREP = "yes" ] && return + echo "using F75=$F75, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL" + + dd if=/dev/urandom of=${F75}_128k bs=128k count=1 || error "dd failed" + log "finished dd" + chmod 777 ${F128k} + T75_PREP=yes +} + +test_75a() { # skipped temporarily: we do not have join file currently # please remove this when ready - huanghua return - F=$DIR/$tfile - F128k=${F}_128k - FHEAD=${F}_head - FTAIL=${F}_tail - echo "using F=$F, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL" - rm -f $F* - - dd if=/dev/urandom of=${F}_128k bs=1024 count=128 || error "dd failed" - chmod 777 ${F128k} - cp -p ${F128k} ${FHEAD} - cp -p ${F128k} ${FTAIL} - cat ${F128k} ${F128k} > ${F}_sim_sim - - $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" - cmp ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ" - $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join" - - cp -p ${F128k} ${FTAIL} - cat ${F}_sim_sim >> ${F}_join_sim - cat ${F128k} >> ${F}_join_sim - $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" - cmp ${FHEAD} ${F}_join_sim || \ - error "${FHEAD} ${F}_join_sim are different" - $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join" - - cp -p ${F128k} ${FTAIL} - cat ${F128k} >> ${F}_sim_join - cat ${F}_join_sim >> ${F}_sim_join - $JOIN ${FTAIL} ${FHEAD} || error "join error" - cmp ${FTAIL} ${F}_sim_join || \ - error "${FTAIL} ${F}_sim_join are different" - $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join" - - cp -p ${F128k} ${FHEAD} - cp -p ${F128k} ${FHEAD}_tmp - 
cat ${F}_sim_sim >> ${F}_join_join - cat ${F}_sim_join >> ${F}_join_join - $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error" - $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" - cmp ${FHEAD} ${F}_join_join || error "${FHEAD} ${F}_join_join differ" - $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join" - $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)" - - rm -rf ${FHEAD} || error "delete join file error" - cp -p ${F128k} ${F}_join_10_compare - cp -p ${F128k} ${F}_join_10 - for ((i = 0; i < 10; i++)); do - cat ${F128k} >> ${F}_join_10_compare - cp -p ${F128k} ${FTAIL} - $JOIN ${F}_join_10 ${FTAIL} || \ - error "join ${F}_join_10 ${FTAIL} error" - $CHECKSTAT -a ${FTAIL} || error "tail file exist after join" - done - cmp ${F}_join_10 ${F}_join_10_compare || \ - error "files ${F}_join_10 ${F}_join_10_compare are different" - $LFS getstripe ${F}_join_10 - $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open" - - ls -l $F* + test75_prep + + cp -p ${F128k} ${FHEAD} + log "finished cp to $FHEAD" + cp -p ${F128k} ${FTAIL} + log "finished cp to $FTAIL" + cat ${F128k} ${F128k} > ${F75}_sim_sim + + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + log "finished join $FHEAD to ${F75}_sim_sim" + cmp ${FHEAD} ${F75}_sim_sim || error "${FHEAD} ${F75}_sim_sim differ" + log "finished cmp $FHEAD to ${F75}_sim_sim" + $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join" +} +run_test 75a "TEST join file ====================================" + +test_75b() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + test75_prep + + cp -p ${F128k} ${FTAIL} + cat ${F75}_sim_sim >> ${F75}_join_sim + cat ${F128k} >> ${F75}_join_sim + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + cmp ${FHEAD} ${F75}_join_sim || \ + error "${FHEAD} ${F75}_join_sim are different" + $CHECKSTAT -a ${FTAIL} || error "tail 
${FTAIL} exist after join" +} +run_test 75b "TEST join file 2 ==================================" + +test_75c() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + test75_prep + + cp -p ${F128k} ${FTAIL} + cat ${F128k} >> ${F75}_sim_join + cat ${F75}_join_sim >> ${F75}_sim_join + $JOIN ${FTAIL} ${FHEAD} || error "join error" + cmp ${FTAIL} ${F75}_sim_join || \ + error "${FTAIL} ${F75}_sim_join are different" + $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join" +} +run_test 75c "TEST join file 3 ==================================" + +test_75d() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + test75_prep + + cp -p ${F128k} ${FHEAD} + cp -p ${F128k} ${FHEAD}_tmp + cat ${F75}_sim_sim >> ${F75}_join_join + cat ${F75}_sim_join >> ${F75}_join_join + $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error" + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + cmp ${FHEAD} ${F75}_join_join ||error "${FHEAD} ${F75}_join_join differ" $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join" + $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)" +} +run_test 75d "TEST join file 4 ==================================" + +test_75e() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + test75_prep + + rm -rf ${FHEAD} || "delete join file error" +} +run_test 75e "TEST join file 5 (remove joined file) =============" + +test_75f() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + test75_prep + + cp -p ${F128k} ${F75}_join_10_compare + cp -p ${F128k} ${F75}_join_10 + for ((i = 0; i < 10; i++)); do + cat ${F128k} >> ${F75}_join_10_compare + cp -p ${F128k} ${FTAIL} + $JOIN ${F75}_join_10 ${FTAIL} || \ + error "join ${F75}_join_10 ${FTAIL} error" + $CHECKSTAT -a 
${FTAIL} || error "tail file exist after join" + done + cmp ${F75}_join_10 ${F75}_join_10_compare || \ + error "files ${F75}_join_10 ${F75}_join_10_compare differ" +} +run_test 75f "TEST join file 6 (join 10 files) ==================" + +test_75g() { +# skipped temporarily: we do not have join file currently +# please remove this when ready - huanghua + return + [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return + $LFS getstripe ${F75}_join_10 + + $OPENUNLINK ${F75}_join_10 ${F75}_join_10 || error "files unlink open" + + ls -l $F75* } -run_test 75 "TEST join file ====================================" +run_test 75g "TEST join file 7 (open unlink) ====================" num_inodes() { awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo @@ -2988,6 +3067,8 @@ test_102() { [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return + [ -z "$(which setfattr 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfattr)" && return + echo "set/get xattr..." setfattr -n trusted.name1 -v value1 $testfile || error [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \ @@ -3018,8 +3099,9 @@ test_102() { getfattr -d -m user $testfile 2> /dev/null | \ grep "user.author1" && error || true - echo "set lustre specific xattr (should be denied)..." - setfattr -n "trusted.lov" -v "invalid value" $testfile || true + # b10667: setting lustre special xattr be silently discarded + echo "set lustre special xattr ..." 
+ setfattr -n "trusted.lov" -v "invalid value" $testfile || error rm -f $testfile } diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index aa70f83..8a46e8d 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -120,7 +120,7 @@ build_test_filter() { for O in $ONLY; do eval ONLY_${O}=true done - for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do + for E in $EXCEPT $ALWAYS_EXCEPT $SANITYN_EXCEPT; do eval EXCEPT_${E}=true done } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index c3e147f..637c2c6 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3,7 +3,7 @@ set -e trap 'echo "test-framework exiting on error"' ERR -#set -vx +#set -x export REFORMAT="" @@ -118,6 +118,7 @@ load_modules() { load_module fid/fid load_module fld/fld load_module lmv/lmv + load_module quota/lquota load_module mdc/mdc load_module osc/osc load_module lov/lov @@ -147,6 +148,29 @@ load_modules() { [ -f $LUSTRE/utils/mount.lustre ] && cp $LUSTRE/utils/mount.lustre /sbin/. 
|| true } +wait_for_lnet() { + local UNLOADED=0 + local WAIT=0 + local MAX=60 + MODULES=$($LCTL modules | awk '{ print $2 }') + while [ -n "$MODULES" ]; do + sleep 5 + rmmod $MODULES >/dev/null 2>&1 || true + MODULES=$($LCTL modules | awk '{ print $2 }') + if [ -z "$MODULES" ]; then + return 0 + else + WAIT=$((WAIT + 5)) + echo "waiting, $((MAX - WAIT)) secs left" + fi + if [ $WAIT -eq $MAX ]; then + echo "LNET modules $MODULES will not unload" + lsmod + return 3 + fi + done +} + unload_modules() { lsmod | grep lnet > /dev/null && $LCTL dl && $LCTL dk $TMP/debug local MODULES=$($LCTL modules | awk '{ print $2 }') @@ -154,14 +178,20 @@ unload_modules() { rmmod $MODULES >/dev/null 2>&1 || true # do it again, in case we tried to unload ksocklnd too early MODULES=$($LCTL modules | awk '{ print $2 }') - [ -n "$MODULES" ] && rmmod $MODULES >/dev/null && sleep 2 || true + [ -n "$MODULES" ] && rmmod $MODULES >/dev/null || true MODULES=$($LCTL modules | awk '{ print $2 }') if [ -n "$MODULES" ]; then - echo "modules still loaded" + echo "Modules still loaded: " echo $MODULES - cat $LPROC/devices || true - lsmod - return 2 + if [ -e $LPROC ]; then + echo "Lustre still loaded" + cat $LPROC/devices || true + lsmod + return 2 + else + echo "Lustre stopped, but LNET is still loaded" + wait_for_lnet || return 3 + fi fi HAVE_MODULES=false diff --git a/lustre/tests/write_disjoint.c b/lustre/tests/write_disjoint.c index 9548cdc..9c2728e 100644 --- a/lustre/tests/write_disjoint.c +++ b/lustre/tests/write_disjoint.c @@ -10,7 +10,7 @@ * * compile: mpicc -g -Wall -o write_disjoint write_disjoint.c * run: mpirun -np N -machlist write_disjoint - * or: pdsh -w write_disjoint + * or: pdsh -w write_disjoint * or: prun -n N [-N M] write_disjoint */ #include @@ -29,27 +29,30 @@ void rprintf(int rank, int loop, const char *fmt, ...) 
{ va_list ap; - + printf("rank %d, loop %d: ", rank, loop); - + va_start(ap, fmt); - + vprintf(fmt, ap); - - MPI_Abort(MPI_COMM_WORLD, -1); + + MPI_Abort(MPI_COMM_WORLD, -1); /* This will exit() according to man */ } +#define CHUNK_SIZE(n) chunk_size[(n) % 2] + int main (int argc, char *argv[]) { - int i, n, fd, chunk_size, file_size; - int rank, noProcessors, done; - int error; - off_t offset; - char **chunk_buf; - char *read_buf, c; - struct stat stat_buf; - ssize_t ret; - char *filename = "/mnt/lustre/write_disjoint"; - int numloops = 1000; + int i, n, fd; + unsigned long chunk_size[2]; + int rank, noProcessors, done; + int error; + off_t offset; + char **chunk_buf; + char *read_buf, c; + struct stat stat_buf; + ssize_t ret; + char *filename = "/mnt/lustre/write_disjoint"; + int numloops = 1000; error = MPI_Init(&argc, &argv); if (error != MPI_SUCCESS) @@ -66,105 +69,116 @@ int main (int argc, char *argv[]) { } } - MPI_Comm_size(MPI_COMM_WORLD, &noProcessors); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0])); - for (i=0; i < noProcessors; i++) { + MPI_Comm_size(MPI_COMM_WORLD, &noProcessors); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0])); + for (i=0; i < noProcessors; i++) { chunk_buf[i] = malloc(CHUNK_MAX_SIZE); memset(chunk_buf[i], 'A'+ i, CHUNK_MAX_SIZE); - } - read_buf = malloc(noProcessors * CHUNK_MAX_SIZE); - - if (rank == 0) { + } + read_buf = malloc(noProcessors * CHUNK_MAX_SIZE); + + if (rank == 0) { fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666); - if (fd < 0) - rprintf(rank, -1, "open() returned %s\n", + if (fd < 0) + rprintf(rank, -1, "open() returned %s\n", strerror(errno)); - } - MPI_Barrier(MPI_COMM_WORLD); - - fd = open(filename, O_RDWR); - if (fd < 0) - rprintf(rank, -1, "open() returned %s\n", strerror(errno)); - - for (n=0; n < numloops; n++) { - /* reset the environment */ - if (rank == 0) { - ret = truncate(filename, 0); - if (ret != 
0) - rprintf(rank, n, "truncate() returned %s\n", - strerror(errno) ); - } - chunk_size = rand() % CHUNK_MAX_SIZE; - - if (n % 1000 == 0 && rank == 0) - printf("loop %d: chunk_size %d\n", n, chunk_size); - - MPI_Barrier(MPI_COMM_WORLD); - - /* Do the race */ - offset = rank * chunk_size; - lseek(fd, offset, SEEK_SET); - - done = 0; - do { - ret = write(fd, chunk_buf[rank]+done, chunk_size-done); - if (ret < 0) - rprintf(rank, n, "write() returned %s\n", - strerror(errno)); + } + MPI_Barrier(MPI_COMM_WORLD); + + fd = open(filename, O_RDWR); + if (fd < 0) + rprintf(rank, -1, "open() returned %s\n", strerror(errno)); + + for (n = 0; n < numloops; n++) { + /* reset the environment */ + if (rank == 0) { + ret = truncate(filename, 0); + if (ret != 0) + rprintf(rank, n, "truncate() returned %s\n", + strerror(errno) ); + } + CHUNK_SIZE(n) = rand() % CHUNK_MAX_SIZE; + + if (n % 1000 == 0 && rank == 0) + printf("loop %d: chunk_size %lu\n", n, CHUNK_SIZE(n)); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Do the race */ + offset = rank * CHUNK_SIZE(n); + lseek(fd, offset, SEEK_SET); + + done = 0; + do { + ret = write(fd, chunk_buf[rank] + done, + CHUNK_SIZE(n) - done); + if (ret < 0) + rprintf(rank, n, "write() returned %s\n", + strerror(errno)); done += ret; - } while (done != chunk_size); - - MPI_Barrier(MPI_COMM_WORLD); - - /* Check the result */ - if (rank == 0) { - lseek(fd, 0, SEEK_SET); - - /* quick check */ - stat(filename, &stat_buf); - file_size = stat_buf.st_size; - if (file_size != chunk_size * noProcessors) - rprintf(rank, n, "invalid file size %d" - " instead of %d\n", file_size, - chunk_size * noProcessors); + } while (done != CHUNK_SIZE(n)); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Check the result */ + if (rank == 0) { + if (lseek(fd, 0, SEEK_SET) < 0) + rprintf(rank, n, "error seeking to 0: %s\n", + strerror(errno)); + + /* quick check */ + if (stat(filename, &stat_buf) < 0) + rprintf(rank, n, "error stating %s: %s\n", + filename, strerror(errno)); + + if 
(stat_buf.st_size != CHUNK_SIZE(n) * noProcessors) + rprintf(rank, n, "invalid file size %lu" + " instead of %lu\n", + (unsigned long)stat_buf.st_size, + CHUNK_SIZE(n) * noProcessors); done = 0; do { - ret = read(fd, read_buf + done, - (chunk_size * noProcessors) - done); - if (ret < 0) + ret = read(fd, read_buf + done, + CHUNK_SIZE(n) * noProcessors - done); + if (ret < 0) rprintf(rank, n, "read returned %s\n", strerror(errno)); done += ret; - } while (done != chunk_size * noProcessors); + } while (done != CHUNK_SIZE(n) * noProcessors); for (i = 0; i < noProcessors; i++) { - char command[4096]; + char command[4096]; int j; - if (!memcmp(read_buf + (i * chunk_size), - chunk_buf[i], chunk_size)) + if (!memcmp(read_buf + (i * CHUNK_SIZE(n)), + chunk_buf[i], CHUNK_SIZE(n))) continue; - printf("rank %d, loop %d: chunk %d corrupted " - "with chunk_size %d, page_size %d\n", - rank, n, i, chunk_size, getpagesize()); - printf("(ranks: page boundry, chunk boundry, " - "page boundry)\n"); + /* print out previous chunk sizes */ + if (n > 0) + printf("loop %d: chunk_size %lu\n", + n - 1, CHUNK_SIZE(n - 1)); + + printf("loop %d: chunk %d corrupted " + "with chunk_size %lu, page_size %d\n", + n, i, CHUNK_SIZE(n), getpagesize()); + printf("ranks:\tpage boundry\tchunk boundry\t" + "page boundry\n"); for (j = 1 ; j < noProcessors; j++) { - int b = j * chunk_size; - printf("\t%c -> %c: %d %d %d\n", - 'A' + j - 1, 'A' + j, - b & ~(getpagesize()-1), b, - (b + getpagesize()) & ~(getpagesize()-1)); + int b = j * CHUNK_SIZE(n); + printf("%c -> %c:\t%d\t%d\t%d\n", + 'A' + j - 1, 'A' + j, + b & ~(getpagesize()-1), b, + (b + getpagesize()) & + ~(getpagesize()-1)); } sprintf(command, "od -Ad -a %s", filename); system(command); - MPI_Finalize(); - exit(1); + rprintf(0, n, "data check error - exiting\n"); } } } diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore index f1f7030..c888f7c 100644 --- a/lustre/utils/.cvsignore +++ b/lustre/utils/.cvsignore @@ -7,26 +7,20 @@ Makefile.in 
.deps tags TAGS -obdctl -lctl -obdstat -obdio -obdbarrier -lload -wirecheck -lfs mkfs.lustre -mkfs_lustre mount.lustre -mount_lustre tunefs.lustre -tunefs_lustre -llog_reader -llmount -l_getgroups +lctl +lfs +wirecheck wiretest llog_reader -.*.cmd -.*.d +lr_reader +obdio +obdbarrier +lload llverfs llverdev +l_getgroups +.*.cmd +.*.d diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 8eb36df..e7aed66 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -13,8 +13,8 @@ LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a sbin_scripts = llstat.pl llobdstat.pl lrun if UTILS -noinst_PROGRAMS = mount_lustre mkfs_lustre tunefs_lustre \ - llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier +noinst_PROGRAMS = llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier + # mount only finds helpers in /sbin rootsbin_PROGRAMS = mount.lustre sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl \ @@ -96,12 +96,3 @@ newwiretest: wirehdr.c wirecheck cp ../ptlrpc/wirehdr.c ../ptlrpc/wiretest.c ./wirecheck >> ../ptlrpc/wiretest.c -# Apparently I can't use .'s in automake names -mount.lustre$(EXEEXT): mount_lustre - cp $< $@ - -mkfs.lustre$(EXEEXT): mkfs_lustre - cp $< $@ - -tunefs.lustre$(EXEEXT): tunefs_lustre - cp $< $@ diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 423e536..acba01a 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -854,8 +854,8 @@ def def_mount_options(fstype, target, blkdev): # use internal journal return mountfsoptions - # run blkid - blkid = "blkid -o device -t UUID='%s'" % (journal_UUID) + # run blkid, lookup highest-priority device with matching UUID + blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID) (ret, devname) = run(blkid) if ret or len(devname) == 0: panic("cannot find external journal for ", blkdev) diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index ad988a7..53f5c67 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -69,7 
+69,7 @@ static void err_msg(char *fmt, ...) fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno); } -int llapi_file_create(const char *name, long stripe_size, int stripe_offset, +int llapi_file_create(const char *name, unsigned long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern) { struct lov_user_md lum = { 0 }; @@ -103,7 +103,7 @@ int llapi_file_create(const char *name, long stripe_size, int stripe_offset, "multiple of %d bytes", stripe_size, page_size); goto out; } - if (stripe_offset < -1 || stripe_offset > 2048) { + if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) { errno = rc = -EINVAL; err_msg("error: bad stripe offset %d", stripe_offset); goto out; @@ -113,10 +113,10 @@ int llapi_file_create(const char *name, long stripe_size, int stripe_offset, err_msg("error: bad stripe count %d", stripe_count); goto out; } - if (stripe_count > 0 && (__u64)stripe_size * stripe_count > ~0UL) { + if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){ errno = rc = -EINVAL; - err_msg("error: stripe_size %ld * stripe_count %d " - "exceeds %lu bytes", ~0UL); + err_msg("error: stripe_size %lu * stripe_count %u " + "exceeds 4GB", stripe_size, stripe_count); goto out; } @@ -432,7 +432,8 @@ void llapi_lov_dump_user_lmm(struct find_param *param, int llapi_file_get_stripe(const char *path, struct lov_user_md *lum) { - char *dname, *fname; + const char *fname; + char *dname; int fd, rc = 0; fname = strrchr(path, '/'); @@ -724,7 +725,8 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir, void *data) ret = ioctl(dirfd(dir), LL_IOC_MDC_GETINFO, (void *)param->lmd); } else if (!decision && parent) { - char *fname = strrchr(path, '/') + 1; + char *fname = strrchr(path, '/'); + fname = (fname == NULL ? 
path : fname + 1); /* retrieve needed file info */ strncpy((char *)param->lmd, fname, param->lumlen); @@ -889,7 +891,8 @@ static int cb_getstripe(char *path, DIR *parent, DIR *d, void *data) ret = ioctl(dirfd(d), LL_IOC_LOV_GETSTRIPE, (void *)¶m->lmd->lmd_lmm); } else if (parent) { - char *fname = strrchr(path, '/') + 1; + char *fname = strrchr(path, '/'); + fname = (fname == NULL ? path : fname + 1); strncpy((char *)¶m->lmd->lmd_lmm, fname, param->lumlen); ret = ioctl(dirfd(parent), IOC_MDC_GETFILESTRIPE, @@ -1193,7 +1196,8 @@ static int cb_quotachown(char *path, DIR *parent, DIR *d, void *data) rc = ioctl(dirfd(d), LL_IOC_MDC_GETINFO, (void *)param->lmd); } else if (parent) { - char *fname = strrchr(path, '/') + 1; + char *fname = strrchr(path, '/'); + fname = (fname == NULL ? path : fname + 1); strncpy((char *)param->lmd, fname, param->lumlen); rc = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, diff --git a/lustre/utils/llobdstat.pl b/lustre/utils/llobdstat.pl index be8fba7..4feb3c1 100755 --- a/lustre/utils/llobdstat.pl +++ b/lustre/utils/llobdstat.pl @@ -1,4 +1,7 @@ #!/usr/bin/perl +# llobdstat.pl is a utility that parses obdfilter statistics files +# found at proc/fs/lustre//stats. +# It is mainly useful to watch the statistics change over time. 
my $pname = $0; @@ -7,9 +10,11 @@ my $obdstats = "stats"; sub usage() { - print STDERR "Usage: $pname []\n"; - print STDERR "example: $pname help (to get help message)\n"; - print STDERR "example: $pname ost1 1 (monitor /proc/fs/lustre/obdfilter/ost1/stats\n"; + print STDERR "Usage: $pname []\n"; + print STDERR "where ost_name : ost name under $defaultpath/obdfilter\n"; + print STDERR " interval : sample interaval in seconds\n"; + print STDERR "example: $pname lustre-OST0000 2\n"; + print STDERR "Use CTRL + C to stop statistics printing\n"; exit 1; } @@ -46,12 +51,14 @@ print "$pname on $statspath\n"; my %cur; my %last; my $mhz = 0; -my ($read_bytes, $read, $write_bytes, $write, $getattr, $setattr, $open, $close, $create, $destroy, $statfs, $punch, $snapshot_time) = - ("read_bytes", "read", "write_bytes", "write", "getattr", "setattr", "open", "close", "create", "destroy", "statfs", "punch", "snapshot_time"); -my @extinfo = ($setattr, $open, $close, $create, $destroy, $statfs, $punch); -my %shortname = ($setattr => "sa", $open => "op", $close => "cl", - $create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu"); +#Removed some statstics like open, close that obdfilter don't contains. +#To add statistics parameters one need to specify parameter names in below declarations in same sequence. +my ($read_bytes, $write_bytes, $create, $destroy, $statfs, $punch, $snapshot_time) = + ("read_bytes", "write_bytes", "create", "destroy", "statfs", "punch", "snapshot_time"); + +my @extinfo = ($create, $destroy, $statfs, $punch); +my %shortname = ($create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu"); sub get_cpumhz() { @@ -73,6 +80,8 @@ sub get_cpumhz() get_cpumhz(); print "Processor counters run at $mhz MHz\n"; +# readstats subroutine reads statistics from obdfilter stats file. +# This subroutine gets called after every interval specified by user. 
sub readstat() { my $prevcount; @@ -101,35 +110,35 @@ sub readstat() } } } - +# process_stats subroutine processes stats information read from obdfilter stats file. +# This subroutine gets called after every interval specified by user. sub process_stats() { my $delta; my $data; my $last_time = $last{$snapshot_time}; if (!defined($last_time)) { - printf "R %-g/%-g W %-g/%-g attr %-g/%-g open %-g/%-g create %-g/%-g stat %-g punch %-g\n", - $cur{$read_bytes}, $cur{$read}, - $cur{$write_bytes}, $cur{$write}, - $cur{$getattr}, $cur{$setattr}, - $cur{$open}, $cur{$close}, + printf "Read: %-g, Write: %-g, create/destroy: %-g/%-g, stat: %-g, punch: %-g\n", + $cur{$read_bytes}, $cur{$write_bytes}, $cur{$create}, $cur{$destroy}, $cur{$statfs}, $cur{$punch}; + if ($interval) { + print "[NOTE: cx: create, dx: destroy, st: statfs, pu: punch ]\n\n"; + print "Timestamp Read-delta ReadRate Write-delta WriteRate\n"; + print "--------------------------------------------------------\n"; + } } else { my $timespan = $cur{$snapshot_time} - $last{$snapshot_time}; - - my $rdelta = $cur{$read} - $last{$read}; - my $rvdelta = int ($rdelta / $timespan); - my $rrate = ($cur{$read_bytes} - $last{$read_bytes}) / - ($timespan * ( 1 << 20 )); - my $wdelta = $cur{$write} - $last{$write}; - my $wvdelta = int ($wdelta / $timespan); - my $wrate = ($cur{$write_bytes} - $last{$write_bytes}) / - ($timespan * ( 1 << 20 )); - printf "R %6lu (%5lu %6.2fMB)/s W %6lu (%5lu %6.2fMB)/s", - $rdelta, $rvdelta, $rrate, - $wdelta, $wvdelta, $wrate; + my $rdelta = $cur{$read_bytes} - $last{$read_bytes}; + my $rrate = ($rdelta) / ($timespan * ( 1 << 20 )); + my $wdelta = $cur{$write_bytes} - $last{$write_bytes}; + my $wrate = ($wdelta) / ($timespan * ( 1 << 20 )); + $rdelta = ($rdelta) / (1024 * 1024); + $wdelta = ($wdelta) / (1024 * 1024); + # This print repeats after every interval. 
+ printf "%10lu %6.2fMB %6.2fMB/s %6.2fMB %6.2fMB/s", + $cur{$snapshot_time}, $rdelta, $rrate, $wdelta, $wrate; $delta = $cur{$getattr} - $last{$getattr}; if ( $delta != 0 ) { @@ -147,14 +156,15 @@ sub process_stats() $| = 1; } } - +#Open the obdfilter stat file with STATS open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; do { - readstat(); - process_stats(); - if ($interval) { - sleep($interval); + readstat(); # read the statistics from stat file. + process_stats(); + if ($interval) { + sleep($interval); %last = %cur; } -} while ($interval); +} while ($interval); # Repeat the statistics printing after every "interval" specified in command line. close STATS; +# llobdfilter.pl ends here. diff --git a/lustre/utils/llstat.pl b/lustre/utils/llstat.pl index 0305f3d..5706971 100755 --- a/lustre/utils/llstat.pl +++ b/lustre/utils/llstat.pl @@ -1,37 +1,60 @@ #!/usr/bin/perl - +# llstat.pl is a utility that takes stats files as input with optional clear-flag. +# The clear-flag is used to clear the stats file before printing stats information. +# The lustre stats files generally located inside proc/fs/lustre/ +# llstat.pl first reads the required statistics information from specified stat file, +# process the information and prints the output after every interval specified by user. 
+ my $pname = $0; my $defaultpath = "/proc/fs/lustre"; my $obdstats = "stats"; +# Subroutine for printing usages information sub usage() { - print STDERR "Usage: $pname []\n"; + print STDERR "Usage: $pname [-c] []\n"; + print STDERR " : lustre stats file, full /proc path or substring search\n"; + print STDERR " : Time in seconds to repeat statistics print cycle\n"; + print STDERR " -c : zero stats first\n"; + print STDERR "eg: $pname ost 1 -- monitors /proc/fs/lustre/ost/OSS/ost/stats\n"; + print STDERR "Use CTRL + C to stop statistics printing\n"; exit 1; } my $statspath = "None"; my $interval = 0; - -if (($#ARGV < 0) || ($#ARGV > 1)) { +my $argpos = 0; +# check for number of auguments +if (($#ARGV < 0) || ($#ARGV > 2)) { usage(); -} else { +} else { # Process arguments if ( $ARGV[0] =~ /help$/ ) { usage(); } - if ( -f $ARGV[0] ) { - $statspath = $ARGV[0]; - } elsif ( -f "$ARGV[0]/$obdstats" ) { - $statspath = "$ARGV[0]/$obdstats"; + if ($#ARGV == 1) { + if (($ARGV[0] eq "-c") || ($ARGV[0] eq "-C")) { + $argpos = 1; + } else { + $interval = $ARGV[1]; + } + } + if ( $#ARGV == 2 ) { + $interval = $ARGV[2]; + $argpos = 1; + } + if ( -f $ARGV[$argpos] ) { + $statspath = $ARGV[$argpos]; + } elsif ( -f "$ARGV[$argpos]/$obdstats" ) { + $statspath = "$ARGV[$argpos]/$obdstats"; } else { - my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`; + my $st = `ls $defaultpath/*/$ARGV[$argpos]/$obdstats 2> /dev/null`; chop $st; if ( -f "$st" ) { $statspath = $st; } else { - $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`; + $st = `ls $defaultpath/*/*/$ARGV[$argpos]/$obdstats 2> /dev/null`; chop $st; if ( -f "$st" ) { $statspath = $st; @@ -39,11 +62,19 @@ if (($#ARGV < 0) || ($#ARGV > 1)) { } } if ( $statspath =~ /^None$/ ) { - die "Cannot locate stat file for: $ARGV[0]\n"; + die "Cannot locate stat file for: $ARGV[$argpos]\n"; + } + if ($#ARGV == 2) { + # Clears stats file before printing information in intervals + if ( ($ARGV[0] eq "-c") || ($ARGV[0] eq "-C" ) 
) { + open ( STATS, "> $statspath") || die "Cannot clear $statspath: $!\n"; + print STATS " "; + close STATS; + sleep($interval); + } else { + usage(); + } } - if ($#ARGV == 1) { - $interval = $ARGV[1]; - } } print "$pname on $statspath\n"; @@ -53,6 +84,7 @@ my %sumhash; my $anysum = 0; my $anysumsquare = 0; my $mhz = 0; +my $falg = 0; sub get_cpumhz() { @@ -74,6 +106,8 @@ sub get_cpumhz() get_cpumhz(); print "Processor counters run at $mhz MHz\n"; +# readstats subroutine reads and processes statistics from stats file. +# This subroutine gets called after every interval specified by user. sub readstat() { seek STATS, 0, 0; @@ -87,21 +121,13 @@ sub readstat() $diff = $cumulcount - $prevcount; if ($name eq "snapshot_time") { $tdiff = $diff; - # printf "%-25s prev=$prevcount, cumul=$cumulcount diff=$diff, tdiff=$tdiff\n", $name; - printf "$statspath @ $cumulcount\n"; - printf "%-25s %-10s %-10s %-10s", "Name", "Cur.Count", "Cur.Rate", "#Events"; - if ($anysum) { - printf "%-8s %10s %10s %12s %10s", "Unit", "last", "min", "avg", "max"; - } - if ($anysumsquare) { - printf "%10s", "stddev"; - } - printf "\n"; + printf "\n%-10.0f", $cumulcount; $| = 1; } elsif ($cumulcount!=0) { - printf "%-25s %-10lu %-10lu %-10lu", - $name, $diff, ($diff/$tdiff), $cumulcount; + + printf " %s %lu %lu", + $name, ($diff/$tdiff), $cumulcount; if (defined($sum)) { my $sum_orig = $sum; @@ -118,7 +144,7 @@ sub readstat() $sum_diff = $sum_diff/$mhz; $max = $max/$mhz; } - printf "%-8s %10.2f %10lu %12.2f %10lu", $unit, ($sum_diff/$diff), $min,($sum/$cumulcount),$max; + printf " %lu %.2f %lu", $min,($sum/$cumulcount),$max; if (defined($sumsquare)) { my $s = $sumsquare - (($sum_orig*$sum_orig)/$cumulcount); if ($s >= 0) { @@ -127,17 +153,16 @@ sub readstat() if (($unit eq "[usecs]") && ($mhz != 1)) { $stddev = $stddev/$mhz; } - printf " %10.2f", $stddev; + printf " %.2f ", $stddev; } } } - printf "\n"; $| = 1; } } else { if ($cumulcount!=0) { - printf "%-25s $cumulcount\n", $name + printf "%-25s 
$cumulcount\n", $name # print info when interval is not specified. } if (defined($sum)) { $anysum = 1; @@ -149,6 +174,18 @@ sub readstat() %cumulhash->{$name} = $cumulcount; %sumhash->{$name} = $sum; } + if ( !$flag && $interval) { + printf "Timestamp [Name Rate Total"; + if ($anysum) { + printf " min avg max"; + } + if ($anysumsquare) { + printf " stddev"; + } + printf " ]..."; + printf "\n--------------------------------------------------------------------"; + $flag = 1; + } } open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 86e8503..7a15034 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -87,12 +87,13 @@ void usage(FILE *out) "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" "\t\t--param = : set a permanent parameter\n" "\t\t\te.g. --param sys.timeout=40\n" - "\t\t\t --param lov.stripe.size=4194304\n" + "\t\t\t --param lov.stripesize=2M\n" "\t\t--index=#N : target index (i.e. 
ost index within the lov)\n" /* FIXME implement 1.6.x "\t\t--configdev=: store configuration info\n" "\t\t\tfor this device on an alternate device\n" */ + "\t\t--comment=: arbitrary user string (%d bytes)\n" "\t\t--mountfsoptions= : permanent mount options\n" #ifndef TUNEFS "\t\t--backfstype= : backing fs type (ext3, ldiskfs)\n" @@ -108,7 +109,8 @@ void usage(FILE *out) "\t\t--noformat: just report what we would do; " "don't write to disk\n" "\t\t--verbose\n" - "\t\t--quiet\n"); + "\t\t--quiet\n", + sizeof(((struct lustre_disk_data *)0)->ldd_userdata)); return; } @@ -577,7 +579,8 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Index: unassigned\n"); else printf("Index: %d\n", ldd->ldd_svindex); - printf("UUID: %s\n", (char *)ldd->ldd_uuid); + if (ldd->ldd_uuid[0]) + printf("UUID: %s\n", (char *)ldd->ldd_uuid); printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); @@ -592,6 +595,8 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_UPGRADE14 ? 
"upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); printf("Parameters:%s\n", ldd->ldd_params); + if (ldd->ldd_userdata[0]) + printf("Comment: %s\n", ldd->ldd_userdata); printf("\n"); } @@ -653,8 +658,8 @@ int write_local_files(struct mkfs_opts *mop) sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE); filep = fopen(filepnm, "w"); if (!filep) { - fprintf(stderr, "%s: Unable to create %s file\n", - progname, filepnm); + fprintf(stderr, "%s: Unable to create %s file: %s\n", + progname, filepnm, strerror(errno)); goto out_umnt; } fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep); @@ -957,6 +962,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, static struct option long_opt[] = { {"backfstype", 1, 0, 'b'}, {"stripe-count-hint", 1, 0, 'c'}, + {"comment", 1, 0, 'u'}, {"configdev", 1, 0, 'C'}, {"device-size", 1, 0, 'd'}, {"erase-params", 0, 0, 'e'}, @@ -982,7 +988,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"writeconf", 0, 0, 'w'}, {0, 0, 0, 0} }; - char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrvw"; + char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; char opt; int rc, longidx; @@ -1122,6 +1128,12 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'r': mop->mo_flags |= MO_FORCEFORMAT; break; + case 'u': + strncpy(mop->mo_ldd.ldd_userdata, optarg, + sizeof(mop->mo_ldd.ldd_userdata)); + mop->mo_ldd.ldd_userdata[ + sizeof(mop->mo_ldd.ldd_userdata) - 1] = 0; + break; case 'v': verbose++; break; @@ -1363,9 +1375,6 @@ int main(int argc, char *argv[]) char default_mountopts[512] = ""; int ret = 0; - //printf("pad %d\n", offsetof(struct lustre_disk_data, ldd_padding)); - assert(offsetof(struct lustre_disk_data, ldd_padding) == 200); - if ((progname = strrchr(argv[0], '/')) != NULL) progname++; else diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh deleted file mode 100755 index be1ff5f..0000000 --- a/lustre/utils/rmmod_all.sh +++ /dev/null @@ -1,9 
+0,0 @@ -#!/bin/sh - -SRCDIR=`dirname $0` -PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH - -rmmod quotacheck_test quotactl_test quotafmt_test pingsrv pingcli -lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 -# do it again, in case we tried to unload ksocklnd too early -lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index c103a45..189d133 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -165,6 +165,7 @@ static void check_obd_connect_data(void) CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT); CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT); CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE); + CHECK_CDEFINE(OBD_CONNECT_QUOTA64); } static void @@ -919,9 +920,19 @@ check_qunit_data(void) BLANK_LINE(); CHECK_STRUCT(qunit_data); CHECK_MEMBER(qunit_data, qd_id); - CHECK_MEMBER(qunit_data, qd_type); + CHECK_MEMBER(qunit_data, qd_flags); CHECK_MEMBER(qunit_data, qd_count); - CHECK_MEMBER(qunit_data, qd_isblk); +} + +static void +check_qunit_data_old(void) +{ + BLANK_LINE(); + CHECK_STRUCT(qunit_data_old); + CHECK_MEMBER(qunit_data_old, qd_id); + CHECK_MEMBER(qunit_data_old, qd_type); + CHECK_MEMBER(qunit_data_old, qd_count); + CHECK_MEMBER(qunit_data_old, qd_isblk); } static void @@ -957,6 +968,7 @@ check_lustre_disk_data(void) CHECK_MEMBER(lustre_disk_data, ldd_fsname); CHECK_MEMBER(lustre_disk_data, ldd_svname); CHECK_MEMBER(lustre_disk_data, ldd_uuid); + CHECK_MEMBER(lustre_disk_data, ldd_userdata); CHECK_MEMBER(lustre_disk_data, ldd_mount_opts); CHECK_MEMBER(lustre_disk_data, ldd_params); } @@ -1062,8 +1074,6 @@ main(int argc, char **argv) CHECK_VALUE(OST_OPEN); CHECK_VALUE(OST_CLOSE); CHECK_VALUE(OST_STATFS); - CHECK_VALUE(OST_SAN_READ); - CHECK_VALUE(OST_SAN_WRITE); CHECK_VALUE(OST_SYNC); CHECK_VALUE(OST_QUOTACHECK); CHECK_VALUE(OST_QUOTACTL); @@ -1206,6 +1216,7 @@ main(int argc, char **argv) check_llog_array_rec(); check_mds_extent_desc(); check_qunit_data(); + 
check_qunit_data_old(); check_mgs_target_info(); check_lustre_disk_data(); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index abcffad..30da96b 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -28,5 +28,3 @@ int main() void lustre_assert_wire_constants(void) { } - -