Whamcloud - gitweb
- update from 1_5
author yury <yury>
Thu, 14 Sep 2006 13:38:33 +0000 (13:38 +0000)
committer yury <yury>
Thu, 14 Sep 2006 13:38:33 +0000 (13:38 +0000)
149 files changed:
ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch
ldiskfs/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/ChangeLog
lustre/autoconf/lustre-core.m4
lustre/autoconf/lustre-version.ac
lustre/doc/mkfs.lustre.8
lustre/doc/mount.lustre.8
lustre/doc/tunefs.lustre.8
lustre/include/linux/lvfs_linux.h
lustre/include/linux/obd_support.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_cfg.h
lustre/include/lustre_disk.h
lustre/include/lustre_dlm.h
lustre/include/lustre_ha.h
lustre/include/lustre_log.h
lustre/include/lustre_net.h
lustre/include/lustre_param.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/dcache-qstr-api-fix-2.6-suse.patch
lustre/kernel_patches/patches/export-do_kern_mount.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.5.patch
lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6.9.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6-fc5.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/kernel_patches/patches/jbd-stats-2.6.5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/jbd-stats-2.6.9.patch [new file with mode: 0644]
lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch [new file with mode: 0644]
lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/patches/vfs_intent-2.6-sles10.patch
lustre/kernel_patches/patches/vfs_intent-2.6.18-vanilla.patch
lustre/kernel_patches/patches/vfs_races-2.6.18-vanilla.patch
lustre/kernel_patches/series/2.6-fc3.series
lustre/kernel_patches/series/2.6-rhel4-titech.series
lustre/kernel_patches/series/2.6-rhel4.series
lustre/kernel_patches/series/2.6-sles10.series
lustre/kernel_patches/series/2.6-suse-newer.series
lustre/kernel_patches/series/2.6.18-vanilla.series
lustre/kernel_patches/targets/2.6-suse.target.in
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/liblustre/tests/sanity.c
lustre/llite/dcache.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/llite/super.c
lustre/llite/symlink.c
lustre/llite/xattr.c
lustre/lmv/lmv_obd.c
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_log.c
lustre/lov/lov_obd.c
lustre/lov/lov_offset.c
lustre/lov/lov_request.c
lustre/lov/lproc_lov.c
lustre/lvfs/fsfilt_ext3.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_internal.h
lustre/mds/mds_lib.c
lustre/mds/mds_log.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/mds_xattr.c
lustre/mgc/libmgc.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_llog.c
lustre/obdclass/class_obd.c
lustre/obdclass/darwin/darwin-sysctl.c
lustre/obdclass/linux/linux-module.c
lustre/obdclass/linux/linux-sysctl.c
lustre/obdclass/llog_obd.c
lustre/obdclass/llog_test.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lustre_peer.c
lustre/obdclass/obd_config.c
lustre/obdclass/obd_mount.c
lustre/obdecho/echo_client.c
lustre/obdfilter/Makefile.in
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_san.c [deleted file]
lustre/osc/Makefile.in
lustre/osc/autoMakefile.am
lustre/osc/osc_lib.c [deleted file]
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/import.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/sec.c
lustre/quota/quota_context.c
lustre/quota/quota_interface.c
lustre/quota/quota_internal.h
lustre/quota/quota_master.c
lustre/scripts/Makefile.am
lustre/scripts/lmc2csv.pl
lustre/scripts/lustre_rmmod.sh [new file with mode: 0755]
lustre/tests/conf-sanity.sh
lustre/tests/directio.c
lustre/tests/insanity.sh
lustre/tests/lfscktest.sh
lustre/tests/ll_dirstripe_verify.c
lustre/tests/llecho.sh
lustre/tests/llmount-upcall.sh [deleted file]
lustre/tests/recovery-cleanup.sh
lustre/tests/recovery-small-upcall.sh [deleted file]
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-ost-single.sh
lustre/tests/replay-ost-upcall.sh [deleted file]
lustre/tests/replay-single-upcall.sh [deleted file]
lustre/tests/replay-single.sh
lustre/tests/sanity-quota.sh
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/test-framework.sh
lustre/tests/write_disjoint.c
lustre/utils/.cvsignore
lustre/utils/Makefile.am
lustre/utils/lconf
lustre/utils/liblustreapi.c
lustre/utils/llobdstat.pl
lustre/utils/llstat.pl
lustre/utils/mkfs_lustre.c
lustre/utils/rmmod_all.sh [deleted file]
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index dca4676..e54774f 100644 (file)
@@ -1,7 +1,19 @@
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/super.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h        2006-08-09 17:59:34.000000000 +0400
++++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h     2006-08-22 12:35:55.000000000 +0400
+@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+Index: linux-2.6.5-7.201-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c        2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/super.c     2006-08-09 17:59:37.000000000 +0400
 @@ -39,7 +39,7 @@
  static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
  static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -20,10 +32,10 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
                               struct ext3_super_block * es,
                               int sync)
  {
-Index: linux-2.6.5-7.201/fs/ext3/namei.c
+Index: linux-2.6.5-7.201-full/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/namei.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/namei.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c        2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/namei.c     2006-08-09 17:59:37.000000000 +0400
 @@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
                              struct buffer_head * bh)
  {
@@ -44,10 +56,10 @@ Index: linux-2.6.5-7.201/fs/ext3/namei.c
                        if (pde)
                                pde->rec_len =
                                        cpu_to_le16(le16_to_cpu(pde->rec_len) +
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c  2006-06-20 19:42:30.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c        2006-07-14 01:53:23.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c     2006-08-09 17:59:37.000000000 +0400
 @@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
  {
        int error = -EINVAL;
@@ -57,10 +69,10 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c
                write_lock(&ext3_handler_lock);
                if (!ext3_xattr_handlers[name_index-1]) {
                        ext3_xattr_handlers[name_index-1] = handler;
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.201-full/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/inode.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c        2006-07-14 01:53:22.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/inode.c     2006-08-22 12:35:28.000000000 +0400
 @@ -1517,9 +1517,14 @@ out_stop:
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
index df3d2ea..f6904f2 100644 (file)
@@ -1,7 +1,19 @@
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2006-08-09 17:56:39.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h   2006-08-22 12:36:22.000000000 +0400
+@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
 Index: linux-2.6.9-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c      2006-06-02 23:37:51.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/super.c   2006-06-02 23:56:29.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-08-09 17:56:40.000000000 +0400
 @@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
                             unsigned long journal_devnum);
  static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -22,8 +34,8 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  {
 Index: linux-2.6.9-full/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-06-02 23:37:49.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/namei.c   2006-06-02 23:43:31.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-08-09 17:56:40.000000000 +0400
 @@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
                              struct buffer_head * bh)
  {
@@ -47,7 +59,7 @@ Index: linux-2.6.9-full/fs/ext3/namei.c
 Index: linux-2.6.9-full/fs/ext3/xattr.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-06-01 14:58:48.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/xattr.c   2006-06-03 00:02:00.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-08-09 17:56:40.000000000 +0400
 @@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
  {
        struct xattr_handler *handler = NULL;
@@ -60,7 +72,7 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c
 Index: linux-2.6.9-full/fs/ext3/inode.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-06-02 23:37:38.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/inode.c   2006-06-03 00:27:41.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-08-22 12:34:28.000000000 +0400
 @@ -1513,9 +1513,14 @@ out_stop:
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
index 325d080..b807900 100644 (file)
@@ -1387,8 +1387,8 @@ Index: linux-2.6.16.i686/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                       */
 +                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
index c77ebdd..646e4fe 100644 (file)
@@ -1,8 +1,8 @@
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h     2006-04-26 23:40:28.000000000 +0400
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-09-06 12:37:27.000000000 +0800
+@@ -57,6 +57,14 @@
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
  
@@ -17,7 +17,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  /*
   * Special inodes numbers
   */
-@@ -339,6 +347,7 @@ struct ext3_inode {
+@@ -339,6 +347,7 @@
  #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
@@ -25,7 +25,22 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
-@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe
+@@ -361,6 +370,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -700,7 +717,9 @@
  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
@@ -36,7 +51,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  extern unsigned long ext3_count_free_blocks (struct super_block *);
  extern void ext3_check_blocks_bitmap (struct super_block *);
  extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc
+@@ -824,6 +843,17 @@
  extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg);
  
@@ -54,10 +69,10 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  #define EXT3_IOC_CREATE_INUM                  _IOW('f', 5, long)
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
+Index: linux-stage/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h     2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h  2006-04-26 23:40:28.000000000 +0400
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-09-06 12:37:01.000000000 +0800
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -74,7 +89,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -78,6 +84,43 @@ struct ext3_sb_info {
+@@ -78,6 +84,43 @@
        struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
        wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
  #endif
@@ -112,17 +127,17 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
 +      unsigned long s_mb_buddies_generated;
 +      unsigned long long s_mb_generation_time;
  };
-+
 +#define EXT3_GROUP_INFO(sb, group)                                       \
 +      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
 +                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
++
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.252-full/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/super.c     2006-04-26 23:40:28.000000000 +0400
-@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block 
+--- linux-stage.orig/fs/ext3/super.c   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-09-06 12:37:01.000000000 +0800
+@@ -389,6 +389,7 @@
        struct ext3_super_block *es = sbi->s_es;
        int i;
  
@@ -130,7 +145,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -545,6 +546,7 @@ enum {
+@@ -546,6 +547,7 @@
        Opt_err,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_extents, Opt_noextents, Opt_extdebug,
@@ -138,7 +153,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -591,6 +592,9 @@ static match_table_t tokens = {
+@@ -592,6 +594,9 @@
        {Opt_extents, "extents"},
        {Opt_noextents, "noextents"},
        {Opt_extdebug, "extdebug"},
@@ -148,7 +163,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -813,6 +815,19 @@ static int parse_options (char * options
+@@ -817,6 +822,19 @@
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -168,7 +183,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
+@@ -1470,6 +1488,7 @@
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -176,7 +191,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  
        return 0;
  
-@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
+@@ -2118,7 +2137,13 @@
  
  static int __init init_ext3_fs(void)
  {
@@ -191,7 +206,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2147,6 +2172,7 @@
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -199,11 +214,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
+Index: linux-stage/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c      2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/extents.c   2006-04-26 23:40:28.000000000 +0400
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-09-06 12:37:01.000000000 +0800
+@@ -779,7 +779,7 @@
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -212,7 +227,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -221,7 +236,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        return err;
  }
  
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -235,7 +250,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -244,11 +259,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/inode.c     2006-04-26 23:40:28.000000000 +0400
-@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-09-06 12:37:01.000000000 +0800
+@@ -574,7 +574,7 @@
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -257,7 +272,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
        return err;
  }
  
-@@ -675,7 +675,7 @@ err_out:
+@@ -675,7 +675,7 @@
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -266,7 +281,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
        return err;
  }
  
-@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@
                }
        }
  
@@ -275,7 +290,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
  }
  
  /**
-@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t 
+@@ -2008,7 +2008,7 @@
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -284,11 +299,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
+Index: linux-stage/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c       2006-02-14 15:26:58.000000000 +0300
-+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c    2006-04-26 23:40:28.000000000 +0400
-@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c  2006-09-06 12:36:59.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-09-06 12:37:01.000000000 +0800
+@@ -78,7 +78,7 @@
   *
   * Return buffer_head on success or NULL in case of failure.
   */
@@ -297,7 +312,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
  read_block_bitmap(struct super_block *sb, unsigned int block_group)
  {
        struct ext3_group_desc * desc;
-@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino
+@@ -274,7 +274,7 @@
  }
  
  /* Free given blocks, update quota and i_blocks field */
@@ -306,7 +321,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
                        unsigned long block, unsigned long count)
  {
        struct buffer_head *bitmap_bh = NULL;
-@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super
+@@ -1142,7 +1142,7 @@
   * bitmap, and then for any free bit if that fails.
   * This function also updates quota and i_blocks field.
   */
@@ -315,11 +330,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
+Index: linux-stage/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c     2006-04-26 23:40:28.000000000 +0400
-@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c   2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-09-06 12:37:01.000000000 +0800
+@@ -1371,7 +1371,7 @@
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
  getblk_failed:
@@ -328,7 +343,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-@@ -1411,7 +1411,7 @@ getblk_failed:
+@@ -1411,7 +1411,7 @@
                if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
                        /* Free the old block. */
                        ea_bdebug(old_bh, "freeing");
@@ -337,7 +352,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
  
                        /* ext3_forget() calls bforget() for us, but we
                           let our caller release old_bh, so we need to
-@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1519,7 +1519,7 @@
                        mb_cache_entry_free(ce);
                        ce = NULL;
                }
@@ -346,10 +361,10 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
                get_bh(bh);
                ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
        } else {
-Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
+Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c      2006-09-06 12:37:34.000000000 +0800
 @@ -0,0 +1,2702 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
@@ -792,7 +807,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -3053,11 +3068,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile       2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/Makefile    2006-04-26 23:40:28.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+--- linux-stage.orig/fs/ext3/Makefile  2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/Makefile       2006-09-06 12:37:01.000000000 +0800
+@@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o \
index 0040a6f..13f3482 100644 (file)
@@ -1356,8 +1356,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                       */
 +                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
index e34c411..a00cd4a 100644 (file)
@@ -1,8 +1,8 @@
 Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs.h        2006-05-25 10:36:04.000000000 -0600
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-09-06 12:29:38.000000000 +0800
+@@ -57,6 +57,14 @@
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
  
@@ -17,7 +17,7 @@ Index: linux-stage/include/linux/ext3_fs.h
  /*
   * Special inodes numbers
   */
-@@ -365,6 +373,7 @@ struct ext3_inode {
+@@ -365,6 +373,7 @@
  #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
@@ -25,7 +25,22 @@ Index: linux-stage/include/linux/ext3_fs.h
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+@@ -387,6 +396,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -726,7 +743,7 @@
  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
@@ -34,7 +49,7 @@ Index: linux-stage/include/linux/ext3_fs.h
  extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
                                 unsigned long, unsigned long, int *);
  extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+@@ -859,6 +876,17 @@
  extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg);
  
@@ -54,8 +69,8 @@ Index: linux-stage/include/linux/ext3_fs.h
  /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-stage/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs_sb.h     2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-09-06 12:27:37.000000000 +0800
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -72,7 +87,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -81,6 +87,43 @@ struct ext3_sb_info {
+@@ -81,6 +87,43 @@
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
  #endif
@@ -110,17 +125,17 @@ Index: linux-stage/include/linux/ext3_fs_sb.h
 +      unsigned long s_mb_buddies_generated;
 +      unsigned long long s_mb_generation_time;
  };
-+
 +#define EXT3_GROUP_INFO(sb, group)                                       \
 +      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
 +                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
++
  #endif        /* _LINUX_EXT3_FS_SB */
 Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-stage.orig/fs/ext3/super.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/super.c        2006-05-25 10:36:04.000000000 -0600
-@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-09-06 12:27:37.000000000 +0800
+@@ -394,6 +394,7 @@
        struct ext3_super_block *es = sbi->s_es;
        int i;
  
@@ -128,7 +143,7 @@ Index: linux-stage/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,6 +598,7 @@
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_extents, Opt_noextents, Opt_extdebug,
@@ -136,7 +151,7 @@ Index: linux-stage/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -649,6 +651,9 @@ static match_table_t tokens = {
+@@ -649,6 +651,9 @@
        {Opt_extents, "extents"},
        {Opt_noextents, "noextents"},
        {Opt_extdebug, "extdebug"},
@@ -146,7 +161,7 @@ Index: linux-stage/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -962,6 +967,19 @@ static int parse_options (char * options
+@@ -962,6 +967,19 @@
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -166,7 +181,7 @@ Index: linux-stage/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super
+@@ -1651,6 +1669,7 @@
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -174,7 +189,7 @@ Index: linux-stage/fs/ext3/super.c
  
        return 0;
  
-@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t
+@@ -2433,7 +2452,13 @@
  
  static int __init init_ext3_fs(void)
  {
@@ -189,7 +204,7 @@ Index: linux-stage/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2455,6 +2480,7 @@
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -199,9 +214,9 @@ Index: linux-stage/fs/ext3/super.c
  int ext3_prep_san_write(struct inode *inode, long *blocks,
 Index: linux-stage/fs/ext3/extents.c
 ===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/extents.c      2006-05-25 10:36:04.000000000 -0600
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-09-06 12:27:37.000000000 +0800
+@@ -779,7 +779,7 @@
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -210,7 +225,7 @@ Index: linux-stage/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -219,7 +234,7 @@ Index: linux-stage/fs/ext3/extents.c
        return err;
  }
  
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -233,7 +248,7 @@ Index: linux-stage/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -244,9 +259,9 @@ Index: linux-stage/fs/ext3/extents.c
                       from, to, ex->ee_block, ex->ee_len);
 Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/inode.c        2006-05-25 10:36:04.000000000 -0600
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-09-06 12:27:37.000000000 +0800
+@@ -572,7 +572,7 @@
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -255,7 +270,7 @@ Index: linux-stage/fs/ext3/inode.c
        return err;
  }
  
-@@ -673,7 +673,7 @@ err_out:
+@@ -673,7 +673,7 @@
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -264,7 +279,7 @@ Index: linux-stage/fs/ext3/inode.c
        return err;
  }
  
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1831,7 +1831,7 @@
                }
        }
  
@@ -273,7 +288,7 @@ Index: linux-stage/fs/ext3/inode.c
  }
  
  /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+@@ -2004,7 +2004,7 @@
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -284,9 +299,9 @@ Index: linux-stage/fs/ext3/inode.c
                                /*
 Index: linux-stage/fs/ext3/balloc.c
 ===================================================================
---- linux-stage.orig/fs/ext3/balloc.c  2006-05-25 10:36:02.000000000 -0600
-+++ linux-stage/fs/ext3/balloc.c       2006-05-25 10:36:04.000000000 -0600
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c  2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-09-06 12:27:37.000000000 +0800
+@@ -79,7 +79,7 @@
   *
   * Return buffer_head on success or NULL in case of failure.
   */
@@ -331,9 +346,9 @@ Index: linux-stage/fs/ext3/balloc.c
        struct buffer_head *bitmap_bh = NULL;
 Index: linux-stage/fs/ext3/xattr.c
 ===================================================================
---- linux-stage.orig/fs/ext3/xattr.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/xattr.c        2006-05-25 10:36:04.000000000 -0600
-@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-09-06 12:27:37.000000000 +0800
+@@ -1281,7 +1281,7 @@
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
  getblk_failed:
@@ -342,7 +357,7 @@ Index: linux-stage/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-@@ -1328,7 +1328,7 @@ getblk_failed:
+@@ -1328,7 +1328,7 @@
                        if (ce)
                                mb_cache_entry_free(ce);
                        ea_bdebug(old_bh, "freeing");
@@ -351,7 +366,7 @@ Index: linux-stage/fs/ext3/xattr.c
  
                        /* ext3_forget() calls bforget() for us, but we
                           let our caller release old_bh, so we need to
-@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1427,7 +1427,7 @@
        if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
                if (ce)
                        mb_cache_entry_free(ce);
@@ -362,8 +377,8 @@ Index: linux-stage/fs/ext3/xattr.c
        } else {
 Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
---- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
-+++ linux-stage/fs/ext3/mballoc.c      2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c      2006-09-06 12:30:11.000000000 +0800
 @@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
@@ -806,7 +821,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -3068,8 +3083,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +}
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext3/Makefile  2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/Makefile       2006-05-25 10:36:04.000000000 -0600
+--- linux-stage.orig/fs/ext3/Makefile  2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/Makefile       2006-09-06 12:27:37.000000000 +0800
 @@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
index 6ecae2e..3310ff0 100644 (file)
@@ -5,7 +5,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
         special upgrade step is needed. Please read the
         user documentation before upgrading any part of a live system.
        * WIRE PROTOCOL CHANGE from previous 1.6 beta versions.  This
-         version will not interoperate with older 1.6 betas.  
+         version will not interoperate with 1.6 betas before beta5 (1.5.95).  
        * WARNING: Lustre configuration and startup changes are required with
         this release.  See https://mail.clusterfs.com/wikis/lustre/MountConf
         for details.
@@ -13,18 +13,6 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
 
 
 Severity   : enhancement
-Bugzilla   : 4226
-Description: Permanently set tunables
-Details    : All writable /proc/fs/lustre tunables can now be permanently
-             set on a per-server basis, at mkfs time or on a live
-            system.
-       
-Severity   : enhancement
-Bugzilla   : 10547
-Description: Lustre message v2
-Details    : Add lustre message format v2.
-
-Severity   : enhancement
 Bugzilla   : 8007
 Description: MountConf
 Details    : Lustre configuration is now managed via mkfs and mount
@@ -58,6 +46,18 @@ Details    : stripe assignments are now made based on ost space available,
             to optimize storage space and networking resources. 
        
 Severity   : enhancement
+Bugzilla   : 4226
+Description: Permanently set tunables
+Details    : All writable /proc/fs/lustre tunables can now be permanently
+             set on a per-server basis, at mkfs time or on a live
+            system.
+       
+Severity   : enhancement
+Bugzilla   : 10547
+Description: Lustre message v2
+Details    : Add lustre message format v2.
+
+Severity   : enhancement
 Bugzilla   : 9866
 Description: client OST exclusion list
 Details    : Clients can be started with a list of OSTs that should be
@@ -77,20 +77,42 @@ Details    : Further unserialise some read-only MDS RPCs - learn about intents.
              MDS RPCs in flight for a single client and add /proc controls
              to adjust this limit.
 
+Severity   : enhancement
+Bugzilla   : 22484
+Description: client read/write statistics
+Details    : Add client read/write call usage stats for performance
+            analysis of user processes. 
+            /proc/fs/lustre/llite/*/offset_stats shows non-sequential
+            file access. extents_stats shows chunk size distribution.
+            extents_stats_per_process shows chunk size distribution per
+            user process. 
+
+Severity   : enhancement
+Bugzilla   : 22486
+Description: mds statistics
+Details    : Add detailed mds operations statistics in  
+            /proc/fs/lustre/mds/*/stats.
+
+Severity   : minor
+Bugzilla   : 10667
+Description: Failure of copying files with lustre special EAs.
+Details    : Client side always returns success for setxattr call for lustre
+             special xattr (currently only "trusted.lov").
        
 ------------------------------------------------------------------------------
 
-tbd         Cluster File Systems, Inc. <info@clusterfs.com>
+08-20-2006  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.4.7
        * Support for kernels:
-       2.6.9-34.EL (RHEL 4)
-       2.6.5-7.252 (SLES 9)
+       2.6.9-42.EL (RHEL 4)
+       2.6.5-7.276 (SLES 9)
+       2.4.21-40.EL (RHEL 3)
        2.6.12.6 vanilla (kernel.org)
        * bug fixes
 
 Severity   : major
 Frequency  : rare
-Bugzilla   : 5719, 9635, 9792, 9684
+Bugzilla   : 5719, 9635, 9792, 9684
 Description: OST (or MDS) trips assertions in (re)connection under heavy load
 Details    : If a server is under heavy load and cannot reply to new
             connection requests before the client resends the (re)connect,
@@ -236,11 +258,11 @@ Details    : Running the "racer" program may cause one MDS thread to rename
 
 Severity   : major
 Frequency  : only very large systems with liblustre clients
-Bugzilla   : 7304 
+Bugzilla   : 7304
 Description: slow eviction of liblustre clients with the "evict_by_nid" RPC
-Details    : Use asynchronous set_info RPCs to send the "evict_by_nid" to 
+Details    : Use asynchronous set_info RPCs to send the "evict_by_nid" to
             all OSTs in parallel.  This allows the eviction of stale liblustre
-            clients to proceed much faster than if they were done in series, 
+            clients to proceed much faster than if they were done in series,
             and also offers similar improvements for other set_info RPCs.
 
 Severity   : minor
@@ -267,7 +289,7 @@ Details    : Do not serialize getattr (non-intent version) and statfs.
 
 Severity   : minor
 Frequency  : occasional, when OST network is overloaded/intermittent
-Bugzilla   : 10416 
+Bugzilla   : 10416
 Description: client evicted by OST after bulk IO timeout
 Details    : If a client sends a bulk IO request (read or write) the OST
             may evict the client if it is unresposive to its data GET/PUT
@@ -279,7 +301,7 @@ Details    : If a client sends a bulk IO request (read or write) the OST
 
 Severity   : minor
 Frequency  : Always when mmapping file with no objects
-Bugzilla   : 10438 
+Bugzilla   : 10438
 Description: client crashes when mmapping file with no objects
 Details    : Check that we actually have objects in a file before doing any
             operations on objects in ll_vm_open, ll_vm_close and
@@ -338,8 +360,8 @@ Details    : Many of the /proc/ tunables can only be tuned at a megabyte
 Severity   : enhancement
 Bugzilla   : 9292
 Description: Getattr by fid
-Details    : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP 
-            locks, avoids extra getattr rpc requests to MDS, allows '/' to 
+Details    : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP
+            locks, avoids extra getattr rpc requests to MDS, allows '/' to
             have locks and avoids getattr rpc requests for it on every stat.
 
 Severity   : major
@@ -457,16 +479,6 @@ Details    : If replay happened on an open request reply before we were able
              assertion in ll_local_open. Now we set the handler right after
              recognising of open request
 
-Severity   : minor
-Frequency  : very rare
-Bugzilla   : 10669
-Description: Deadlock: extent lock cancellation callback vs import invalidation
-Details    : If extent lock cancellation callback takes long enough time, and it
-            happens that import gets invalidated in process, there is a
-            deadlock on page_lock in extent lock cancellation vs ns_lock in
-            import invalidation processes. The fix is to not try to match
-            locks from inactive OSTs.
-
 Severity   : trivial
 Frequency  : very rare
 Bugzilla   : 10584
@@ -483,6 +495,14 @@ Details    : It is now possible to clear the OBD RPC statistics by writing
             to the "stats" file.
 
 Severity   : minor
+Frequency  : rare
+Bugzilla   : 10641
+Description: Client mtime is not the same on different clients after utimes
+Details    : In some cases, the client was using the utimes() syscall on
+            a file cached on another node.  The clients now validate the
+            ctime from the MDS + OSTs to determine which one is right.
+
+Severity   : minor
 Frequency  : always
 Bugzilla   : 10611
 Description: Inability to activate failout mode
@@ -506,6 +526,42 @@ Details    : Under some heavy load conditions it is possible that a
             failed mount can wait for the full obd_timeout interval,
             possibly several minutes, before reporting an error.
             Instead return an error as soon as the status is known.
+Severity   : major
+Frequency  : quota enabled and large files being deleted
+Bugzilla   : 10707
+Description: releasing more than 4GB of quota at once hangs OST
+Details    : If a user deletes more than 4GB of files on a single OST it
+            will cause the OST to spin in an infinite loop.  Release
+            quota in < 4GB chunks, or use a 64-bit value for 1.4.7.1+.
+
+Severity   : trivial
+Frequency  : rare
+Bugzilla   : 10845
+Description: statfs data retrieved from /proc may be stale or zero
+Details    : When reading per-device statfs data from /proc, in the
+            {kbytes,files}_{total,free,avail} files, it may appear
+            as zero or be out of date.
+
+Severity   : trivial
+Frequency  : systems with MD RAID1 external journal devices
+Bugzilla   : 10832
+Description: lconf's call to blkid is confused by RAID1 journal devices
+Details    : Use the "blkid -l" flag to locate the MD RAID device instead
+            of returning all block devices that match the journal UUID.
+
+Severity   : normal
+Frequency  : always, for aggregate stripe size over 4GB
+Bugzilla   : 10725
+Description: assertion fails when trying to use 4GB stripe size
+Details    : Using "setstripe" to set a stripe size over 4GB makes the kernel fail,
+             complaining "ASSERTION(lsm->lsm_xfersize != 0)"
+
+Severity   : normal
+Frequency  : always on ppc64
+Bugzilla   : 10634
+Description: the first write on an ext3 filesystem with mballoc got stuck
+Details    : ext3_mb_generate_buddy() uses find_next_bit() which does not
+             perform endianness conversion.
 
 ------------------------------------------------------------------------------
 
index ada23ad..19edffa 100644 (file)
@@ -266,7 +266,7 @@ LB_LINUX_TRY_COMPILE([
 #
 AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
 [AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
-HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/mm/filemap.c`"
+HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
 if test "$HAVE_GCPN_GFP" != 0 ; then
        AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
                [kernel has grab_cache_page_nowait_gfp()])
index aff7119..2881e61 100644 (file)
@@ -1,6 +1,6 @@
 m4_define([LUSTRE_MAJOR],[1])
-m4_define([LUSTRE_MINOR],[9])
-m4_define([LUSTRE_PATCH],[0])
+m4_define([LUSTRE_MINOR],[5])
+m4_define([LUSTRE_PATCH],[95])
 m4_define([LUSTRE_FIX],[0])
 
 dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after
index a1469e4..b469034 100644 (file)
@@ -37,6 +37,9 @@ service defined by this command.
 .BI \--backfstype= fstype
 Force a particular format for the backing fs (ext3, ldiskfs)
 .TP
+.BI \--comment= comment
+Set user comment about this disk, ignored by Lustre.
+.TP
 .BI \--device-size= KB
 Set device size for loop devices
 .TP
index 3e830a9..69f1815 100644 (file)
@@ -70,7 +70,7 @@ options:
 Only start the MGC (and MGS, if co-located) for a target service, and not the actual service.
 .TP
 .BI exclude= ostlist
-Start a client or MDT with a list of known inactive OSTs
+Start a client or MDT with a (colon-separated) list of known inactive OSTs
 .TP
 .BI abort_recov
 Abort recovery (targets only)
index 423c8bd..7ca33ce 100644 (file)
@@ -24,6 +24,9 @@ mounted.
 
 .SH OPTIONS
 .TP
+.BI \--comment= comment
+Set user comment about this disk, ignored by Lustre.
+.TP
 .BI \--erase-params
 Remove all previous parameter info
 .TP
index 5c466ce..9c41cd0 100644 (file)
@@ -1,13 +1,15 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
- */ 
+ */
 #ifndef __LVFS_LINUX_H__
 #define __LVFS_LINUX_H__
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/fs.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <linux/namei.h>
+#endif
 #include <linux/sched.h>
 
 #include <lvfs.h>
index bb188e7..833e961 100644 (file)
@@ -85,7 +85,7 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
         }
 }
 
-#define OBD_SLEEP_ON(wq)        interruptible_sleep_on(wq)
+#define OBD_SLEEP_ON(wq, state)  wait_event_interruptible(wq, state)
 
 
 #else /* !__KERNEL__ */
index a828b35..e4730f8 100644 (file)
@@ -11,7 +11,7 @@
 typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
 
 /* liblustreapi.c */
-extern int llapi_file_create(const char *name, long stripe_size,
+extern int llapi_file_create(const char *name, unsigned long stripe_size,
                              int stripe_offset, int stripe_count,
                              int stripe_pattern);
 extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
index bae756b..2662c06 100644 (file)
@@ -308,7 +308,6 @@ struct lmv_stripe_md {
         struct lu_fid mea_ids[0];
 };
 
-
 struct lustre_handle {
         __u64 cookie;
 };
@@ -422,43 +421,43 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define MSG_CONNECT_NEXT_VER    0x80 /* use next version of lustre_msg */
 
 /* Connect flags */
-#define OBD_CONNECT_RDONLY       0x1ULL /* client allowed read-only access */
-#define OBD_CONNECT_INDEX        0x2ULL /* connect to specific LOV idx */
-#define OBD_CONNECT_GRANT        0x8ULL /* OSC acquires grant at connect */
-#define OBD_CONNECT_SRVLOCK     0x10ULL /* server takes locks for client */
-#define OBD_CONNECT_VERSION     0x20ULL /* Server supports versions in ocd */
-#define OBD_CONNECT_REQPORTAL   0x40ULL /* Separate portal for non-IO reqs */
-#define OBD_CONNECT_ACL         0x80ULL /* client using access control lists */
-#define OBD_CONNECT_XATTR      0x100ULL /* client using extended attributes*/
-#define OBD_CONNECT_CROW       0x200ULL /* MDS+OST do object create-on-write */
-#define OBD_CONNECT_TRUNCLOCK  0x400ULL /* server gets locks for punch b=9528 */
-#define OBD_CONNECT_TRANSNO    0x800ULL /* replay is sending initial transno */
-#define OBD_CONNECT_IBITS     0x1000ULL /* support for inodebits locks */
-#define OBD_CONNECT_JOIN      0x2000ULL /* files can be concatenated */
-#define OBD_CONNECT_REAL      0x4000ULL
-#define OBD_CONNECT_ATTRFID   0x8000ULL /* Server supports GetAttr By Fid */
-#define OBD_CONNECT_NODEVOH   0x10000ULL /* No open handle for special nodes */
-#define OBD_CONNECT_LCL_CLIENT 0x20000ULL /* local 1.6 client */
-#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote client */
-#define OBD_CONNECT_BRW_SIZE    0x80000ULL  /* Maximum pages per RPC */
-
-/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
-
-#define MDS_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
+#define OBD_CONNECT_RDONLY         0x1ULL /* client allowed read-only access */
+#define OBD_CONNECT_INDEX          0x2ULL /* connect to specific LOV idx */
+#define OBD_CONNECT_GRANT          0x8ULL /* OSC acquires grant at connect */
+#define OBD_CONNECT_SRVLOCK       0x10ULL /* server takes locks for client */
+#define OBD_CONNECT_VERSION       0x20ULL /* Server supports versions in ocd */
+#define OBD_CONNECT_REQPORTAL     0x40ULL /* Separate portal for non-IO reqs */
+#define OBD_CONNECT_ACL           0x80ULL /* client uses access control lists */
+#define OBD_CONNECT_XATTR        0x100ULL /* client using extended attributes*/
+#define OBD_CONNECT_CROW         0x200ULL /* MDS+OST create objects on write */
+#define OBD_CONNECT_TRUNCLOCK    0x400ULL /* locks on server for punch b=9528 */
+#define OBD_CONNECT_TRANSNO      0x800ULL /* replay sends initial transno */
+#define OBD_CONNECT_IBITS       0x1000ULL /* support for inodebits locks */
+#define OBD_CONNECT_JOIN        0x2000ULL /* files can be concatenated */
+#define OBD_CONNECT_REAL        0x4000ULL
+#define OBD_CONNECT_ATTRFID     0x8000ULL /* Server supports GetAttr By Fid */
+#define OBD_CONNECT_NODEVOH     0x10000ULL /* No open handle for special nodes */
+#define OBD_CONNECT_LCL_CLIENT  0x20000ULL /* local 1.8 client */
+#define OBD_CONNECT_RMT_CLIENT  0x40000ULL /* Remote 1.8 client */
+#define OBD_CONNECT_BRW_SIZE    0x80000ULL /* Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64     0x100000ULL /* 64bit qunit_data.qd_count b=10707*/
+#define OBD_CONNECT_FID_CAPA    0x200000ULL /* fid capability */
+#define OBD_CONNECT_OSS_CAPA    0x400000ULL /* OSS capability */
+/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
+ * and lustre/utils/wirecheck.c */
+
+#define MDT_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
                                 OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
                                 OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
-                                OBD_CONNECT_BRW_SIZE)
+                                OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION)
 
-#define MDT_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
-                                OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
-                                OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
-                                OBD_CONNECT_ATTRFID)
+#define MAX_QUOTA_COUNT32 ((0xffffffffULL >> QUOTABLOCK_BITS) << QUOTABLOCK_BITS)
 
 #define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
                                                 ((patch)<<8) + (fix))
@@ -509,8 +508,8 @@ typedef enum {
         OST_OPEN       = 11,
         OST_CLOSE      = 12,
         OST_STATFS     = 13,
-        OST_SAN_READ   = 14,
-        OST_SAN_WRITE  = 15,
+/*      OST_SAN_READ   = 14,    deprecated */
+/*      OST_SAN_WRITE  = 15,    deprecated */
         OST_SYNC       = 16,
         OST_SET_INFO   = 17,
         OST_QUOTACHECK = 18,
@@ -652,7 +651,6 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
 #define XATTR_NAME_ACL_ACCESS   "system.posix_acl_access"
 #define XATTR_NAME_LOV          "trusted.lov"
 
-
 #define OBD_MD_FLID        (0x00000001ULL) /* object ID */
 #define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
 #define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
@@ -1536,9 +1534,9 @@ struct lov_mds_md_join {
 #define LLOG_OP_MASK  0xfff00000
 
 typedef enum {
-        LLOG_PAD_MAGIC   = LLOG_OP_MAGIC | 0,
-        OST_SZ_REC       = LLOG_OP_MAGIC | (OST_SAN_WRITE << 8),
-        OST_RAID1_REC    = LLOG_OP_MAGIC | ((OST_SAN_WRITE + 1) << 8),
+        LLOG_PAD_MAGIC   = LLOG_OP_MAGIC | 0x00000,
+        OST_SZ_REC       = LLOG_OP_MAGIC | 0x00f00,
+        OST_RAID1_REC    = LLOG_OP_MAGIC | 0x01000,
         MDS_UNLINK_REC   = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK,
         MDS_SETATTR_REC  = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR,
         OBD_CFG_REC      = LLOG_OP_MAGIC | 0x20000,
@@ -1761,14 +1759,28 @@ extern void lustre_swab_llog_rec(struct llog_rec_hdr  *rec,
 struct lustre_cfg;
 extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
 
-/* quota */
+/* quota. fixed by tianzy for bug10707 */
+#define QUOTA_IS_GRP   0X1UL  /* 0 is user, 1 is group. Used by qd_flags*/
+#define QUOTA_IS_BLOCK 0x2UL  /* 0 is inode, 1 is block. Used by qd_flags*/
+
 struct qunit_data {
+        __u32 qd_id; /* ID applies to (uid, gid) */
+        __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupy one bit;
+                         * Block quota or file quota occupy one bit */
+        __u64 qd_count; /* acquire/release count (bytes for block quota) */
+};
+
+struct qunit_data_old {
         __u32 qd_id;    /* ID appiles to (uid, gid) */
         __u32 qd_type;  /* Quota type (USRQUOTA, GRPQUOTA) */
         __u32 qd_count; /* acquire/release count (bytes for block quota) */
         __u32 qd_isblk; /* Block quota or file quota */
 };
+
 extern void lustre_swab_qdata(struct qunit_data *d);
+extern void lustre_swab_qdata_old(struct qunit_data_old *d);
+extern struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d);
+extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d);
 
 typedef enum {
         QUOTA_DQACQ     = 601,
index ae21989..f916bc7 100644 (file)
@@ -51,8 +51,8 @@ enum lcfg_command_type {
         LCFG_DEL_CONN       = 0x00cf00c,
         LCFG_LOV_ADD_OBD    = 0x00cf00d,
         LCFG_LOV_DEL_OBD    = 0x00cf00e,
-        LCFG_PARAM          = 0x00ce00f,
-        LCFG_MARKER         = 0x00ce010,
+        LCFG_PARAM          = 0x00cf00f,
+        LCFG_MARKER         = 0x00cf010,
         LCFG_LOG_START      = 0x00ce011,
         LCFG_LOG_END        = 0x00ce012,
         LCFG_LOV_ADD_INA    = 0x00ce013,
index 69f07d7..9c76666 100644 (file)
@@ -92,7 +92,8 @@ struct lustre_disk_data {
         char       ldd_svname[64];      /* this server's name (lustre-mdt0001)*/
         __u8       ldd_uuid[40];        /* server UUID (COMPAT_146) */
    
-/*200*/ __u8       ldd_padding[4096 - 200];
+/*200*/ char       ldd_userdata[1024 - 200]; /* arbitrary user string */
+/*1024*/__u8       ldd_padding[4096 - 1024];
 /*4096*/char       ldd_mount_opts[4096]; /* target fs mount opts */
 /*8192*/char       ldd_params[4096];     /* key=value pairs */
 };
index 4b3cd6a..5ee5a81 100644 (file)
@@ -370,120 +370,21 @@ extern char *ldlm_lockname[];
 extern char *ldlm_typename[];
 extern char *ldlm_it2str(int it);
 
-#define __LDLM_DEBUG(level, lock, format, a...)                               \
-do {                                                                          \
-        if (lock->l_resource == NULL) {                                       \
-                CDEBUG(level, "### " format                                   \
-                       " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
-                       "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "   \
-                       LPX64" expref: %d pid: %u\n" , ## a, lock,             \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {                       \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
-                       "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64     \
-                       " expref: %d pid: %u\n" , ## a,                        \
-                       lock->l_resource->lr_namespace->ns_name, lock,         \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_policy_data.l_extent.start,                    \
-                       lock->l_policy_data.l_extent.end,                      \
-                       lock->l_req_extent.start, lock->l_req_extent.end,      \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_FLOCK) {                        \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "       \
-                       "["LPU64"->"LPU64"] flags: %x remote: "LPX64           \
-                       " expref: %d pid: %u\n" , ## a,                        \
-                       lock->l_resource->lr_namespace->ns_name, lock,         \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_policy_data.l_flock.pid,                       \
-                       lock->l_policy_data.l_flock.start,                     \
-                       lock->l_policy_data.l_flock.end,                       \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_IBITS) {                        \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "  \
-                       "flags: %x remote: "LPX64" expref: %d "                \
-                       "pid %u\n" , ## a,                                     \
-                       lock->l_resource->lr_namespace->ns_name,               \
-                       lock, lock->l_handle.h_cookie,                         \
-                       atomic_read (&lock->l_refc),                           \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       lock->l_policy_data.l_inodebits.bits,                  \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        {                                                                     \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "     \
-                       "remote: "LPX64" expref: %d pid: %u\n" , ## a,         \
-                       lock->l_resource->lr_namespace->ns_name,               \
-                       lock, lock->l_handle.h_cookie,                         \
-                       atomic_read (&lock->l_refc),                           \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-        }                                                                     \
+void ldlm_lock_debug(cfs_debug_limit_state_t *cdls,
+                     __u32 level, struct ldlm_lock *lock,
+                     const char *file, const char *func, const int line,
+                     char *fmt, ...);
+
+#define LDLM_DEBUG(lock, fmt, a...) ldlm_lock_debug(NULL, D_DLMTRACE, lock, \
+                   __FILE__, __func__, __LINE__, "### " fmt, ## a)
+
+#define LDLM_ERROR(lock, fmt, a...)                                      \
+do {                                                                     \
+        static cfs_debug_limit_state_t cdls;                             \
+        ldlm_lock_debug(&cdls, D_ERROR, lock,                             \
+                        __FILE__, __func__, __LINE__, "### " fmt, ## a); \
 } while (0)
 
-#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \
-                                                    format, ## a)
-#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a)
-
 #define LDLM_DEBUG_NOLOCK(format, a...)                 \
         CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
 
index 8377728..43071ba 100644 (file)
@@ -11,8 +11,6 @@ struct obd_device;
 struct ptlrpc_request;
 
 
-void ptlrpc_run_failed_import_upcall(struct obd_import *imp);
-void ptlrpc_run_recovery_over_upcall(struct obd_device *obd);
 int ptlrpc_replay(struct obd_import *imp);
 int ptlrpc_resend(struct obd_import *imp);
 void ptlrpc_free_committed(struct obd_import *imp);
index c05ce65..412bb02 100644 (file)
@@ -134,9 +134,10 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt,
                         struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
                         struct llog_cookie *logcookies, int numcookies);
 
-int llog_cat_initialize(struct obd_device *obd, int count);
+int llog_cat_initialize(struct obd_device *obd, int count, 
+                        struct obd_uuid *uuid);
 int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
-                  int count, struct llog_catid *logid);
+                  int count, struct llog_catid *logid, struct obd_uuid *uuid);
 
 int obd_llog_finish(struct obd_device *obd, int count);
 
index 8c2f0af..1356fbf 100644 (file)
@@ -398,7 +398,7 @@ struct ptlrpc_request {
         struct timeval       rq_arrival_time;       /* request arrival time */
         struct ptlrpc_reply_state *rq_reply_state;  /* separated reply state */
         struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/
-#if CRAY_XT3
+#ifdef CRAY_XT3
         __u32                rq_uid;            /* peer uid, used in MDS only */
 #endif
 
@@ -463,34 +463,21 @@ ptlrpc_rqphase2str(const struct ptlrpc_request *req)
 
 #define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s"
 
-#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...)                       \
-CDEB_TYPE(level, "@@@ " fmt                                                    \
-       " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "         \
-       REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
-       req->rq_transno,                                                        \
-       req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,               \
-       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :                 \
-          req->rq_export ? (char*)req->rq_export->exp_client_uuid.uuid : "<?>",\
-       req->rq_import ?                                                        \
-          (char *)req->rq_import->imp_connection->c_remote_uuid.uuid :         \
-          req->rq_export ?                                                     \
-          (char *)req->rq_export->exp_connection->c_remote_uuid.uuid :  "<?>", \
-       (req->rq_import && req->rq_import->imp_client) ?                        \
-           req->rq_import->imp_client->cli_request_portal : -1,                \
-       req->rq_reqlen, req->rq_replen,                                         \
-       atomic_read(&req->rq_refcount),                                         \
-       DEBUG_REQ_FLAGS(req),                                                   \
-       req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0,              \
-       req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0,              \
-       req->rq_status, req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0)
+void debug_req(cfs_debug_limit_state_t *cdls,
+               __u32 level, struct ptlrpc_request *req,
+               const char *file, const char *func, const int line,
+               const char *fmt, ...);
 
 /* for most callers (level is a constant) this is resolved at compile time */
 #define DEBUG_REQ(level, req, fmt, args...)                                    \
 do {                                                                           \
-        if ((level) & (D_ERROR | D_WARNING))                                   \
-            __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args);               \
-        else                                                                   \
-            __DEBUG_REQ(CDEBUG, level, req, fmt, ## args);                     \
+        if ((level) & (D_ERROR | D_WARNING)) {                                 \
+                static cfs_debug_limit_state_t cdls;                           \
+                debug_req(&cdls, level, req, __FILE__, __func__, __LINE__,     \
+                          "@@@ "fmt"\n", ## args);                             \
+        } else                                                                 \
+                debug_req(NULL, level, req, __FILE__, __func__, __LINE__,      \
+                          "@@@ "fmt"\n", ## args);                             \
 } while (0)
 
 struct ptlrpc_bulk_page {
index adff8f1..95f8a73 100644 (file)
@@ -48,11 +48,6 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 #define PARAM_MDC                  "mdc."
 #define PARAM_LLITE                "llite."
 #define PARAM_LOV                  "lov."
-/* LOV_STRIPE_* aren't settable in proc. But match the proc names. */
-#define PARAM_LOV_STRIPE_SIZE      PARAM_LOV"stripesize="
-#define PARAM_LOV_STRIPE_COUNT     PARAM_LOV"stripecount="
-#define PARAM_LOV_STRIPE_OFFSET    PARAM_LOV"stripeoffset="
-#define PARAM_LOV_STRIPE_PATTERN   PARAM_LOV"stripetype="
 #define PARAM_SEC                  "security."
 #define PARAM_SEC_RPC              PARAM_SEC"rpc."
 #define PARAM_SEC_RPC_MDT          PARAM_SEC_RPC"mdt="
index 640a5f6..d9750d7 100644 (file)
@@ -34,6 +34,8 @@
 #include <lustre_quota.h>
 #include <lustre_fld.h>
 
+#define MAX_OBD_DEVICES 8192
+
 /* this is really local to the OSC */
 struct loi_oap_pages {
         struct list_head        lop_pending;
@@ -384,7 +386,6 @@ struct client_obd {
         int                      cl_default_mds_easize;
         int                      cl_max_mds_easize;
         int                      cl_max_mds_cookiesize;
-        kdev_t                   cl_sandev;
 
         /* security configuration */
         struct sec_flavor_config cl_sec_conf;
@@ -656,36 +657,28 @@ struct lu_placement_hint {
         int           ph_opc;
 };
 
-#define LUSTRE_FLD_NAME  "fld"
-#define LUSTRE_SEQ_NAME  "seq"
-
-/* device types (not names--FIXME) */
-/* FIXME all the references to these defines need to be updated */
-#define LUSTRE_MDS_NAME  "mds"
-#define LUSTRE_MDT_NAME  "mdt"
-
-/* new MDS layers. Prototype */
-#define LUSTRE_CMM_NAME  "cmm"
-#define LUSTRE_MDD_NAME  "mdd"
-#define LUSTRE_OSD_NAME  "osd"
-#define LUSTRE_CMM_MDC_NAME "cmm-mdc"
-
-#define LUSTRE_MDC_NAME  "mdc"
-#define LUSTRE_LOV_NAME  "lov"
-#define LUSTRE_LMV_NAME  "lmv"
-
-/* FIXME just the names need to be changed */
-#define LUSTRE_OSS_NAME "ost"       /* FIXME oss */
-#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost */
-#define LUSTRE_OSTSAN_NAME "sanobdfilter"
-
-#define LUSTRE_OSC_NAME "osc"
-#define LUSTRE_FILTER_NAME "filter"
-#define LUSTRE_SANOSC_NAME "sanosc"
-#define LUSTRE_SANOST_NAME "sanost"
-#define LUSTRE_MGS_NAME "mgs"
-#define LUSTRE_MGC_NAME "mgc"
-
+#define LUSTRE_FLD_NAME         "fld"
+#define LUSTRE_SEQ_NAME         "seq"
+
+#define LUSTRE_CMM_NAME         "cmm"
+#define LUSTRE_MDD_NAME         "mdd"
+#define LUSTRE_OSD_NAME         "osd"
+#define LUSTRE_LMV_NAME         "lmv"
+#define LUSTRE_CMM_MDC_NAME     "cmm-mdc"
+
+/* obd device type names */
+/* FIXME all references to LUSTRE_MDS_NAME should be replaced with LUSTRE_MDT_NAME */
+#define LUSTRE_MDS_NAME         "mds"
+#define LUSTRE_MDT_NAME         "mdt"
+#define LUSTRE_MDC_NAME         "mdc"
+#define LUSTRE_OSS_NAME         "ost"       /* FIXME change name to oss */
+#define LUSTRE_OST_NAME         "obdfilter" /* FIXME change name to ost */
+#define LUSTRE_OSC_NAME         "osc"
+#define LUSTRE_LOV_NAME         "lov"
+#define LUSTRE_MGS_NAME         "mgs"
+#define LUSTRE_MGC_NAME         "mgc"
+
+#define LUSTRE_CACHEOBD_NAME    "cobd"
 #define LUSTRE_ECHO_NAME        "obdecho"
 #define LUSTRE_ECHO_CLIENT_NAME "echo_client"
 
@@ -1071,10 +1064,6 @@ struct obd_ops {
                                int flags, void *opaque);
         int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *,
                          int join);
-        int (*o_san_preprw)(int cmd, struct obd_export *exp,
-                            struct obdo *oa, int objcount,
-                            struct obd_ioobj *obj, int niocount,
-                            struct niobuf_remote *remote);
         int (*o_init_export)(struct obd_export *exp);
         int (*o_destroy_export)(struct obd_export *exp);
         int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
@@ -1082,7 +1071,8 @@ struct obd_ops {
 
         /* llog related obd_methods */
         int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd,
-                           int count, struct llog_catid *logid);
+                           int count, struct llog_catid *logid, 
+                           struct obd_uuid *uuid);
         int (*o_llog_finish)(struct obd_device *obd, int count);
 
         /* metadata-only methods */
@@ -1194,6 +1184,7 @@ struct lsm_operations {
         void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
                                      unsigned long *);
         obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int);
+        obd_off (*lsm_stripe_offset_by_offset)(struct lov_stripe_md *, obd_off);
         int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off);
         int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd);
         int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
index 995d2af..fbe385b 100644 (file)
@@ -42,7 +42,6 @@
 #endif
 
 /* OBD Device Declarations */
-#define MAX_OBD_DEVICES 2048
 extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
 extern spinlock_t obd_dev_lock;
 
@@ -1388,22 +1387,6 @@ static inline int obd_join_lru(struct obd_export *exp,
         RETURN(rc);
 }
 
-static inline int obd_san_preprw(int cmd, struct obd_export *exp,
-                                 struct obdo *oa,
-                                 int objcount, struct obd_ioobj *obj,
-                                 int niocount, struct niobuf_remote *remote)
-{
-        int rc;
-
-        EXP_CHECK_DT_OP(exp, preprw);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
-
-        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
-                                           niocount, remote);
-        class_export_put(exp);
-        return(rc);
-}
-
 static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
                           struct obd_client_handle *handle, int flag)
 {
index 161d28a..cad9b68 100644 (file)
@@ -30,12 +30,13 @@ extern atomic_t obd_memory;
 extern int obd_memmax;
 extern unsigned int obd_fail_loc;
 extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_dump_on_eviction;
 extern unsigned int obd_timeout;          /* seconds */
 #define PING_INTERVAL max(obd_timeout / 4, 1U)
 #define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
 extern unsigned int ldlm_timeout;
 extern unsigned int obd_health_check_timeout;
-extern char obd_lustre_upcall[128];
+extern unsigned int obd_sync_filter;
 extern cfs_waitq_t obd_race_waitq;
 extern int obd_race_state;
 
@@ -185,6 +186,8 @@ extern int obd_race_state;
 #define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1210
 #define OBD_FAIL_SEC_CTX_FINI_NET        0x1220
 
+#define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xA00
+
 /* preparation for a more advanced failure testbed (not functional yet) */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
@@ -233,16 +236,18 @@ do {                                                                         \
  * first thread that calls this with a matching fail_loc is put to
  * sleep. The next thread that calls with the same fail_loc wakes up
  * the first and continues. */
-#define OBD_RACE(id)                                                           \
-do {                                                                           \
-        if  (OBD_FAIL_CHECK_ONCE(id)) {                         \
-                CERROR("obd_race id %x sleeping\n", (id));      \
-                OBD_SLEEP_ON(&obd_race_waitq);        \
-                CERROR("obd_fail_race id %x awake\n", (id));    \
-        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==        \
-                    ((id) & OBD_FAIL_MASK_LOC)) {               \
-                cfs_waitq_signal(&obd_race_waitq);              \
-        }                                                       \
+#define OBD_RACE(id)                                                         \
+do {                                                                         \
+        if  (OBD_FAIL_CHECK_ONCE(id)) {                                      \
+                obd_race_state = 0;                                          \
+                CERROR("obd_race id %x sleeping\n", (id));                   \
+                OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
+                CERROR("obd_fail_race id %x awake\n", (id));                 \
+        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==                     \
+                    ((id) & OBD_FAIL_MASK_LOC)) {                            \
+                CERROR("obd_fail_race id %x waking\n", (id));                \
+                obd_race_state = 1;                                          \
+        }                                                                    \
 } while(0)
 #else
 /* sigh.  an expedient fix until OBD_RACE is fixed up */
diff --git a/lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch b/lustre/kernel_patches/patches/bitops_ext2_find_next_le_bit-2.6.patch
new file mode 100644 (file)
index 0000000..e549597
--- /dev/null
@@ -0,0 +1,153 @@
+Index: linux-2.6.9/include/asm-i386/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-i386/bitops.h 2004-10-19 05:54:37.000000000 +0800
++++ linux-2.6.9/include/asm-i386/bitops.h      2006-09-01 14:04:19.000000000 +0800
+@@ -448,6 +448,8 @@
+       find_first_zero_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+       find_next_zero_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++      find_next_bit((unsigned long*)(addr), (size), (off))
+ /* Bitmap functions for the minix filesystem.  */
+ #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
+Index: linux-2.6.9/include/asm-x86_64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-x86_64/bitops.h       2004-10-19 05:53:51.000000000 +0800
++++ linux-2.6.9/include/asm-x86_64/bitops.h    2006-09-01 14:04:19.000000000 +0800
+@@ -399,6 +399,8 @@
+       find_first_zero_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+       find_next_zero_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++      find_next_bit((unsigned long*)(addr), (size), (off))
+ /* Bitmap functions for the minix filesystem.  */
+ #define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,(void*)addr)
+Index: linux-2.6.9/include/asm-ia64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ia64/bitops.h 2004-10-19 05:55:07.000000000 +0800
++++ linux-2.6.9/include/asm-ia64/bitops.h      2006-09-01 14:04:19.000000000 +0800
+@@ -387,6 +387,8 @@
+ #define ext2_test_bit                 test_bit
+ #define ext2_find_first_zero_bit      find_first_zero_bit
+ #define ext2_find_next_zero_bit               find_next_zero_bit
++#define ext2_find_next_le_bit(addr, size, off) \
++      __find_next_bit((addr), (size), (off))
+ /* Bitmap functions for the minix filesystem.  */
+ #define minix_test_and_set_bit(nr,addr)               test_and_set_bit(nr,addr)
+Index: linux-2.6.9/include/asm-ppc/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ppc/bitops.h  2004-10-19 05:54:08.000000000 +0800
++++ linux-2.6.9/include/asm-ppc/bitops.h       2006-09-01 14:04:35.000000000 +0800
+@@ -449,6 +449,47 @@
+       return result + ffz(tmp);
+ }
++#define ext2_find_next_le_bit(addr, size, off) \
++        generic_find_next_le_bit((addr), (size), (off))
++
++static __inline__ unsigned long generic_find_next_le_bit(const void *addr,
++              unsigned long size, unsigned long offset)
++{
++      unsigned int *p = ((unsigned int*) addr) + (offset >> 5);
++      unsigned int result = offset & ~31UL;
++      unsigned int tmp;
++
++      if (offset >= size)
++              return size;
++      size -= result;
++      offset &= 31UL;
++      if (offset) {
++              tmp = cpu_to_le32p(p++);
++              tmp &= ~0UL << offset;
++              if (size < 32)
++                      goto found_first;
++              if (tmp)
++                      goto found_middle;
++              size -= 32;
++              result += 32;
++      }
++      while (size >= 32) {
++              if ((tmp = cpu_to_le32p(p++)))
++                      goto found_middle;
++              result += 32;
++              size -= 32;
++      }
++      if (!size)
++              return result;
++      tmp = cpu_to_le32p(p);
++found_first:
++      tmp &= ~0U >> (32 - size);
++      if (tmp == 0UL)         /* Are any bits set? */
++              return result + size;   /* Nope. */
++found_middle:
++      return result + __ffs(tmp);
++}
++
+ /* Bitmap functions for the minix filesystem.  */
+ #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
+ #define minix_set_bit(nr,addr) ((void)ext2_set_bit(nr,addr))
+Index: linux-2.6.9/include/asm-ppc64/bitops.h
+===================================================================
+--- linux-2.6.9.orig/include/asm-ppc64/bitops.h        2004-10-19 05:55:43.000000000 +0800
++++ linux-2.6.9/include/asm-ppc64/bitops.h     2006-09-01 14:05:00.000000000 +0800
+@@ -349,6 +349,9 @@
+       find_first_zero_le_bit((unsigned long*)addr, size)
+ #define ext2_find_next_zero_bit(addr, size, off) \
+       find_next_zero_le_bit((unsigned long*)addr, size, off)
++#define ext2_find_next_le_bit(addr, size, off) \
++        generic_find_next_le_bit((unsigned long*)(addr), (size), (off))
++extern unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size, unsigned long offset);
+ #define minix_test_and_set_bit(nr,addr)               test_and_set_bit(nr,addr)
+ #define minix_set_bit(nr,addr)                        set_bit(nr,addr)
+Index: linux-2.6.9/arch/ppc64/kernel/bitops.c
+===================================================================
+--- linux-2.6.9.orig/arch/ppc64/kernel/bitops.c        2004-10-19 05:54:37.000000000 +0800
++++ linux-2.6.9/arch/ppc64/kernel/bitops.c     2006-09-01 14:05:25.000000000 +0800
+@@ -145,3 +145,43 @@
+ }
+ EXPORT_SYMBOL(find_next_zero_le_bit);
++
++unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned long size,
++              unsigned long offset)
++{
++      const unsigned long *p = addr + offset / BITS_PER_LONG;
++      unsigned long result = offset & ~(BITS_PER_LONG - 1);
++      unsigned long tmp;
++
++      if (offset >= size)
++              return size;
++      size -= result;
++      offset %= BITS_PER_LONG;
++      if (offset) {
++              tmp = __swab64p(p++);
++              tmp &= (~0UL << offset);
++              if (size < BITS_PER_LONG)
++                      goto found_first;
++              if (tmp)
++                      goto found_middle;
++              size -= BITS_PER_LONG;
++              result += BITS_PER_LONG;
++      }
++      while (size & ~(BITS_PER_LONG - 1)) {
++              if ((tmp = __swab64p(p++)))
++                      goto found_middle;
++              result += BITS_PER_LONG;
++              size -= BITS_PER_LONG;
++      }
++      if (!size)
++              return result;
++      tmp = __swab64p(p);
++found_first:
++      tmp &= ~0UL >> (BITS_PER_LONG - size);
++      if (tmp == 0UL)
++              return result + size;
++found_middle:
++      return result + __ffs(tmp);
++}
++
++EXPORT_SYMBOL(generic_find_next_le_bit);
index 64b8bd3..84b88fd 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.6.5-7.201/include/linux/dcache.h
+Index: linux-2.6.5-7.276/include/linux/dcache.h
 ===================================================================
---- linux-2.6.5-7.201.orig/include/linux/dcache.h      2005-10-11 00:12:48.000000000 +0400
-+++ linux-2.6.5-7.201/include/linux/dcache.h   2005-12-20 23:16:31.000000000 +0300
+--- linux-2.6.5-7.276.orig/include/linux/dcache.h
++++ linux-2.6.5-7.276/include/linux/dcache.h
 @@ -38,7 +38,6 @@ struct qstr {
        const unsigned char * name;
        unsigned int len;
@@ -18,131 +18,15 @@ Index: linux-2.6.5-7.201/include/linux/dcache.h
        struct dentry * d_parent;       /* parent directory */
        struct qstr d_name;
        struct hlist_node d_hash;       /* lookup hash list */  
-Index: linux-2.6.5-7.201/fs/dcache.c
+Index: linux-2.6.5-7.276/fs/dcache.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/dcache.c 2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/dcache.c      2005-12-20 23:16:31.000000000 +0300
-@@ -41,6 +41,8 @@ EXPORT_SYMBOL(dcache_lock);
- static kmem_cache_t *dentry_cache; 
-+#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-+
- /*
-  * This is the single most critical data structure when it comes
-  * to the dcache: the hashtable for lookups. Somebody should try
-@@ -67,7 +69,7 @@ static void d_callback(void *arg)
-       struct dentry * dentry = (struct dentry *)arg;
-       if (dname_external(dentry)) {
--              kfree(dentry->d_qstr);
-+              kfree(dentry->d_name.name);
-       }
-       kmem_cache_free(dentry_cache, dentry); 
- }
-@@ -678,8 +680,6 @@ static int shrink_dcache_memory(int nr, 
-       return dentry_stat.nr_unused;
- }
--#define NAME_ALLOC_LEN(len)   ((len+16) & ~15)
--
- /**
-  * d_alloc    -       allocate a dcache entry
-  * @parent: parent of entry to allocate
-@@ -694,26 +694,18 @@ struct dentry * d_alloc(struct dentry * 
- {
-       char * str;
-       struct dentry *dentry;
--      struct qstr * qstr;
-       dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
-       if (!dentry)
-               return NULL;
-       if (name->len > DNAME_INLINE_LEN-1) {
--              qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len), 
--                              GFP_KERNEL);  
--              if (!qstr) {
-+              str = kmalloc(name->len + 1, GFP_KERNEL);
-+              if (!str) {
-                       kmem_cache_free(dentry_cache, dentry); 
-                       return NULL;
-               }
--              qstr->name = qstr->name_str;
--              qstr->len = name->len;
--              qstr->hash = name->hash;
--              dentry->d_qstr = qstr;
--              str = qstr->name_str;
-       } else  {
--              dentry->d_qstr = &dentry->d_name;
-               str = dentry->d_iname;
-       }       
-@@ -1010,7 +1002,7 @@ struct dentry * __d_lookup(struct dentry
-               if (dentry->d_parent != parent)
-                       continue;
--              qstr = dentry->d_qstr;
-+              qstr = &dentry->d_name;
-               smp_read_barrier_depends();
-               if (parent->d_op && parent->d_op->d_compare) {
-                       if (parent->d_op->d_compare(parent, qstr, name))
-@@ -1163,26 +1155,38 @@ void d_rehash(struct dentry * entry)
-  */
- static inline void switch_names(struct dentry * dentry, struct dentry * target)
- {
--      const unsigned char *old_name, *new_name;
--      struct qstr *old_qstr, *new_qstr;
--
--      memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); 
--      old_qstr = target->d_qstr;
--      old_name = target->d_name.name;
--      new_qstr = dentry->d_qstr;
--      new_name = dentry->d_name.name;
--      if (old_name == target->d_iname) {
--              old_name = dentry->d_iname;
--              old_qstr = &dentry->d_name;
--      }
--      if (new_name == dentry->d_iname) {
--              new_name = target->d_iname;
--              new_qstr = &target->d_name;
--      }
--      target->d_name.name = new_name;
--      dentry->d_name.name = old_name;
--      target->d_qstr = new_qstr;
--      dentry->d_qstr = old_qstr;
-+      if (dname_external(target)) {
-+              if (dname_external(dentry)) {
-+                      /*
-+                       * Both external: swap the pointers
-+                       */
-+                      do_switch(target->d_name.name, dentry->d_name.name);
-+              } else {
-+                      /*
-+                       * dentry:internal, target:external.  Steal target's
-+                       * storage and make target internal.
-+                       */
-+                      dentry->d_name.name = target->d_name.name;
-+                      target->d_name.name = target->d_iname;
-+              }
-+      } else {
-+              if (dname_external(dentry)) {
-+                      /*
-+                       * dentry:external, target:internal.  Give dentry's
-+                       * storage to target and make dentry internal
-+                       */
-+                      memcpy(dentry->d_iname, target->d_name.name,
-+                                      target->d_name.len + 1);
-+                      target->d_name.name = dentry->d_name.name;
-+                      dentry->d_name.name = dentry->d_iname;
-+              } else {
-+                      /*
-+                       * Both are internal.  Just copy target to dentry
-+                       */
-+                      memcpy(dentry->d_iname, target->d_name.name,
-+                                      target->d_name.len + 1);
-+              }
-+      }
- }
- /*
+--- linux-2.6.5-7.276.orig/fs/dcache.c
++++ linux-2.6.5-7.276/fs/dcache.c
+@@ -775,7 +775,6 @@ struct dentry * d_alloc(struct dentry * 
+       dentry->d_parent = NULL;
+       dentry->d_move_count = 0;
+       dentry->d_sb = NULL;
+-      dentry->d_qstr = &dentry->d_name;
+       dentry->d_name.name = str;
+       dentry->d_name.len = name->len;
+       dentry->d_name.hash = name->hash;
diff --git a/lustre/kernel_patches/patches/export-do_kern_mount.patch b/lustre/kernel_patches/patches/export-do_kern_mount.patch
new file mode 100644 (file)
index 0000000..4abb386
--- /dev/null
@@ -0,0 +1,13 @@
+Index: linux-2.6/fs/super.c
+===================================================================
+--- linux-2.6.orig/fs/super.c  2006-07-20 10:51:39.000000000 +0800
++++ linux-2.6/fs/super.c       2006-07-20 10:51:59.000000000 +0800
+@@ -877,6 +877,8 @@ do_kern_mount(const char *fstype, int fl
+       return mnt;
+ }
++EXPORT_SYMBOL_GPL(do_kern_mount);
++
+ struct vfsmount *kern_mount(struct file_system_type *type)
+ {
+       return vfs_kern_mount(type, 0, type->name, NULL);
index dca4676..e54774f 100644 (file)
@@ -1,7 +1,19 @@
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.201-full/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/super.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/include/linux/ext3_fs.h        2006-08-09 17:59:34.000000000 +0400
++++ linux-2.6.5-7.201-full/include/linux/ext3_fs.h     2006-08-22 12:35:55.000000000 +0400
+@@ -793,6 +793,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
+Index: linux-2.6.5-7.201-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-7.201-full.orig/fs/ext3/super.c        2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/super.c     2006-08-09 17:59:37.000000000 +0400
 @@ -39,7 +39,7 @@
  static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
  static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -20,10 +32,10 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
                               struct ext3_super_block * es,
                               int sync)
  {
-Index: linux-2.6.5-7.201/fs/ext3/namei.c
+Index: linux-2.6.5-7.201-full/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/namei.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/namei.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/namei.c        2006-08-09 17:59:37.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/namei.c     2006-08-09 17:59:37.000000000 +0400
 @@ -1598,7 +1598,7 @@ static int ext3_delete_entry (handle_t *
                              struct buffer_head * bh)
  {
@@ -44,10 +56,10 @@ Index: linux-2.6.5-7.201/fs/ext3/namei.c
                        if (pde)
                                pde->rec_len =
                                        cpu_to_le16(le16_to_cpu(pde->rec_len) +
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.201-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c  2006-06-20 19:42:30.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/xattr.c        2006-07-14 01:53:23.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/xattr.c     2006-08-09 17:59:37.000000000 +0400
 @@ -107,7 +107,7 @@ ext3_xattr_register(int name_index, stru
  {
        int error = -EINVAL;
@@ -57,10 +69,10 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c
                write_lock(&ext3_handler_lock);
                if (!ext3_xattr_handlers[name_index-1]) {
                        ext3_xattr_handlers[name_index-1] = handler;
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.201-full/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2006-06-20 19:40:44.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/inode.c  2006-06-20 19:42:08.000000000 +0400
+--- linux-2.6.5-7.201-full.orig/fs/ext3/inode.c        2006-07-14 01:53:22.000000000 +0400
++++ linux-2.6.5-7.201-full/fs/ext3/inode.c     2006-08-22 12:35:28.000000000 +0400
 @@ -1517,9 +1517,14 @@ out_stop:
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
index df3d2ea..f6904f2 100644 (file)
@@ -1,7 +1,19 @@
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2006-08-09 17:56:39.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h   2006-08-22 12:36:22.000000000 +0400
+@@ -826,6 +826,7 @@ extern void ext3_put_super (struct super
+ extern void ext3_write_super (struct super_block *);
+ extern void ext3_write_super_lockfs (struct super_block *);
+ extern void ext3_unlockfs (struct super_block *);
++extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
 Index: linux-2.6.9-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c      2006-06-02 23:37:51.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/super.c   2006-06-02 23:56:29.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-08-09 17:56:40.000000000 +0400
 @@ -43,7 +43,7 @@ static int ext3_load_journal(struct supe
                             unsigned long journal_devnum);
  static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -22,8 +34,8 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  {
 Index: linux-2.6.9-full/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-06-02 23:37:49.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/namei.c   2006-06-02 23:43:31.000000000 +0400
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-08-09 17:56:40.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-08-09 17:56:40.000000000 +0400
 @@ -1599,7 +1599,7 @@ static int ext3_delete_entry (handle_t *
                              struct buffer_head * bh)
  {
@@ -47,7 +59,7 @@ Index: linux-2.6.9-full/fs/ext3/namei.c
 Index: linux-2.6.9-full/fs/ext3/xattr.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-06-01 14:58:48.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/xattr.c   2006-06-03 00:02:00.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-08-09 17:56:40.000000000 +0400
 @@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
  {
        struct xattr_handler *handler = NULL;
@@ -60,7 +72,7 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c
 Index: linux-2.6.9-full/fs/ext3/inode.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-06-02 23:37:38.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/inode.c   2006-06-03 00:27:41.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-08-22 12:34:28.000000000 +0400
 @@ -1513,9 +1513,14 @@ out_stop:
                        if (end > inode->i_size) {
                                ei->i_disksize = end;
index 325d080..b807900 100644 (file)
@@ -1387,8 +1387,8 @@ Index: linux-2.6.16.i686/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                       */
 +                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
index c77ebdd..646e4fe 100644 (file)
@@ -1,8 +1,8 @@
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h     2006-04-26 23:40:28.000000000 +0400
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-09-06 12:37:27.000000000 +0800
+@@ -57,6 +57,14 @@
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
  
@@ -17,7 +17,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  /*
   * Special inodes numbers
   */
-@@ -339,6 +347,7 @@ struct ext3_inode {
+@@ -339,6 +347,7 @@
  #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
@@ -25,7 +25,22 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
-@@ -700,7 +709,9 @@ extern int ext3_bg_has_super(struct supe
+@@ -361,6 +370,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -700,7 +717,9 @@
  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
@@ -36,7 +51,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  extern unsigned long ext3_count_free_blocks (struct super_block *);
  extern void ext3_check_blocks_bitmap (struct super_block *);
  extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-@@ -822,6 +833,17 @@ extern void ext3_extents_initialize_bloc
+@@ -824,6 +843,17 @@
  extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg);
  
@@ -54,10 +69,10 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  #define EXT3_IOC_CREATE_INUM                  _IOW('f', 5, long)
-Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
+Index: linux-stage/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h     2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h  2006-04-26 23:40:28.000000000 +0400
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-09-06 12:37:01.000000000 +0800
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -74,7 +89,7 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -78,6 +84,43 @@ struct ext3_sb_info {
+@@ -78,6 +84,43 @@
        struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
        wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
  #endif
@@ -112,17 +127,17 @@ Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
 +      unsigned long s_mb_buddies_generated;
 +      unsigned long long s_mb_generation_time;
  };
-+
 +#define EXT3_GROUP_INFO(sb, group)                                       \
 +      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
 +                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
++
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.252-full/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/super.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/super.c     2006-04-26 23:40:28.000000000 +0400
-@@ -389,6 +389,7 @@ void ext3_put_super (struct super_block 
+--- linux-stage.orig/fs/ext3/super.c   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-09-06 12:37:01.000000000 +0800
+@@ -389,6 +389,7 @@
        struct ext3_super_block *es = sbi->s_es;
        int i;
  
@@ -130,7 +145,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -545,6 +546,7 @@ enum {
+@@ -546,6 +547,7 @@
        Opt_err,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_extents, Opt_noextents, Opt_extdebug,
@@ -138,7 +153,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -591,6 +592,9 @@ static match_table_t tokens = {
+@@ -592,6 +594,9 @@
        {Opt_extents, "extents"},
        {Opt_noextents, "noextents"},
        {Opt_extdebug, "extdebug"},
@@ -148,7 +163,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -813,6 +815,19 @@ static int parse_options (char * options
+@@ -817,6 +822,19 @@
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -168,7 +183,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
+@@ -1470,6 +1488,7 @@
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -176,7 +191,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  
        return 0;
  
-@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
+@@ -2118,7 +2137,13 @@
  
  static int __init init_ext3_fs(void)
  {
@@ -191,7 +206,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2147,6 +2172,7 @@
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -199,11 +214,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
+Index: linux-stage/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c      2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/extents.c   2006-04-26 23:40:28.000000000 +0400
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-09-06 12:37:01.000000000 +0800
+@@ -779,7 +779,7 @@
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -212,7 +227,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -221,7 +236,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        return err;
  }
  
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -235,7 +250,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -244,11 +259,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/inode.c     2006-04-26 23:40:28.000000000 +0400
-@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c   2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-09-06 12:37:01.000000000 +0800
+@@ -574,7 +574,7 @@
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -257,7 +272,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
        return err;
  }
  
-@@ -675,7 +675,7 @@ err_out:
+@@ -675,7 +675,7 @@
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -266,7 +281,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
        return err;
  }
  
-@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@
                }
        }
  
@@ -275,7 +290,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
  }
  
  /**
-@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t 
+@@ -2008,7 +2008,7 @@
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -284,11 +299,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
+Index: linux-stage/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c       2006-02-14 15:26:58.000000000 +0300
-+++ linux-2.6.5-7.252-full/fs/ext3/balloc.c    2006-04-26 23:40:28.000000000 +0400
-@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c  2006-09-06 12:36:59.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-09-06 12:37:01.000000000 +0800
+@@ -78,7 +78,7 @@
   *
   * Return buffer_head on success or NULL in case of failure.
   */
@@ -297,7 +312,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
  read_block_bitmap(struct super_block *sb, unsigned int block_group)
  {
        struct ext3_group_desc * desc;
-@@ -274,7 +274,7 @@ void ext3_discard_reservation(struct ino
+@@ -274,7 +274,7 @@
  }
  
  /* Free given blocks, update quota and i_blocks field */
@@ -306,7 +321,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
                        unsigned long block, unsigned long count)
  {
        struct buffer_head *bitmap_bh = NULL;
-@@ -1142,7 +1142,7 @@ int ext3_should_retry_alloc(struct super
+@@ -1142,7 +1142,7 @@
   * bitmap, and then for any free bit if that fails.
   * This function also updates quota and i_blocks field.
   */
@@ -315,11 +330,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
+Index: linux-stage/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c        2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/xattr.c     2006-04-26 23:40:28.000000000 +0400
-@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c   2006-09-06 12:37:00.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-09-06 12:37:01.000000000 +0800
+@@ -1371,7 +1371,7 @@
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
  getblk_failed:
@@ -328,7 +343,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-@@ -1411,7 +1411,7 @@ getblk_failed:
+@@ -1411,7 +1411,7 @@
                if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
                        /* Free the old block. */
                        ea_bdebug(old_bh, "freeing");
@@ -337,7 +352,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
  
                        /* ext3_forget() calls bforget() for us, but we
                           let our caller release old_bh, so we need to
-@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1519,7 +1519,7 @@
                        mb_cache_entry_free(ce);
                        ce = NULL;
                }
@@ -346,10 +361,10 @@ Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
                get_bh(bh);
                ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
        } else {
-Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
+Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c      2006-09-06 12:37:34.000000000 +0800
 @@ -0,0 +1,2702 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
@@ -792,7 +807,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -3053,11 +3068,11 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile       2006-04-25 17:42:19.000000000 +0400
-+++ linux-2.6.5-7.252-full/fs/ext3/Makefile    2006-04-26 23:40:28.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+--- linux-stage.orig/fs/ext3/Makefile  2006-09-06 12:37:01.000000000 +0800
++++ linux-stage/fs/ext3/Makefile       2006-09-06 12:37:01.000000000 +0800
+@@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o \
index 0040a6f..13f3482 100644 (file)
@@ -1356,8 +1356,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                       */
 +                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
 +                      ac.ac_b_ex.fe_len = 0;
index e34c411..a00cd4a 100644 (file)
@@ -1,8 +1,8 @@
 Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs.h        2006-05-25 10:36:04.000000000 -0600
-@@ -57,6 +57,14 @@ struct statfs;
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-09-06 12:29:38.000000000 +0800
+@@ -57,6 +57,14 @@
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
  
@@ -17,7 +17,7 @@ Index: linux-stage/include/linux/ext3_fs.h
  /*
   * Special inodes numbers
   */
-@@ -365,6 +373,7 @@ struct ext3_inode {
+@@ -365,6 +373,7 @@
  #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
@@ -25,7 +25,22 @@ Index: linux-stage/include/linux/ext3_fs.h
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+@@ -387,6 +396,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -726,7 +743,7 @@
  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
@@ -34,7 +49,7 @@ Index: linux-stage/include/linux/ext3_fs.h
  extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
                                 unsigned long, unsigned long, int *);
  extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+@@ -859,6 +876,17 @@
  extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg);
  
@@ -54,8 +69,8 @@ Index: linux-stage/include/linux/ext3_fs.h
  /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-stage/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/include/linux/ext3_fs_sb.h     2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-09-06 12:27:37.000000000 +0800
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -72,7 +87,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -81,6 +87,43 @@ struct ext3_sb_info {
+@@ -81,6 +87,43 @@
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
  #endif
@@ -110,17 +125,17 @@ Index: linux-stage/include/linux/ext3_fs_sb.h
 +      unsigned long s_mb_buddies_generated;
 +      unsigned long long s_mb_generation_time;
  };
-+
 +#define EXT3_GROUP_INFO(sb, group)                                       \
 +      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
 +                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
++
  #endif        /* _LINUX_EXT3_FS_SB */
 Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-stage.orig/fs/ext3/super.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/super.c        2006-05-25 10:36:04.000000000 -0600
-@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block
+--- linux-stage.orig/fs/ext3/super.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-09-06 12:27:37.000000000 +0800
+@@ -394,6 +394,7 @@
        struct ext3_super_block *es = sbi->s_es;
        int i;
  
@@ -128,7 +143,7 @@ Index: linux-stage/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,6 +598,7 @@
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_extents, Opt_noextents, Opt_extdebug,
@@ -136,7 +151,7 @@ Index: linux-stage/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -649,6 +651,9 @@ static match_table_t tokens = {
+@@ -649,6 +651,9 @@
        {Opt_extents, "extents"},
        {Opt_noextents, "noextents"},
        {Opt_extdebug, "extdebug"},
@@ -146,7 +161,7 @@ Index: linux-stage/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -962,6 +967,19 @@ static int parse_options (char * options
+@@ -962,6 +967,19 @@
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -166,7 +181,7 @@ Index: linux-stage/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1651,6 +1669,7 @@ static int ext3_fill_super (struct super
+@@ -1651,6 +1669,7 @@
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -174,7 +189,7 @@ Index: linux-stage/fs/ext3/super.c
  
        return 0;
  
-@@ -2433,7 +2452,13 @@ static struct file_system_type ext3_fs_t
+@@ -2433,7 +2452,13 @@
  
  static int __init init_ext3_fs(void)
  {
@@ -189,7 +204,7 @@ Index: linux-stage/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2455,6 +2480,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2455,6 +2480,7 @@
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -199,9 +214,9 @@ Index: linux-stage/fs/ext3/super.c
  int ext3_prep_san_write(struct inode *inode, long *blocks,
 Index: linux-stage/fs/ext3/extents.c
 ===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/extents.c      2006-05-25 10:36:04.000000000 -0600
-@@ -777,7 +777,7 @@ cleanup:
+--- linux-stage.orig/fs/ext3/extents.c 2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-09-06 12:27:37.000000000 +0800
+@@ -779,7 +779,7 @@
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -210,7 +225,7 @@ Index: linux-stage/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1438,7 +1438,7 @@
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -219,7 +234,7 @@ Index: linux-stage/fs/ext3/extents.c
        return err;
  }
  
-@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1923,10 +1923,12 @@
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -233,7 +248,7 @@ Index: linux-stage/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1938,7 +1940,7 @@
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -244,9 +259,9 @@ Index: linux-stage/fs/ext3/extents.c
                       from, to, ex->ee_block, ex->ee_len);
 Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/inode.c        2006-05-25 10:36:04.000000000 -0600
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-stage.orig/fs/ext3/inode.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-09-06 12:27:37.000000000 +0800
+@@ -572,7 +572,7 @@
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -255,7 +270,7 @@ Index: linux-stage/fs/ext3/inode.c
        return err;
  }
  
-@@ -673,7 +673,7 @@ err_out:
+@@ -673,7 +673,7 @@
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -264,7 +279,7 @@ Index: linux-stage/fs/ext3/inode.c
        return err;
  }
  
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1831,7 +1831,7 @@
                }
        }
  
@@ -273,7 +288,7 @@ Index: linux-stage/fs/ext3/inode.c
  }
  
  /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t
+@@ -2004,7 +2004,7 @@
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -284,9 +299,9 @@ Index: linux-stage/fs/ext3/inode.c
                                /*
 Index: linux-stage/fs/ext3/balloc.c
 ===================================================================
---- linux-stage.orig/fs/ext3/balloc.c  2006-05-25 10:36:02.000000000 -0600
-+++ linux-stage/fs/ext3/balloc.c       2006-05-25 10:36:04.000000000 -0600
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+--- linux-stage.orig/fs/ext3/balloc.c  2006-09-06 12:27:36.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-09-06 12:27:37.000000000 +0800
+@@ -79,7 +79,7 @@
   *
   * Return buffer_head on success or NULL in case of failure.
   */
@@ -331,9 +346,9 @@ Index: linux-stage/fs/ext3/balloc.c
        struct buffer_head *bitmap_bh = NULL;
 Index: linux-stage/fs/ext3/xattr.c
 ===================================================================
---- linux-stage.orig/fs/ext3/xattr.c   2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/xattr.c        2006-05-25 10:36:04.000000000 -0600
-@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
+--- linux-stage.orig/fs/ext3/xattr.c   2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-09-06 12:27:37.000000000 +0800
+@@ -1281,7 +1281,7 @@
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
  getblk_failed:
@@ -342,7 +357,7 @@ Index: linux-stage/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-@@ -1328,7 +1328,7 @@ getblk_failed:
+@@ -1328,7 +1328,7 @@
                        if (ce)
                                mb_cache_entry_free(ce);
                        ea_bdebug(old_bh, "freeing");
@@ -351,7 +366,7 @@ Index: linux-stage/fs/ext3/xattr.c
  
                        /* ext3_forget() calls bforget() for us, but we
                           let our caller release old_bh, so we need to
-@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle
+@@ -1427,7 +1427,7 @@
        if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
                if (ce)
                        mb_cache_entry_free(ce);
@@ -362,8 +377,8 @@ Index: linux-stage/fs/ext3/xattr.c
        } else {
 Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
---- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
-+++ linux-stage/fs/ext3/mballoc.c      2006-05-25 10:59:14.000000000 -0600
+--- linux-stage.orig/fs/ext3/mballoc.c 2006-09-06 11:16:28.656439250 +0800
++++ linux-stage/fs/ext3/mballoc.c      2006-09-06 12:30:11.000000000 +0800
 @@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
@@ -806,7 +821,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -3068,8 +3083,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +}
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-stage.orig/fs/ext3/Makefile  2006-05-25 10:36:04.000000000 -0600
-+++ linux-stage/fs/ext3/Makefile       2006-05-25 10:36:04.000000000 -0600
+--- linux-stage.orig/fs/ext3/Makefile  2006-09-06 12:27:37.000000000 +0800
++++ linux-stage/fs/ext3/Makefile       2006-09-06 12:27:37.000000000 +0800
 @@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.5.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.5.patch
new file mode 100644 (file)
index 0000000..b2abf46
--- /dev/null
@@ -0,0 +1,772 @@
+Index: linux-2.6.5-7.201/include/linux/jbd.h
+===================================================================
+--- linux-2.6.5-7.201.orig/include/linux/jbd.h 2005-10-11 00:12:48.000000000 +0400
++++ linux-2.6.5-7.201/include/linux/jbd.h      2006-07-28 02:40:14.000000000 +0400
+@@ -411,6 +411,16 @@ struct handle_s 
+ };
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++      unsigned long           cs_chp_time;
++      unsigned long           cs_forced_to_close;
++      unsigned long           cs_written;
++      unsigned long           cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism.  It
+  * tracks a compound transaction through its various states:
+  *
+@@ -542,6 +552,21 @@ struct transaction_s 
+       spinlock_t              t_handle_lock;
+       /*
++       * Longest time some handle had to wait for running transaction
++       */
++      unsigned long           t_max_wait;
++
++      /*
++       * When transaction started
++       */
++      unsigned long           t_start;
++
++      /*
++       * Checkpointing stats [j_checkpoint_sem]
++       */
++      struct transaction_chp_stats_s t_chp_stats;
++
++      /*
+        * Number of outstanding updates running on this transaction
+        * [t_handle_lock]
+        */
+@@ -581,6 +606,57 @@ struct transaction_s 
+       struct list_head        t_jcb;
+ };
++struct transaction_run_stats_s {
++      unsigned long           rs_wait;
++      unsigned long           rs_running;
++      unsigned long           rs_locked;
++      unsigned long           rs_flushing;
++      unsigned long           rs_logging;
++
++      unsigned long           rs_handle_count;
++      unsigned long           rs_blocks;
++      unsigned long           rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++      int                     ts_type;
++      unsigned long           ts_tid;
++      union {
++              struct transaction_run_stats_s run;
++              struct transaction_chp_stats_s chp;
++      } u;
++};
++
++#define JBD_STATS_RUN         1
++#define JBD_STATS_CHECKPOINT  2
++
++#define ts_wait                       u.run.rs_wait
++#define ts_running            u.run.rs_running
++#define ts_locked             u.run.rs_locked
++#define ts_flushing           u.run.rs_flushing
++#define ts_logging            u.run.rs_logging
++#define ts_handle_count               u.run.rs_handle_count
++#define ts_blocks             u.run.rs_blocks
++#define ts_blocks_logged      u.run.rs_blocks_logged
++
++#define ts_chp_time           u.chp.cs_chp_time
++#define ts_forced_to_close    u.chp.cs_forced_to_close
++#define ts_written            u.chp.cs_written
++#define ts_dropped            u.chp.cs_dropped
++
++#define CURRENT_MSECS         (jiffies_to_msecs(jiffies))
++
++/* Return the time elapsed between two samples of a wrapping unsigned counter. */
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++      /* Unsigned subtraction is modulo UINT_MAX + 1, so it is already right
++       * when the counter wrapped (start > end); the former "~0UL - start"
++       * branch used unsigned long math and came out one too small. */
++      return end - start;
++}
++
+ /**
+  * struct journal_s - The journal_s type is the concrete type associated with
+  *     journal_t.
+@@ -817,6 +893,16 @@ struct journal_s
+       struct jbd_revoke_table_s *j_revoke_table[2];
+       /*
++       * Per-transaction statistics history (ring buffer) and running totals
++       */
++      struct transaction_stats_s *j_history;
++      int                     j_history_max;
++      int                     j_history_cur;
++      spinlock_t              j_history_lock;
++      struct proc_dir_entry   *j_proc_entry;
++      struct transaction_stats_s j_stats;
++      
++      /*
+        * An opaque pointer to fs-private information.  ext3 puts its
+        * superblock pointer here
+        */
+Index: linux-2.6.5-7.201/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/commit.c     2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/commit.c  2006-07-28 02:40:14.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/smp_lock.h>
++#include <linux/jiffies.h>
+ /*
+  * Default IO end handler for temporary BJ_IO buffer_heads.
+@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour
+  */
+ void journal_commit_transaction(journal_t *journal)
+ {
++      struct transaction_stats_s stats;
+       transaction_t *commit_transaction;
+       struct journal_head *jh, *new_jh, *descriptor;
+       struct buffer_head *wbuf[64];
+@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_
+       spin_lock(&journal->j_state_lock);
+       commit_transaction->t_state = T_LOCKED;
++      stats.ts_wait = commit_transaction->t_max_wait;
++      stats.ts_locked = CURRENT_MSECS;
++      stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++                                              stats.ts_locked);
++      
+       spin_lock(&commit_transaction->t_handle_lock);
+       while (commit_transaction->t_updates) {
+               DEFINE_WAIT(wait);
+@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_
+        */
+       journal_switch_revoke_table(journal);
++      stats.ts_flushing = CURRENT_MSECS;
++      stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+       commit_transaction->t_state = T_FLUSH;
+       journal->j_committing_transaction = commit_transaction;
+       journal->j_running_transaction = NULL;
+@@ -366,6 +376,11 @@ write_out_data:
+        */
+       commit_transaction->t_state = T_COMMIT;
++      stats.ts_logging = CURRENT_MSECS;
++      stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++      stats.ts_blocks = commit_transaction->t_outstanding_credits;
++      stats.ts_blocks_logged = 0;
++
+       descriptor = 0;
+       bufs = 0;
+       while (commit_transaction->t_buffers) {
+@@ -514,6 +529,7 @@ start_journal_io:
+                               submit_bh(WRITE, bh);
+                       }
+                       cond_resched();
++                      stats.ts_blocks_logged += bufs;
+                       /* Force a new descriptor to be generated next
+                            time round the loop. */
+@@ -759,6 +775,7 @@ skip_commit: /* The journal should be un
+               cp_transaction = jh->b_cp_transaction;
+               if (cp_transaction) {
+                       JBUFFER_TRACE(jh, "remove from old cp transaction");
++                      cp_transaction->t_chp_stats.cs_dropped++;
+                       __journal_remove_checkpoint(jh);
+               }
+@@ -805,6 +822,36 @@ skip_commit: /* The journal should be un
+       J_ASSERT(commit_transaction->t_state == T_COMMIT);
++      commit_transaction->t_start = CURRENT_MSECS;
++      stats.ts_logging = jbd_time_diff(stats.ts_logging,
++                                              commit_transaction->t_start);
++
++      /*
++       * File the transaction for history
++       */
++      stats.ts_type = JBD_STATS_RUN;
++      stats.ts_tid = commit_transaction->t_tid;
++      stats.ts_handle_count = commit_transaction->t_handle_count;
++      spin_lock(&journal->j_history_lock);
++      memcpy(journal->j_history + journal->j_history_cur, &stats,
++                      sizeof(stats));
++      if (++journal->j_history_cur == journal->j_history_max)
++              journal->j_history_cur = 0;
++
++      /*
++       * Calculate overall stats
++       */
++      journal->j_stats.ts_tid++;
++      journal->j_stats.ts_wait += stats.ts_wait;
++      journal->j_stats.ts_running += stats.ts_running;
++      journal->j_stats.ts_locked += stats.ts_locked;
++      journal->j_stats.ts_flushing += stats.ts_flushing;
++      journal->j_stats.ts_logging += stats.ts_logging;
++      journal->j_stats.ts_handle_count += stats.ts_handle_count;
++      journal->j_stats.ts_blocks += stats.ts_blocks;
++      journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++      spin_unlock(&journal->j_history_lock);
++
+       /*
+        * This is a bit sleazy.  We borrow j_list_lock to protect
+        * journal->j_committing_transaction in __journal_remove_checkpoint.
+Index: linux-2.6.5-7.201/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/checkpoint.c 2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/checkpoint.c      2006-07-28 02:40:14.000000000 +0400
+@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal
+                       transaction_t *t = jh->b_transaction;
+                       tid_t tid = t->t_tid;
++                      transaction->t_chp_stats.cs_forced_to_close++;
+                       spin_unlock(&journal->j_list_lock);
+                       jbd_unlock_bh_state(bh);
+                       log_start_commit(journal, tid);
+@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct
+  */
+ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+                       struct buffer_head **bhs, int *batch_count,
+-                      int *drop_count)
++                      int *drop_count, transaction_t *transaction)
+ {
+       struct buffer_head *bh = jh2bh(jh);
+       int ret = 0;
+@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou
+               set_buffer_jwrite(bh);
+               bhs[*batch_count] = bh;
+               jbd_unlock_bh_state(bh);
++              transaction->t_chp_stats.cs_written++;
+               (*batch_count)++;
+               if (*batch_count == NR_BATCH) {
+                       __flush_batch(journal, bhs, batch_count);
+@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal
+               tid_t this_tid;
+               transaction = journal->j_checkpoint_transactions;
++              if (transaction->t_chp_stats.cs_chp_time == 0)
++                      transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+               this_tid = transaction->t_tid;
+               jh = transaction->t_checkpoint_list;
+               last_jh = jh->b_cpprev;
+@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal
+                               retry = 1;
+                               break;
+                       }
+-                      retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
++                      retry = __flush_buffer(journal, jh, bhs, &batch_count,
++                                              &drop_count, transaction);
+               } while (jh != last_jh && !retry);
+               if (batch_count) {
+@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct 
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++      struct transaction_stats_s stats;
++
+       assert_spin_locked(&journal->j_list_lock);
+       if (transaction->t_cpnext) {
+               transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_
+       J_ASSERT(journal->j_running_transaction != transaction);
+       jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++      /*
++       * File the transaction for history
++       */
++      if (transaction->t_chp_stats.cs_written != 0 ||
++                      transaction->t_chp_stats.cs_chp_time != 0) {
++              stats.ts_type = JBD_STATS_CHECKPOINT;
++              stats.ts_tid = transaction->t_tid;
++              stats.u.chp = transaction->t_chp_stats;
++              if (stats.ts_chp_time)
++                      stats.ts_chp_time = 
++                              jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++              spin_lock(&journal->j_history_lock);
++              memcpy(journal->j_history + journal->j_history_cur, &stats,
++                              sizeof(stats));
++              if (++journal->j_history_cur == journal->j_history_max)
++                      journal->j_history_cur = 0;
++              spin_unlock(&journal->j_history_lock);
++      }
++
+       kfree(transaction);
+ }
+Index: linux-2.6.5-7.201/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/transaction.c        2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/transaction.c     2006-07-28 02:40:14.000000000 +0400
+@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran
+       J_ASSERT(journal->j_running_transaction == NULL);
+       journal->j_running_transaction = transaction;
++      transaction->t_max_wait = 0;
++      transaction->t_start = CURRENT_MSECS;
+       return transaction;
+ }
+@@ -86,6 +88,7 @@ static int start_this_handle(journal_t *
+       int nblocks = handle->h_buffer_credits;
+       transaction_t *new_transaction = NULL;
+       int ret = 0;
++      unsigned long ts = CURRENT_MSECS;
+       if (nblocks > journal->j_max_transaction_buffers) {
+               printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -219,6 +222,12 @@ repeat_locked:
+       /* OK, account for the buffers that this operation expects to
+        * use and add the handle to the running transaction. */
++      if (time_after(transaction->t_start, ts)) {
++              ts = jbd_time_diff(ts, transaction->t_start);
++              if (ts > transaction->t_max_wait)
++                      transaction->t_max_wait= ts;
++      }
++
+       handle->h_transaction = transaction;
+       transaction->t_outstanding_credits += nblocks;
+       transaction->t_updates++;
+Index: linux-2.6.5-7.201/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.5-7.201.orig/fs/jbd/journal.c    2005-10-11 00:12:45.000000000 +0400
++++ linux-2.6.5-7.201/fs/jbd/journal.c 2006-08-02 01:20:09.000000000 +0400
+@@ -35,6 +35,7 @@
+ #include <linux/pagemap.h>
+ #include <asm/uaccess.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ EXPORT_SYMBOL(journal_start);
+ EXPORT_SYMBOL(journal_restart);
+@@ -615,6 +616,337 @@ struct journal_head *journal_get_descrip
+       return journal_add_journal_head(bh);
+ }
++struct jbd_stats_proc_session {
++      journal_t *journal;
++      struct transaction_stats_s *stats;
++      int start;
++      int max;
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++                                      struct transaction_stats_s *ts,
++                                      int first)
++{
++      if (ts == s->stats + s->max)
++              ts = s->stats;
++      if (!first && ts == s->stats + s->start)
++              return NULL;
++      while (ts->ts_type == 0) {
++              ts++;
++              if (ts == s->stats + s->max)
++                      ts = s->stats;
++              if (ts == s->stats + s->start)
++                      return NULL;
++      }
++      return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *ts;
++      int l = *pos;
++
++      if (l == 0)
++              return SEQ_START_TOKEN;
++      ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++      if (!ts)
++              return NULL;
++      while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++      return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *ts = v;
++
++      ++*pos;
++      if (v == SEQ_START_TOKEN)
++              return jbd_history_skip_empty(s, s->stats + s->start, 1);
++      else
++              return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++      struct transaction_stats_s *ts = v;
++      if (v == SEQ_START_TOKEN) {
++              seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++                              "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++                              "wait", "run", "lock", "flush", "log", "hndls",
++                              "block", "inlog", "ctime", "write", "drop",
++                              "close");
++              return 0;
++      }
++      if (ts->ts_type == JBD_STATS_RUN)
++              seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++                              "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++                              ts->ts_wait, ts->ts_running, ts->ts_locked,
++                              ts->ts_flushing, ts->ts_logging,
++                              ts->ts_handle_count, ts->ts_blocks,
++                              ts->ts_blocks_logged);
++      else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++              seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++                              "C", ts->ts_tid, " ", ts->ts_chp_time,
++                              ts->ts_written, ts->ts_dropped,
++                              ts->ts_forced_to_close);
++      else
++              J_ASSERT(0);
++      return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++      .start  = jbd_seq_history_start,
++      .next   = jbd_seq_history_next,
++      .stop   = jbd_seq_history_stop,
++      .show   = jbd_seq_history_show,
++};
++
++/* Open the history proc file: give this reader a private copy of j_history. */
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -ENOMEM;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      s->stats = kmalloc(size, GFP_KERNEL);
++      if (s->stats == NULL) { /* test the buffer just allocated, not s */
++              kfree(s);
++              return -ENOMEM;
++      }
++      spin_lock(&journal->j_history_lock);    /* copy under the history lock */
++      memcpy(s->stats, journal->j_history, size);
++      s->max = journal->j_history_max;
++      s->start = journal->j_history_cur % s->max;
++      spin_unlock(&journal->j_history_lock);
++
++      rc = seq_open(file, &jbd_seq_history_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++static ssize_t jbd_seq_history_write(struct file *file, const char __user *buf,
++                                      size_t len, loff_t * ppos)
++{
++      journal_t *journal = PDE(file->f_dentry->d_inode)->data;
++      int size;
++
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;
++
++      spin_lock(&journal->j_history_lock);
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      journal->j_history_cur = 0;
++      memset(journal->j_history, 0, size);
++      spin_unlock(&journal->j_history_lock);
++
++      return len;
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *seq = (struct seq_file *)file->private_data;
++      struct jbd_stats_proc_session *s = seq->private;
++      kfree(s->stats);
++      kfree(s);
++      return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_history_open,
++      .read           = seq_read,
++      .write          = jbd_seq_history_write,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++      return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      if (v != SEQ_START_TOKEN)
++              return 0;
++      seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++                      s->stats->ts_tid,
++                      s->journal->j_max_transaction_buffers);
++      if (s->stats->ts_tid == 0)
++              return 0;
++      seq_printf(seq, "average: \n  %lums waiting for transaction\n",
++                      s->stats->ts_wait / s->stats->ts_tid);
++      seq_printf(seq, "  %lums running transaction\n",
++                      s->stats->ts_running / s->stats->ts_tid);
++      seq_printf(seq, "  %lums transaction was being locked\n",
++                      s->stats->ts_locked / s->stats->ts_tid);
++      seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
++                      s->stats->ts_flushing / s->stats->ts_tid);
++      seq_printf(seq, "  %lums logging transaction\n",
++                      s->stats->ts_logging / s->stats->ts_tid);
++      seq_printf(seq, "  %lu handles per transaction\n",
++                      s->stats->ts_handle_count / s->stats->ts_tid);
++      seq_printf(seq, "  %lu blocks per transaction\n",
++                      s->stats->ts_blocks / s->stats->ts_tid);
++      seq_printf(seq, "  %lu logged blocks per transaction\n",
++                      s->stats->ts_blocks_logged / s->stats->ts_tid);
++      return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++      .start  = jbd_seq_info_start,
++      .next   = jbd_seq_info_next,
++      .stop   = jbd_seq_info_stop,
++      .show   = jbd_seq_info_show,
++};
++
++/* Open handler for the "info" file: copy journal->j_stats while holding
++ * j_history_lock so the seq_file callbacks format a private snapshot. */
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -EIO;
++      size = sizeof(struct transaction_stats_s);
++      s->stats = kmalloc(size, GFP_KERNEL);
++      if (s->stats == NULL) { /* check the buffer just allocated, not s */
++              kfree(s);
++              return -EIO;
++      }
++      spin_lock(&journal->j_history_lock);
++      memcpy(s->stats, &journal->j_stats, size);
++      s->journal = journal;
++      spin_unlock(&journal->j_history_lock);
++      rc = seq_open(file, &jbd_seq_info_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++static ssize_t jbd_seq_info_write(struct file *file, const char __user *buf,
++                                      size_t len, loff_t * ppos)
++{
++      journal_t *journal = PDE(file->f_dentry->d_inode)->data;
++      int size;
++
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;
++
++      size = sizeof(struct transaction_stats_s);
++      spin_lock(&journal->j_history_lock);
++      memset(&journal->j_stats, 0, size);
++      spin_unlock(&journal->j_history_lock);
++
++      return len;
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *seq = (struct seq_file *)file->private_data;
++      struct jbd_stats_proc_session *s = seq->private;
++      kfree(s->stats);
++      kfree(s);
++      return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_info_open,
++      .read           = seq_read,
++      .write          = jbd_seq_info_write,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++static void jbd_stats_proc_init(journal_t *journal)
++{
++      char name[64];
++
++      bdevname(journal->j_dev, name); /* fills name[] with the device name */
++      journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++      if (journal->j_proc_entry) {
++              struct proc_dir_entry *p;
++              p = create_proc_entry("history", S_IRUGO,
++                              journal->j_proc_entry);
++              if (p) {
++                      p->proc_fops = &jbd_seq_history_fops;
++                      p->data = journal;
++                      p = create_proc_entry("info", S_IRUGO,
++                                              journal->j_proc_entry);
++                      if (p) {
++                              p->proc_fops = &jbd_seq_info_fops;
++                              p->data = journal;
++                      }
++              }
++      }
++}
++
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++      char name[64];
++
++      bdevname(journal->j_dev, name); /* same directory name as at init */
++      remove_proc_entry("info", journal->j_proc_entry);
++      remove_proc_entry("history", journal->j_proc_entry);
++      remove_proc_entry(name, proc_jbd_stats);
++}
++
++static void journal_init_stats(journal_t *journal)
++{
++      int size;
++
++      /* /proc handlers take this lock even when no history is kept */
++      spin_lock_init(&journal->j_history_lock);
++      if (proc_jbd_stats == NULL)
++              return;
++      journal->j_history_max = 1500;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      journal->j_history = kmalloc(size, GFP_KERNEL);
++      if (journal->j_history == NULL) {
++              journal->j_history_max = 0;
++              return;
++      }
++      memset(journal->j_history, 0, size);
++}
++
+ /*
+  * Management for journal control blocks: functions to create and
+  * destroy journal_t structures, and to initialise and read existing
+@@ -657,6 +989,9 @@ static journal_t * journal_init_common (
+               kfree(journal);
+               goto fail;
+       }
++      
++      journal_init_stats(journal);
++
+       return journal;
+ fail:
+       return NULL;
+@@ -699,6 +1034,7 @@ journal_t * journal_init_dev(struct bloc
+       journal->j_blk_offset = start;
+       journal->j_maxlen = len;
+       journal->j_blocksize = blocksize;
++      jbd_stats_proc_init(journal);
+       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+       J_ASSERT(bh != NULL);
+@@ -736,6 +1072,7 @@ journal_t * journal_init_inode (struct i
+       journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+       journal->j_blocksize = inode->i_sb->s_blocksize;
++      jbd_stats_proc_init(journal);
+       err = journal_bmap(journal, 0, &blocknr);
+       /* If that failed, give up */
+@@ -1106,6 +1443,8 @@ void journal_destroy(journal_t *journal)
+               brelse(journal->j_sb_buffer);
+       }
++      if (journal->j_proc_entry)
++              jbd_stats_proc_exit(journal);
+       if (journal->j_inode)
+               iput(journal->j_inode);
+       if (journal->j_revoke)
+@@ -1861,6 +2200,28 @@ static void __exit remove_jbd_proc_entry
+ #endif
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++      proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++      if (proc_jbd_stats)
++              remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ kmem_cache_t *jbd_handle_cache;
+ static int __init journal_init_handle_cache(void)
+@@ -1915,6 +2276,7 @@ static int __init journal_init(void)
+       if (ret != 0)
+               journal_destroy_caches();
+       create_jbd_proc_entry();
++      create_jbd_stats_proc_entry();
+       return ret;
+ }
+@@ -1926,6 +2288,7 @@ static void __exit journal_exit(void)
+               printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+       remove_jbd_proc_entry();
++      remove_jbd_stats_proc_entry();
+       journal_destroy_caches();
+ }
diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.9.patch
new file mode 100644 (file)
index 0000000..7a48375
--- /dev/null
@@ -0,0 +1,735 @@
+Index: linux-2.6.9/include/linux/jbd.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/jbd.h       2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/include/linux/jbd.h    2006-07-28 02:32:18.000000000 +0400
+@@ -422,6 +422,16 @@ struct handle_s 
+ };
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++      unsigned long           cs_chp_time;
++      unsigned long           cs_forced_to_close;
++      unsigned long           cs_written;
++      unsigned long           cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism.  It
+  * tracks a compound transaction through its various states:
+  *
+@@ -553,6 +563,21 @@ struct transaction_s 
+       spinlock_t              t_handle_lock;
+       /*
++       * Longest time some handle had to wait for running transaction
++       */
++      unsigned long           t_max_wait;
++
++      /*
++       * When transaction started
++       */
++      unsigned long           t_start;
++
++      /*
++       * Checkpointing stats [j_checkpoint_sem]
++       */
++      struct transaction_chp_stats_s t_chp_stats;
++
++      /*
+        * Number of outstanding updates running on this transaction
+        * [t_handle_lock]
+        */
+@@ -592,6 +617,57 @@ struct transaction_s 
+       struct list_head        t_jcb;
+ };
++struct transaction_run_stats_s {
++      unsigned long           rs_wait;
++      unsigned long           rs_running;
++      unsigned long           rs_locked;
++      unsigned long           rs_flushing;
++      unsigned long           rs_logging;
++
++      unsigned long           rs_handle_count;
++      unsigned long           rs_blocks;
++      unsigned long           rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++      int                     ts_type;
++      unsigned long           ts_tid;
++      union {
++              struct transaction_run_stats_s run;
++              struct transaction_chp_stats_s chp;
++      } u;
++};
++
++#define JBD_STATS_RUN         1
++#define JBD_STATS_CHECKPOINT  2
++
++#define ts_wait                       u.run.rs_wait
++#define ts_running            u.run.rs_running
++#define ts_locked             u.run.rs_locked
++#define ts_flushing           u.run.rs_flushing
++#define ts_logging            u.run.rs_logging
++#define ts_handle_count               u.run.rs_handle_count
++#define ts_blocks             u.run.rs_blocks
++#define ts_blocks_logged      u.run.rs_blocks_logged
++
++#define ts_chp_time           u.chp.cs_chp_time
++#define ts_forced_to_close    u.chp.cs_forced_to_close
++#define ts_written            u.chp.cs_written
++#define ts_dropped            u.chp.cs_dropped
++
++#define CURRENT_MSECS         (jiffies_to_msecs(jiffies))
++
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++      /*
++       * Elapsed time from start to end.  Unsigned subtraction is modulo
++       * 2^N, so the result stays exact when the counter has wrapped.
++       */
++      return end - start;
++}
++
+ /**
+  * struct journal_s - The journal_s type is the concrete type associated with
+  *     journal_t.
+@@ -828,6 +904,16 @@ struct journal_s
+       struct jbd_revoke_table_s *j_revoke_table[2];
+       /*
++       * Transaction statistics: history ring, running totals, /proc entry
++       */
++      struct transaction_stats_s *j_history;
++      int                     j_history_max;
++      int                     j_history_cur;
++      spinlock_t              j_history_lock;
++      struct proc_dir_entry   *j_proc_entry;
++      struct transaction_stats_s j_stats;
++      
++      /*
+        * An opaque pointer to fs-private information.  ext3 puts its
+        * superblock pointer here
+        */
+Index: linux-2.6.9/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/commit.c   2006-03-10 18:20:39.000000000 +0300
++++ linux-2.6.9/fs/jbd/commit.c        2006-07-28 02:32:18.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/smp_lock.h>
++#include <linux/jiffies.h>
+ /*
+  * Default IO end handler for temporary BJ_IO buffer_heads.
+@@ -101,6 +102,7 @@ static int inverted_lock(journal_t *jour
+  */
+ void journal_commit_transaction(journal_t *journal)
+ {
++      struct transaction_stats_s stats;
+       transaction_t *commit_transaction;
+       struct journal_head *jh, *new_jh, *descriptor;
+       struct buffer_head *wbuf[64];
+@@ -147,6 +149,11 @@ void journal_commit_transaction(journal_
+       spin_lock(&journal->j_state_lock);
+       commit_transaction->t_state = T_LOCKED;
++      stats.ts_wait = commit_transaction->t_max_wait;
++      stats.ts_locked = CURRENT_MSECS;
++      stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++                                              stats.ts_locked);
++      
+       spin_lock(&commit_transaction->t_handle_lock);
+       while (commit_transaction->t_updates) {
+               DEFINE_WAIT(wait);
+@@ -219,6 +226,9 @@ void journal_commit_transaction(journal_
+        */
+       journal_switch_revoke_table(journal);
++      stats.ts_flushing = CURRENT_MSECS;
++      stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+       commit_transaction->t_state = T_FLUSH;
+       journal->j_committing_transaction = commit_transaction;
+       journal->j_running_transaction = NULL;
+@@ -365,6 +375,11 @@ write_out_data:
+        */
+       commit_transaction->t_state = T_COMMIT;
++      stats.ts_logging = CURRENT_MSECS;
++      stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++      stats.ts_blocks = commit_transaction->t_outstanding_credits;
++      stats.ts_blocks_logged = 0;
++
+       descriptor = NULL;
+       bufs = 0;
+       while (commit_transaction->t_buffers) {
+@@ -513,6 +528,7 @@ start_journal_io:
+                               submit_bh(WRITE, bh);
+                       }
+                       cond_resched();
++                      stats.ts_blocks_logged += bufs;
+                       /* Force a new descriptor to be generated next
+                            time round the loop. */
+@@ -760,6 +776,7 @@ skip_commit: /* The journal should be un
+               cp_transaction = jh->b_cp_transaction;
+               if (cp_transaction) {
+                       JBUFFER_TRACE(jh, "remove from old cp transaction");
++                      cp_transaction->t_chp_stats.cs_dropped++;
+                       __journal_remove_checkpoint(jh);
+               }
+@@ -806,6 +823,36 @@ skip_commit: /* The journal should be un
+       J_ASSERT(commit_transaction->t_state == T_COMMIT);
++      commit_transaction->t_start = CURRENT_MSECS;
++      stats.ts_logging = jbd_time_diff(stats.ts_logging,
++                                              commit_transaction->t_start);
++
++      /*
++       * File the transaction for history
++       */
++      stats.ts_type = JBD_STATS_RUN;
++      stats.ts_tid = commit_transaction->t_tid;
++      stats.ts_handle_count = commit_transaction->t_handle_count;
++      spin_lock(&journal->j_history_lock);
++      memcpy(journal->j_history + journal->j_history_cur, &stats,
++                      sizeof(stats));
++      if (++journal->j_history_cur == journal->j_history_max)
++              journal->j_history_cur = 0;
++
++      /*
++       * Calculate overall stats
++       */
++      journal->j_stats.ts_tid++;
++      journal->j_stats.ts_wait += stats.ts_wait;
++      journal->j_stats.ts_running += stats.ts_running;
++      journal->j_stats.ts_locked += stats.ts_locked;
++      journal->j_stats.ts_flushing += stats.ts_flushing;
++      journal->j_stats.ts_logging += stats.ts_logging;
++      journal->j_stats.ts_handle_count += stats.ts_handle_count;
++      journal->j_stats.ts_blocks += stats.ts_blocks;
++      journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++      spin_unlock(&journal->j_history_lock);
++
+       /*
+        * This is a bit sleazy.  We borrow j_list_lock to protect
+        * journal->j_committing_transaction in __journal_remove_checkpoint.
+Index: linux-2.6.9/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/checkpoint.c       2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/fs/jbd/checkpoint.c    2006-07-28 02:35:21.000000000 +0400
+@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal
+                       transaction_t *t = jh->b_transaction;
+                       tid_t tid = t->t_tid;
++                      transaction->t_chp_stats.cs_forced_to_close++;
+                       spin_unlock(&journal->j_list_lock);
+                       jbd_unlock_bh_state(bh);
+                       log_start_commit(journal, tid);
+@@ -227,7 +228,7 @@ __flush_batch(journal_t *journal, struct
+  */
+ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+                       struct buffer_head **bhs, int *batch_count,
+-                      int *drop_count)
++                      int *drop_count, transaction_t *transaction)
+ {
+       struct buffer_head *bh = jh2bh(jh);
+       int ret = 0;
+@@ -248,6 +249,7 @@ static int __flush_buffer(journal_t *jou
+               set_buffer_jwrite(bh);
+               bhs[*batch_count] = bh;
+               jbd_unlock_bh_state(bh);
++              transaction->t_chp_stats.cs_written++;
+               (*batch_count)++;
+               if (*batch_count == NR_BATCH) {
+                       __flush_batch(journal, bhs, batch_count);
+@@ -316,6 +318,8 @@ int log_do_checkpoint(journal_t *journal
+               tid_t this_tid;
+               transaction = journal->j_checkpoint_transactions;
++              if (transaction->t_chp_stats.cs_chp_time == 0)
++                      transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+               this_tid = transaction->t_tid;
+               jh = transaction->t_checkpoint_list;
+               last_jh = jh->b_cpprev;
+@@ -332,7 +336,8 @@ int log_do_checkpoint(journal_t *journal
+                               retry = 1;
+                               break;
+                       }
+-                      retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
++                      retry = __flush_buffer(journal, jh, bhs, &batch_count,
++                                              &drop_count, transaction);
+               } while (jh != last_jh && !retry);
+               if (batch_count) {
+@@ -598,6 +603,8 @@ void __journal_insert_checkpoint(struct 
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++      struct transaction_stats_s stats;
++
+       assert_spin_locked(&journal->j_list_lock);
+       if (transaction->t_cpnext) {
+               transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -623,5 +630,25 @@ void __journal_drop_transaction(journal_
+       J_ASSERT(journal->j_running_transaction != transaction);
+       jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++      /*
++       * File the transaction for history
++       */
++      if (transaction->t_chp_stats.cs_written != 0 ||
++                      transaction->t_chp_stats.cs_chp_time != 0) {
++              stats.ts_type = JBD_STATS_CHECKPOINT;
++              stats.ts_tid = transaction->t_tid;
++              stats.u.chp = transaction->t_chp_stats;
++              if (stats.ts_chp_time)
++                      stats.ts_chp_time = 
++                              jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++              spin_lock(&journal->j_history_lock);
++              memcpy(journal->j_history + journal->j_history_cur, &stats,
++                              sizeof(stats));
++              if (++journal->j_history_cur == journal->j_history_max)
++                      journal->j_history_cur = 0;
++              spin_unlock(&journal->j_history_lock);
++      }
++
+       kfree(transaction);
+ }
+Index: linux-2.6.9/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/transaction.c      2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9/fs/jbd/transaction.c   2006-07-28 02:32:18.000000000 +0400
+@@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran
+       J_ASSERT(journal->j_running_transaction == NULL);
+       journal->j_running_transaction = transaction;
++      transaction->t_max_wait = 0;
++      transaction->t_start = CURRENT_MSECS;
+       return transaction;
+ }
+@@ -86,6 +88,7 @@ static int start_this_handle(journal_t *
+       int nblocks = handle->h_buffer_credits;
+       transaction_t *new_transaction = NULL;
+       int ret = 0;
++      unsigned long ts = CURRENT_MSECS;
+       if (nblocks > journal->j_max_transaction_buffers) {
+               printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -219,6 +222,12 @@ repeat_locked:
+       /* OK, account for the buffers that this operation expects to
+        * use and add the handle to the running transaction. */
++      if (time_after(transaction->t_start, ts)) {
++              ts = jbd_time_diff(ts, transaction->t_start);
++              if (ts > transaction->t_max_wait)
++                      transaction->t_max_wait= ts;
++      }
++
+       handle->h_transaction = transaction;
+       transaction->t_outstanding_credits += nblocks;
+       transaction->t_updates++;
+Index: linux-2.6.9/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.9.orig/fs/jbd/journal.c  2006-06-19 21:31:57.000000000 +0400
++++ linux-2.6.9/fs/jbd/journal.c       2006-07-28 02:32:18.000000000 +0400
+@@ -36,6 +36,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ EXPORT_SYMBOL(journal_start);
+ EXPORT_SYMBOL(journal_restart);
+@@ -649,6 +650,300 @@ struct journal_head *journal_get_descrip
+       return journal_add_journal_head(bh);
+ }
++struct jbd_stats_proc_session {
++      journal_t *journal;
++      struct transaction_stats_s *stats;
++      int start;
++      int max;
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++                                      struct transaction_stats_s *ts,
++                                      int first)
++{
++      if (ts == s->stats + s->max)
++              ts = s->stats;
++      if (!first && ts == s->stats + s->start)
++              return NULL;
++      while (ts->ts_type == 0) {
++              ts++;
++              if (ts == s->stats + s->max)
++                      ts = s->stats;
++              if (ts == s->stats + s->start)
++                      return NULL;
++      }
++      return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *ts;
++      int l = *pos;
++
++      if (l == 0)
++              return SEQ_START_TOKEN;
++      ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++      if (!ts)
++              return NULL;
++      while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++      return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *ts = v;
++
++      ++*pos;
++      if (v == SEQ_START_TOKEN)
++              return jbd_history_skip_empty(s, s->stats + s->start, 1);
++      else
++              return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++      struct transaction_stats_s *ts = v;
++      if (v == SEQ_START_TOKEN) {
++              seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++                              "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++                              "wait", "run", "lock", "flush", "log", "hndls",
++                              "block", "inlog", "ctime", "write", "drop",
++                              "close");
++              return 0;
++      }
++      if (ts->ts_type == JBD_STATS_RUN)
++              seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++                              "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++                              ts->ts_wait, ts->ts_running, ts->ts_locked,
++                              ts->ts_flushing, ts->ts_logging,
++                              ts->ts_handle_count, ts->ts_blocks,
++                              ts->ts_blocks_logged);
++      else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++              seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++                              "C", ts->ts_tid, " ", ts->ts_chp_time,
++                              ts->ts_written, ts->ts_dropped,
++                              ts->ts_forced_to_close);
++      else
++              J_ASSERT(0);
++      return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++      .start  = jbd_seq_history_start,
++      .next   = jbd_seq_history_next,
++      .stop   = jbd_seq_history_stop,
++      .show   = jbd_seq_history_show,
++};
++
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      if (journal->j_history_max == 0) /* no ring: "% s->max" would trap */
++              return -EIO;
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -EIO;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      s->stats = kmalloc(size, GFP_KERNEL);
++      if (s->stats == NULL) { /* check the buffer just allocated, not s */
++              kfree(s);
++              return -EIO;
++      }
++      spin_lock(&journal->j_history_lock);
++      memcpy(s->stats, journal->j_history, size); /* private snapshot */
++      s->max = journal->j_history_max;
++      s->start = journal->j_history_cur % s->max;
++      spin_unlock(&journal->j_history_lock);
++      rc = seq_open(file, &jbd_seq_history_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *seq = (struct seq_file *)file->private_data;
++      struct jbd_stats_proc_session *s = seq->private;
++      kfree(s->stats);
++      kfree(s);
++      return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_history_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++      return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      if (v != SEQ_START_TOKEN)
++              return 0;
++      seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++                      s->stats->ts_tid,
++                      s->journal->j_max_transaction_buffers);
++      if (s->stats->ts_tid == 0)
++              return 0;
++      seq_printf(seq, "average: \n  %lums waiting for transaction\n",
++                      s->stats->ts_wait / s->stats->ts_tid);
++      seq_printf(seq, "  %lums running transaction\n",
++                      s->stats->ts_running / s->stats->ts_tid);
++      seq_printf(seq, "  %lums transaction was being locked\n",
++                      s->stats->ts_locked / s->stats->ts_tid);
++      seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
++                      s->stats->ts_flushing / s->stats->ts_tid);
++      seq_printf(seq, "  %lums logging transaction\n",
++                      s->stats->ts_logging / s->stats->ts_tid);
++      seq_printf(seq, "  %lu handles per transaction\n",
++                      s->stats->ts_handle_count / s->stats->ts_tid);
++      seq_printf(seq, "  %lu blocks per transaction\n",
++                      s->stats->ts_blocks / s->stats->ts_tid);
++      seq_printf(seq, "  %lu logged blocks per transaction\n",
++                      s->stats->ts_blocks_logged / s->stats->ts_tid);
++      return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++      .start  = jbd_seq_info_start,
++      .next   = jbd_seq_info_next,
++      .stop   = jbd_seq_info_stop,
++      .show   = jbd_seq_info_show,
++};
++
++/* Open handler for the "info" file: copy journal->j_stats while holding
++ * j_history_lock so the seq_file callbacks format a private snapshot. */
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -EIO;
++      size = sizeof(struct transaction_stats_s);
++      s->stats = kmalloc(size, GFP_KERNEL);
++      if (s->stats == NULL) { /* check the buffer just allocated, not s */
++              kfree(s);
++              return -EIO;
++      }
++      spin_lock(&journal->j_history_lock);
++      memcpy(s->stats, &journal->j_stats, size);
++      s->journal = journal;
++      spin_unlock(&journal->j_history_lock);
++      rc = seq_open(file, &jbd_seq_info_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *seq = (struct seq_file *)file->private_data;
++      struct jbd_stats_proc_session *s = seq->private;
++      kfree(s->stats);
++      kfree(s);
++      return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_info_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++static void jbd_stats_proc_init(journal_t *journal)
++{
++      char name[64];
++
++      bdevname(journal->j_dev, name); /* fills name[] with the device name */
++      journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++      if (journal->j_proc_entry) {
++              struct proc_dir_entry *p;
++              p = create_proc_entry("history", S_IRUGO,
++                              journal->j_proc_entry);
++              if (p) {
++                      p->proc_fops = &jbd_seq_history_fops;
++                      p->data = journal;
++                      p = create_proc_entry("info", S_IRUGO,
++                                              journal->j_proc_entry);
++                      if (p) {
++                              p->proc_fops = &jbd_seq_info_fops;
++                              p->data = journal;
++                      }
++              }
++      }
++}
++
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++      char name[64];
++
++      bdevname(journal->j_dev, name); /* same directory name as at init */
++      remove_proc_entry("info", journal->j_proc_entry);
++      remove_proc_entry("history", journal->j_proc_entry);
++      remove_proc_entry(name, proc_jbd_stats);
++}
++
++static void journal_init_stats(journal_t *journal)
++{
++      int size;
++
++      /* journal_commit_transaction() locks this even without a history */
++      spin_lock_init(&journal->j_history_lock);
++      if (proc_jbd_stats == NULL)
++              return;
++      journal->j_history_max = 100;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      journal->j_history = kmalloc(size, GFP_KERNEL);
++      if (journal->j_history == NULL) {
++              journal->j_history_max = 0;
++              return;
++      }
++      memset(journal->j_history, 0, size);
++}
++
+ /*
+  * Management for journal control blocks: functions to create and
+  * destroy journal_t structures, and to initialise and read existing
+@@ -691,6 +986,9 @@ static journal_t * journal_init_common (
+               kfree(journal);
+               goto fail;
+       }
++      
++      journal_init_stats(journal);
++
+       return journal;
+ fail:
+       return NULL;
+@@ -733,6 +1031,7 @@ journal_t * journal_init_dev(struct bloc
+       journal->j_blk_offset = start;
+       journal->j_maxlen = len;
+       journal->j_blocksize = blocksize;
++      jbd_stats_proc_init(journal);
+       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+       J_ASSERT(bh != NULL);
+@@ -770,6 +1069,7 @@ journal_t * journal_init_inode (struct i
+       journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+       journal->j_blocksize = inode->i_sb->s_blocksize;
++      jbd_stats_proc_init(journal);
+       err = journal_bmap(journal, 0, &blocknr);
+       /* If that failed, give up */
+@@ -1140,6 +1440,8 @@ void journal_destroy(journal_t *journal)
+               brelse(journal->j_sb_buffer);
+       }
++      if (journal->j_proc_entry)
++              jbd_stats_proc_exit(journal);
+       if (journal->j_inode)
+               iput(journal->j_inode);
+       if (journal->j_revoke)
+@@ -1895,6 +2197,28 @@ static void __exit remove_jbd_proc_entry
+ #endif
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++      proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++      if (proc_jbd_stats)
++              remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ kmem_cache_t *jbd_handle_cache;
+ static int __init journal_init_handle_cache(void)
+@@ -1949,6 +2273,7 @@ static int __init journal_init(void)
+       if (ret != 0)
+               journal_destroy_caches();
+       create_jbd_proc_entry();
++      create_jbd_stats_proc_entry();
+       return ret;
+ }
+@@ -1960,6 +2285,7 @@ static void __exit journal_exit(void)
+               printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+       remove_jbd_proc_entry();
++      remove_jbd_stats_proc_entry();
+       journal_destroy_caches();
+ }
diff --git a/lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/lookup_bdev_init_intent-2.6.18-vanilla.patch
new file mode 100644 (file)
index 0000000..287ed90
--- /dev/null
@@ -0,0 +1,12 @@
+Index: linux-2.6.7/fs/block_dev.c
+===================================================================
+--- linux-2.6.7.orig/fs/block_dev.c    2004-06-16 13:20:26.000000000 +0800
++++ linux-2.6.7/fs/block_dev.c 2004-08-30 17:36:57.000000000 +0800
+@@ -832,6 +832,7 @@
+       if (!path || !*path)
+               return ERR_PTR(-EINVAL);
++      intent_init(&nd.intent.open, IT_LOOKUP);
+       error = path_lookup(path, LOOKUP_FOLLOW, &nd);
+       if (error)
+               return ERR_PTR(error);
diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index 5639fe4..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-Index: linux-2.6/fs/cifs/dir.c
-===================================================================
---- linux-2.6.orig/fs/cifs/dir.c       2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/cifs/dir.c    2006-07-15 21:04:47.000000000 +0800
-@@ -146,7 +146,7 @@ cifs_create(struct inode *inode, struct 
-       }
-       if(nd && (nd->flags & LOOKUP_OPEN)) {
--              int oflags = nd->intent.open.flags;
-+              int oflags = nd->intent.flags;
-               desiredAccess = 0;
-               if (oflags & FMODE_READ)
-Index: linux-2.6/fs/nfs/dir.c
-===================================================================
---- linux-2.6.orig/fs/nfs/dir.c        2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/nfs/dir.c     2006-07-15 21:04:47.000000000 +0800
-@@ -867,7 +867,7 @@ int nfs_is_exclusive_create(struct inode
-               return 0;
-       if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
-               return 0;
--      return (nd->intent.open.flags & O_EXCL) != 0;
-+      return (nd->intent.it_flags & O_EXCL) != 0;
- }
- static inline int nfs_reval_fsid(struct inode *dir,
-@@ -955,7 +955,7 @@ static int is_atomic_open(struct inode *
-       if (nd->flags & LOOKUP_DIRECTORY)
-               return 0;
-       /* Are we trying to write to a read only partition? */
--      if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
-+      if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
-               return 0;
-       return 1;
- }
-@@ -979,7 +979,7 @@ static struct dentry *nfs_atomic_lookup(
-       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
-       /* Let vfs_create() deal with O_EXCL */
--      if (nd->intent.open.flags & O_EXCL) {
-+      if (nd->intent.it_flags & O_EXCL) {
-               d_add(dentry, NULL);
-               goto out;
-       }
-@@ -994,7 +994,7 @@ static struct dentry *nfs_atomic_lookup(
-               goto out;
-       }
--      if (nd->intent.open.flags & O_CREAT) {
-+      if (nd->intent.it_flags & O_CREAT) {
-               nfs_begin_data_update(dir);
-               res = nfs4_atomic_open(dir, dentry, nd);
-               nfs_end_data_update(dir);
-@@ -1013,7 +1013,7 @@ static struct dentry *nfs_atomic_lookup(
-                       case -ENOTDIR:
-                               goto no_open;
-                       case -ELOOP:
--                              if (!(nd->intent.open.flags & O_NOFOLLOW))
-+                              if (!(nd->intent.it_flags & O_NOFOLLOW))
-                                       goto no_open;
-                       /* case -EINVAL: */
-                       default:
-@@ -1049,7 +1049,7 @@ static int nfs_open_revalidate(struct de
-       /* NFS only supports OPEN on regular files */
-       if (!S_ISREG(inode->i_mode))
-               goto no_open;
--      openflags = nd->intent.open.flags;
-+      openflags = nd->intent.it_flags;
-       /* We cannot do exclusive creation on a positive dentry */
-       if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
-               goto no_open;
-@@ -1182,7 +1182,7 @@ static int nfs_create(struct inode *dir,
-       attr.ia_valid = ATTR_MODE;
-       if (nd && (nd->flags & LOOKUP_CREATE))
--              open_flags = nd->intent.open.flags;
-+              open_flags = nd->intent.it_flags;
-       lock_kernel();
-       nfs_begin_data_update(dir);
-Index: linux-2.6/fs/nfs/nfs4proc.c
-===================================================================
---- linux-2.6.orig/fs/nfs/nfs4proc.c   2006-07-15 21:04:01.000000000 +0800
-+++ linux-2.6/fs/nfs/nfs4proc.c        2006-07-15 21:09:29.000000000 +0800
-@@ -1246,7 +1246,7 @@ static int nfs4_intent_set_file(struct n
-               ctx->state = state;
-               return 0;
-       }
--      nfs4_close_state(state, nd->intent.open.flags);
-+      nfs4_close_state(state, nd->intent.flags);
-       return PTR_ERR(filp);
- }
-@@ -1259,22 +1259,22 @@ nfs4_atomic_open(struct inode *dir, stru
-       struct dentry *res;
-       if (nd->flags & LOOKUP_CREATE) {
--              attr.ia_mode = nd->intent.open.create_mode;
-+              attr.ia_mode = nd->intent.create_mode;
-               attr.ia_valid = ATTR_MODE;
-               if (!IS_POSIXACL(dir))
-                       attr.ia_mode &= ~current->fs->umask;
-       } else {
-               attr.ia_valid = 0;
--              BUG_ON(nd->intent.open.flags & O_CREAT);
-+              BUG_ON(nd->intent.flags & O_CREAT);
-       }
-       cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
-       if (IS_ERR(cred))
-               return (struct dentry *)cred;
--      state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
-+      state = nfs4_do_open(dir, dentry, nd->intent.flags, &attr, cred);
-       put_rpccred(cred);
-       if (IS_ERR(state)) {
--              if (PTR_ERR(state) == -ENOENT)
-+              if (PTR_ERR(state) == -ENOENT)
-                       d_add(dentry, NULL);
-               return (struct dentry *)state;
-       }
index 393cf78..7d6f4ad 100644 (file)
@@ -662,7 +662,7 @@ Index: linux-2.6.16.21-0.8/include/linux/fs.h
  
  #define FMODE_READ 1
  #define FMODE_WRITE 2
-+#define FMODE_EXEC 4
++#define FMODE_EXEC 16
  
  /* Internal kernel extensions */
  #define FMODE_LSEEK   4
index 6e86bde..1344b59 100644 (file)
@@ -1,7 +1,7 @@
 Index: linux-2.6/fs/inode.c
 ===================================================================
---- linux-2.6.orig/fs/inode.c  2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/inode.c       2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/inode.c  2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/inode.c       2006-08-31 11:23:48.000000000 +0800
 @@ -234,6 +234,7 @@ void __iget(struct inode * inode)
        inodes_stat.nr_unused--;
  }
@@ -12,14 +12,13 @@ Index: linux-2.6/fs/inode.c
   * @inode: inode to clear
 Index: linux-2.6/fs/open.c
 ===================================================================
---- linux-2.6.orig/fs/open.c   2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/open.c        2006-07-15 21:04:08.000000000 +0800
-@@ -225,12 +225,12 @@ static long do_sys_truncate(const char _
-       struct nameidata nd;
+--- linux-2.6.orig/fs/open.c   2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/open.c        2006-08-31 11:59:09.000000000 +0800
+@@ -226,11 +226,12 @@ static long do_sys_truncate(const char _
        struct inode * inode;
        int error;
--
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                goto out;
@@ -29,15 +28,15 @@ Index: linux-2.6/fs/open.c
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
-@@ -495,6 +495,7 @@ asmlinkage long sys_faccessat(int dfd, c
+@@ -495,6 +496,7 @@ asmlinkage long sys_faccessat(int dfd, c
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
-@@ -519,7 +520,7 @@ asmlinkage long sys_faccessat(int dfd, c
+@@ -519,7 +521,7 @@ asmlinkage long sys_faccessat(int dfd, c
        else
                current->cap_effective = current->cap_permitted;
  
@@ -46,29 +45,29 @@ Index: linux-2.6/fs/open.c
        if (!res) {
                res = vfs_permission(&nd, mode);
                /* SuS v2 requires we report a read only fs too */
-@@ -545,8 +546,9 @@ asmlinkage long sys_chdir(const char __u
+@@ -545,8 +547,9 @@ asmlinkage long sys_chdir(const char __u
  {
        struct nameidata nd;
        int error;
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
  
 -      error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
 +      error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
        if (error)
                goto out;
  
-@@ -596,8 +598,9 @@ asmlinkage long sys_chroot(const char __
+@@ -596,8 +599,9 @@ asmlinkage long sys_chroot(const char __
  {
        struct nameidata nd;
        int error;
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
  
 -      error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
 +      error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
        if (error)
                goto out;
  
-@@ -823,6 +826,7 @@ static struct file *__dentry_open(struct
+@@ -823,6 +827,7 @@ static struct file *__dentry_open(struct
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
@@ -76,7 +75,7 @@ Index: linux-2.6/fs/open.c
        }
  
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-@@ -849,6 +853,7 @@ cleanup_all:
+@@ -849,6 +854,7 @@ cleanup_all:
        f->f_dentry = NULL;
        f->f_vfsmnt = NULL;
  cleanup_file:
@@ -84,67 +83,39 @@ Index: linux-2.6/fs/open.c
        put_filp(f);
        dput(dentry);
        mntput(mnt);
-@@ -874,6 +879,7 @@ static struct file *do_filp_open(int dfd
+@@ -874,6 +880,7 @@ static struct file *do_filp_open(int dfd
  {
        int namei_flags, error;
        struct nameidata nd;
-+      intent_init(&nd.intent, IT_OPEN);
++      intent_init(&nd.intent.open, IT_OPEN);
  
        namei_flags = flags;
        if ((namei_flags+1) & O_ACCMODE)
-@@ -914,19 +920,19 @@ EXPORT_SYMBOL(filp_open);
- struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
-               int (*open)(struct inode *, struct file *))
- {
--      if (IS_ERR(nd->intent.open.file))
-+      if (IS_ERR(nd->intent.file))
-               goto out;
-       if (IS_ERR(dentry))
-               goto out_err;
--      nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
--                                           nd->intent.open.flags - 1,
--                                           nd->intent.open.file,
-+      nd->intent.file = __dentry_open(dget(dentry), mntget(nd->mnt),
-+                                           nd->intent.flags - 1,
-+                                           nd->intent.file,
-                                            open);
- out:
--      return nd->intent.open.file;
-+      return nd->intent.file;
- out_err:
-       release_open_intent(nd);
--      nd->intent.open.file = (struct file *)dentry;
-+      nd->intent.file = (struct file *)dentry;
-       goto out;
- }
- EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
-@@ -943,7 +949,8 @@ struct file *nameidata_to_filp(struct na
-       struct file *filp;
+@@ -944,6 +951,7 @@ struct file *nameidata_to_filp(struct na
  
        /* Pick up the filp from the open intent */
--      filp = nd->intent.open.file;
-+      filp = nd->intent.file;
-+      filp->f_it = &nd->intent;
+       filp = nd->intent.open.file;
++      filp->f_it = &nd->intent.open;
        /* Has the filesystem initialised the file for us? */
        if (filp->f_dentry == NULL)
                filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
 Index: linux-2.6/fs/nfsctl.c
 ===================================================================
---- linux-2.6.orig/fs/nfsctl.c 2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/nfsctl.c      2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/nfsctl.c 2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/nfsctl.c      2006-08-31 11:23:48.000000000 +0800
 @@ -25,6 +25,7 @@ static struct file *do_open(char *name, 
        struct nameidata nd;
        int error;
  
-+      intent_init(&nd.intent, IT_OPEN);
++      intent_init(&nd.intent.open, IT_OPEN);
        nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
  
        if (IS_ERR(nd.mnt))
 Index: linux-2.6/fs/namei.c
 ===================================================================
---- linux-2.6.orig/fs/namei.c  2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/namei.c       2006-07-15 21:04:36.000000000 +0800
-@@ -337,8 +337,19 @@ int deny_write_access(struct file * file
+--- linux-2.6.orig/fs/namei.c  2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/namei.c       2006-08-31 11:59:09.000000000 +0800
+@@ -344,8 +344,19 @@ int deny_write_access(struct file * file
        return 0;
  }
  
@@ -164,21 +135,7 @@ Index: linux-2.6/fs/namei.c
        dput(nd->dentry);
        mntput(nd->mnt);
  }
-@@ -359,10 +370,10 @@ void path_release_on_umount(struct namei
-  */
- void release_open_intent(struct nameidata *nd)
- {
--      if (nd->intent.open.file->f_dentry == NULL)
--              put_filp(nd->intent.open.file);
-+      if (nd->intent.file->f_dentry == NULL)
-+              put_filp(nd->intent.file);
-       else
--              fput(nd->intent.open.file);
-+              fput(nd->intent.file);
- }
- /*
-@@ -440,8 +451,12 @@ static struct dentry * real_lookup(struc
+@@ -447,8 +458,12 @@ static struct dentry * real_lookup(struc
  {
        struct dentry * result;
        struct inode *dir = parent->d_inode;
@@ -191,7 +148,7 @@ Index: linux-2.6/fs/namei.c
        /*
         * First re-do the cached lookup just in case it was created
         * while we waited for the directory semaphore..
-@@ -475,13 +490,16 @@ static struct dentry * real_lookup(struc
+@@ -482,13 +497,16 @@ static struct dentry * real_lookup(struc
         * Uhhuh! Nasty case: the cache was re-populated while
         * we waited on the semaphore. Need to revalidate.
         */
@@ -210,28 +167,28 @@ Index: linux-2.6/fs/namei.c
        return result;
  }
  
-@@ -509,7 +527,9 @@ walk_init_root(const char *name, struct 
+@@ -516,7 +534,9 @@ walk_init_root(const char *name, struct 
  static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
  {
        int res = 0;
-+      struct lookup_intent it = nd->intent;
++      struct lookup_intent it = nd->intent.open;
        char *name;
 +
        if (IS_ERR(link))
                goto fail;
  
-@@ -519,6 +539,10 @@ static __always_inline int __vfs_follow_
+@@ -526,6 +546,10 @@ static __always_inline int __vfs_follow_
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
-+      intent_init(&nd->intent, it.it_op);
-+      nd->intent.it_flags = it.it_flags;
-+      nd->intent.it_create_mode = it.it_create_mode;
-+      nd->intent.file = it.file;
++      intent_init(&nd->intent.open, it.it_op);
++      nd->intent.open.it_flags = it.it_flags;
++      nd->intent.open.it_create_mode = it.it_create_mode;
++      nd->intent.open.file = it.file;
        res = link_path_walk(link, nd);
  out:
        if (nd->depth || res || nd->last_type!=LAST_NORM)
-@@ -771,6 +795,33 @@ fail:
+@@ -778,6 +802,33 @@ fail:
        return PTR_ERR(dentry);
  }
  
@@ -265,7 +222,7 @@ Index: linux-2.6/fs/namei.c
  /*
   * Name resolution.
   * This is the basic name resolution function, turning a pathname into
-@@ -867,7 +918,11 @@ static fastcall int __link_path_walk(con
+@@ -874,7 +925,11 @@ static fastcall int __link_path_walk(con
                        goto out_dput;
  
                if (inode->i_op->follow_link) {
@@ -277,7 +234,7 @@ Index: linux-2.6/fs/namei.c
                        if (err)
                                goto return_err;
                        err = -ENOENT;
-@@ -902,6 +957,23 @@ last_component:
+@@ -909,6 +964,23 @@ last_component:
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
@@ -301,7 +258,7 @@ Index: linux-2.6/fs/namei.c
                                goto return_reval;
                }
                if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
-@@ -909,7 +981,9 @@ last_component:
+@@ -916,7 +988,9 @@ last_component:
                        if (err < 0)
                                break;
                }
@@ -311,26 +268,7 @@ Index: linux-2.6/fs/namei.c
                if (err)
                        break;
                inode = next.dentry->d_inode;
-@@ -1152,13 +1226,13 @@ static int __path_lookup_intent_open(int
-       if (filp == NULL)
-               return -ENFILE;
--      nd->intent.open.file = filp;
--      nd->intent.open.flags = open_flags;
--      nd->intent.open.create_mode = create_mode;
-+      nd->intent.file = filp;
-+      nd->intent.flags = open_flags;
-+      nd->intent.create_mode = create_mode;
-       err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
--      if (IS_ERR(nd->intent.open.file)) {
-+      if (IS_ERR(nd->intent.file)) {
-               if (err == 0) {
--                      err = PTR_ERR(nd->intent.open.file);
-+                      err = PTR_ERR(nd->intent.file);
-                       path_release(nd);
-               }
-       } else if (err != 0)
-@@ -1261,7 +1335,7 @@ static struct dentry *lookup_hash(struct
+@@ -1268,7 +1342,7 @@ static struct dentry *lookup_hash(struct
  }
  
  /* SMP-safe */
@@ -339,7 +277,7 @@ Index: linux-2.6/fs/namei.c
  {
        unsigned long hash;
        struct qstr this;
-@@ -1281,11 +1355,17 @@ struct dentry * lookup_one_len(const cha
+@@ -1288,11 +1362,17 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
@@ -358,7 +296,7 @@ Index: linux-2.6/fs/namei.c
  /*
   *    namei()
   *
-@@ -1297,8 +1377,9 @@ access:
+@@ -1304,8 +1384,9 @@ access:
   * that namei follows links, while lnamei does not.
   * SMP-safe
   */
@@ -370,14 +308,14 @@ Index: linux-2.6/fs/namei.c
  {
        char *tmp = getname(name);
        int err = PTR_ERR(tmp);
-@@ -1310,9 +1391,22 @@ int fastcall __user_walk_fd(int dfd, con
+@@ -1317,9 +1398,22 @@ int fastcall __user_walk_fd(int dfd, con
        return err;
  }
  
 +int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
 +                          struct nameidata *nd)
 +{
-+      intent_init(&nd->intent, IT_LOOKUP);
++      intent_init(&nd->intent.open, IT_LOOKUP);
 +      return __user_walk_fd_it(dfd, name, flags, nd);
 +}
 +
@@ -389,29 +327,29 @@ Index: linux-2.6/fs/namei.c
  int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
  {
 -      return __user_walk_fd(AT_FDCWD, name, flags, nd);
-+      intent_init(&nd->intent, IT_LOOKUP);
++      intent_init(&nd->intent.open, IT_LOOKUP);
 +      return __user_walk_it(name, flags, nd);
  }
  
  /*
-@@ -1593,6 +1687,8 @@ int open_namei(int dfd, const char *path
+@@ -1600,6 +1694,8 @@ int open_namei(int dfd, const char *path
        if (flag & O_APPEND)
                acc_mode |= MAY_APPEND;
  
-+      nd->intent.it_flags = flag;
-+      nd->intent.it_create_mode = mode;
++      nd->intent.open.it_flags = flag;
++      nd->intent.open.it_create_mode = mode;
        /*
         * The simplest case - just a plain lookup.
         */
-@@ -1607,6 +1703,7 @@ int open_namei(int dfd, const char *path
+@@ -1614,6 +1710,7 @@ int open_namei(int dfd, const char *path
        /*
         * Create - we need to know the parent.
         */
-+      nd->intent.it_op |= IT_CREAT;
++      nd->intent.open.it_op |= IT_CREAT;
        error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
        if (error)
                return error;
-@@ -1623,7 +1720,9 @@ int open_namei(int dfd, const char *path
+@@ -1630,7 +1727,9 @@ int open_namei(int dfd, const char *path
        dir = nd->dentry;
        nd->flags &= ~LOOKUP_PARENT;
        mutex_lock(&dir->d_inode->i_mutex);
@@ -421,28 +359,7 @@ Index: linux-2.6/fs/namei.c
        path.mnt = nd->mnt;
  
  do_last:
-@@ -1633,9 +1732,9 @@ do_last:
-               goto exit;
-       }
--      if (IS_ERR(nd->intent.open.file)) {
-+      if (IS_ERR(nd->intent.file)) {
-               mutex_unlock(&dir->d_inode->i_mutex);
--              error = PTR_ERR(nd->intent.open.file);
-+              error = PTR_ERR(nd->intent.file);
-               goto exit_dput;
-       }
-@@ -1688,7 +1787,7 @@ ok:
- exit_dput:
-       dput_path(&path, nd);
- exit:
--      if (!IS_ERR(nd->intent.open.file))
-+      if (!IS_ERR(nd->intent.file))
-               release_open_intent(nd);
-       path_release(nd);
-       return error;
-@@ -1731,7 +1830,9 @@ do_link:
+@@ -1746,7 +1845,9 @@ do_link:
        }
        dir = nd->dentry;
        mutex_lock(&dir->d_inode->i_mutex);
@@ -452,17 +369,17 @@ Index: linux-2.6/fs/namei.c
        path.mnt = nd->mnt;
        __putname(nd->last.name);
        goto do_last;
-@@ -2243,6 +2344,9 @@ asmlinkage long sys_linkat(int olddfd, c
+@@ -2260,6 +2361,9 @@ asmlinkage long sys_linkat(int olddfd, c
        int error;
        char * to;
  
-+      intent_init(&nd.intent, IT_LOOKUP);
-+      intent_init(&old_nd.intent, IT_LOOKUP);
++      intent_init(&nd.intent.open, IT_LOOKUP);
++      intent_init(&old_nd.intent.open, IT_LOOKUP);
 +
        if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
                return -EINVAL;
  
-@@ -2250,7 +2354,7 @@ asmlinkage long sys_linkat(int olddfd, c
+@@ -2267,7 +2371,7 @@ asmlinkage long sys_linkat(int olddfd, c
        if (IS_ERR(to))
                return PTR_ERR(to);
  
@@ -473,8 +390,8 @@ Index: linux-2.6/fs/namei.c
        if (error)
 Index: linux-2.6/fs/stat.c
 ===================================================================
---- linux-2.6.orig/fs/stat.c   2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/stat.c        2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/stat.c   2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/stat.c        2006-08-31 11:23:48.000000000 +0800
 @@ -37,7 +37,7 @@ void generic_fillattr(struct inode *inod
  
  EXPORT_SYMBOL(generic_fillattr);
@@ -510,11 +427,11 @@ Index: linux-2.6/fs/stat.c
        int error;
  
 -      error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
 +      error = __user_walk_fd_it(dfd, name, LOOKUP_FOLLOW, &nd);
        if (!error) {
 -              error = vfs_getattr(nd.mnt, nd.dentry, stat);
-+              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
++              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat);
                path_release(&nd);
        }
        return error;
@@ -523,11 +440,11 @@ Index: linux-2.6/fs/stat.c
        int error;
  
 -      error = __user_walk_fd(dfd, name, 0, &nd);
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
 +      error = __user_walk_fd_it(dfd, name, 0, &nd);
        if (!error) {
 -              error = vfs_getattr(nd.mnt, nd.dentry, stat);
-+              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
++              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent.open, stat);
                path_release(&nd);
        }
        return error;
@@ -536,19 +453,19 @@ Index: linux-2.6/fs/stat.c
        struct file *f = fget(fd);
        int error = -EBADF;
 +      struct nameidata nd;
-+      intent_init(&nd.intent, IT_GETATTR);
++      intent_init(&nd.intent.open, IT_GETATTR);
  
        if (f) {
 -              error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
-+              error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
-+              intent_release(&nd.intent);
++              error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent.open, stat);
++              intent_release(&nd.intent.open);
                fput(f);
        }
        return error;
 Index: linux-2.6/fs/namespace.c
 ===================================================================
---- linux-2.6.orig/fs/namespace.c      2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/namespace.c   2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/namespace.c      2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/namespace.c   2006-08-31 11:59:07.000000000 +0800
 @@ -73,6 +73,7 @@ struct vfsmount *alloc_vfsmnt(const char
                INIT_LIST_HEAD(&mnt->mnt_share);
                INIT_LIST_HEAD(&mnt->mnt_slave_list);
@@ -588,7 +505,7 @@ Index: linux-2.6/fs/namespace.c
                return err;
        if (!old_name || !*old_name)
                return -EINVAL;
-+      intent_init(&old_nd.intent, IT_LOOKUP);
++      intent_init(&old_nd.intent.open, IT_LOOKUP);
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;
@@ -596,7 +513,7 @@ Index: linux-2.6/fs/namespace.c
                return -EPERM;
        if (!old_name || !*old_name)
                return -EINVAL;
-+      intent_init(&old_nd.intent, IT_LOOKUP);
++      intent_init(&old_nd.intent.open, IT_LOOKUP);
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;
@@ -604,19 +521,19 @@ Index: linux-2.6/fs/namespace.c
        int retval = 0;
        int mnt_flags = 0;
  
-+      intent_init(&nd.intent, IT_LOOKUP);
++      intent_init(&nd.intent.open, IT_LOOKUP);
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;
 Index: linux-2.6/fs/exec.c
 ===================================================================
---- linux-2.6.orig/fs/exec.c   2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/fs/exec.c        2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/fs/exec.c   2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/fs/exec.c        2006-08-31 11:59:09.000000000 +0800
 @@ -127,6 +127,7 @@ asmlinkage long sys_uselib(const char __
        struct nameidata nd;
        int error;
  
-+      intent_init(&nd.intent, IT_OPEN);
++      intent_init(&nd.intent.open, IT_OPEN);
        error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
        if (error)
                goto out;
@@ -624,26 +541,19 @@ Index: linux-2.6/fs/exec.c
        int err;
        struct file *file;
  
-+      intent_init(&nd.intent, IT_OPEN);
++      intent_init(&nd.intent.open, IT_OPEN);
        err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
        file = ERR_PTR(err);
  
 Index: linux-2.6/include/linux/dcache.h
 ===================================================================
---- linux-2.6.orig/include/linux/dcache.h      2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/dcache.h   2006-07-15 21:04:08.000000000 +0800
-@@ -4,6 +4,7 @@
- #ifdef __KERNEL__
- #include <asm/atomic.h>
-+#include <linux/string.h>
- #include <linux/list.h>
- #include <linux/spinlock.h>
- #include <linux/cache.h>
-@@ -36,6 +37,8 @@ struct qstr {
+--- linux-2.6.orig/include/linux/dcache.h      2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/dcache.h   2006-08-31 12:00:07.000000000 +0800
+@@ -36,6 +36,9 @@ struct qstr {
        const unsigned char *name;
  };
  
++struct inode;
 +#include <linux/namei.h>
 +
  struct dentry_stat_t {
@@ -651,8 +561,8 @@ Index: linux-2.6/include/linux/dcache.h
        int nr_unused;
 Index: linux-2.6/include/linux/fs.h
 ===================================================================
---- linux-2.6.orig/include/linux/fs.h  2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/fs.h       2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/fs.h  2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/fs.h       2006-08-31 11:59:09.000000000 +0800
 @@ -280,6 +280,8 @@ typedef void (dio_iodone_t)(struct kiocb
  #define ATTR_KILL_SUID        2048
  #define ATTR_KILL_SGID        4096
@@ -670,7 +580,7 @@ Index: linux-2.6/include/linux/fs.h
  
        __u32                   i_generation;
  
-@@ -699,6 +702,7 @@ struct file {
+@@ -700,6 +703,7 @@ struct file {
        spinlock_t              f_ep_lock;
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
@@ -678,7 +588,7 @@ Index: linux-2.6/include/linux/fs.h
  };
  extern spinlock_t files_lock;
  #define file_list_lock() spin_lock(&files_lock);
-@@ -1099,7 +1103,9 @@ struct inode_operations {
+@@ -1100,7 +1104,9 @@ struct inode_operations {
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, struct nameidata *);
        int (*setattr) (struct dentry *, struct iattr *);
@@ -688,7 +598,7 @@ Index: linux-2.6/include/linux/fs.h
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
-@@ -1140,6 +1146,7 @@ struct super_operations {
+@@ -1141,6 +1147,7 @@ struct super_operations {
        int (*remount_fs) (struct super_block *, int *, char *);
        void (*clear_inode) (struct inode *);
        void (*umount_begin) (struct vfsmount *, int);
@@ -696,7 +606,7 @@ Index: linux-2.6/include/linux/fs.h
  
        int (*show_options)(struct seq_file *, struct vfsmount *);
        int (*show_stats)(struct seq_file *, struct vfsmount *);
-@@ -1362,6 +1369,7 @@ extern int may_umount_tree(struct vfsmou
+@@ -1363,6 +1370,7 @@ extern int may_umount_tree(struct vfsmou
  extern int may_umount(struct vfsmount *);
  extern void umount_tree(struct vfsmount *, int, struct list_head *);
  extern void release_mounts(struct list_head *);
@@ -704,7 +614,7 @@ Index: linux-2.6/include/linux/fs.h
  extern long do_mount(char *, char *, char *, unsigned long, void *);
  extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
  extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
-@@ -1423,6 +1431,7 @@ extern long do_sys_open(int fdf, const c
+@@ -1424,6 +1432,7 @@ extern long do_sys_open(int fdf, const c
                        int mode);
  extern struct file *filp_open(const char *, int, int);
  extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
@@ -714,8 +624,8 @@ Index: linux-2.6/include/linux/fs.h
  
 Index: linux-2.6/include/linux/namei.h
 ===================================================================
---- linux-2.6.orig/include/linux/namei.h       2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/namei.h    2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/namei.h       2006-08-31 11:17:39.000000000 +0800
++++ linux-2.6/include/linux/namei.h    2006-08-31 11:23:48.000000000 +0800
 @@ -5,10 +5,39 @@
  
  struct vfsmount;
@@ -758,16 +668,9 @@ Index: linux-2.6/include/linux/namei.h
 +              } d;
  };
  
- enum { MAX_NESTED_LINKS = 5 };
-@@ -22,12 +51,16 @@ struct nameidata {
-       unsigned        depth;
-       char *saved_names[MAX_NESTED_LINKS + 1];
--      /* Intent data */
--      union {
--              struct open_intent open;
--      } intent;
-+      struct lookup_intent intent;
+ enum { MAX_NESTED_LINKS = 8 };
+@@ -28,6 +57,13 @@ struct nameidata {
+       } intent;
  };
  
 +static inline void intent_init(struct lookup_intent *it, int op)
@@ -780,7 +683,7 @@ Index: linux-2.6/include/linux/namei.h
  /*
   * Type of the last component on LOOKUP_PARENT
   */
-@@ -48,6 +81,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+@@ -48,6 +84,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
  #define LOOKUP_PARENT         16
  #define LOOKUP_NOALT          32
  #define LOOKUP_REVAL          64
@@ -789,7 +692,7 @@ Index: linux-2.6/include/linux/namei.h
  /*
   * Intent data
   */
-@@ -57,10 +92,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+@@ -57,10 +95,19 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
  
  extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
  extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
@@ -811,8 +714,8 @@ Index: linux-2.6/include/linux/namei.h
  extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 Index: linux-2.6/include/linux/mount.h
 ===================================================================
---- linux-2.6.orig/include/linux/mount.h       2006-07-15 21:04:02.000000000 +0800
-+++ linux-2.6/include/linux/mount.h    2006-07-15 21:04:08.000000000 +0800
+--- linux-2.6.orig/include/linux/mount.h       2006-08-31 11:17:40.000000000 +0800
++++ linux-2.6/include/linux/mount.h    2006-08-31 11:23:48.000000000 +0800
 @@ -53,6 +53,8 @@ struct vfsmount {
        struct list_head mnt_slave;     /* slave list entry */
        struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
index 36b2e65..5898ff7 100644 (file)
@@ -1,7 +1,7 @@
 Index: linux-2.6/fs/dcache.c
 ===================================================================
---- linux-2.6.orig/fs/dcache.c 2006-07-15 16:08:36.000000000 +0800
-+++ linux-2.6/fs/dcache.c      2006-07-15 16:10:41.000000000 +0800
+--- linux-2.6.orig/fs/dcache.c 2006-08-31 11:59:09.000000000 +0800
++++ linux-2.6/fs/dcache.c      2006-09-06 14:01:37.000000000 +0800
 @@ -226,6 +226,13 @@ int d_invalidate(struct dentry * dentry)
                spin_unlock(&dcache_lock);
                return 0;
@@ -48,14 +48,13 @@ Index: linux-2.6/fs/dcache.c
        x = y; y = __tmp; } while (0)
 Index: linux-2.6/include/linux/dcache.h
 ===================================================================
---- linux-2.6.orig/include/linux/dcache.h      2006-07-15 16:10:33.000000000 +0800
-+++ linux-2.6/include/linux/dcache.h   2006-07-15 16:10:41.000000000 +0800
-@@ -176,6 +176,8 @@ d_iput:            no              no              no       yes
+--- linux-2.6.orig/include/linux/dcache.h      2006-08-31 12:00:23.000000000 +0800
++++ linux-2.6/include/linux/dcache.h   2006-09-06 14:02:36.000000000 +0800
+@@ -176,6 +176,7 @@ d_iput:            no              no              no       yes
  
  #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
  #define DCACHE_UNHASHED               0x0010  
-+#define DCACHE_LUSTRE_INVALID     0x0020  /* Lustre invalidated */
-+
++#define DCACHE_LUSTRE_INVALID 0x0040  /* Lustre invalidated */
  
  #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */
  
index 90ada9a..b2090f0 100644 (file)
@@ -21,3 +21,4 @@ ext3-patch-fuzz-fixup-fc3.patch
 uml-exprt-clearuser.patch
 fsprivate-2.6.patch
 linux-2.6.9-ext3-sub-second-timestamp.patch 
+bitops_ext2_find_next_le_bit-2.6.patch 
index 4d881fe..c9c7c23 100644 (file)
@@ -28,3 +28,4 @@ raid5-stripe-by-stripe-handling.patch
 raid5-optimize-memcpy.patch
 raid5-merge-ios.patch
 raid5-serialize-ovelapping-reqs.patch
+bitops_ext2_find_next_le_bit-2.6.patch
index 1c9aeb7..8c7d33c 100644 (file)
@@ -27,3 +27,5 @@ raid5-large-io.patch
 raid5-stripe-by-stripe-handling.patch
 raid5-merge-ios.patch
 raid5-serialize-ovelapping-reqs.patch
+jbd-stats-2.6.9.patch 
+bitops_ext2_find_next_le_bit-2.6.patch 
index 0b06087..6ecff57 100644 (file)
@@ -1,7 +1,7 @@
 lustre_version.patch
 vfs_intent-2.6-sles10.patch
 vfs_nointent-2.6-sles10.patch
-vfs_races-2.6.12.patch
+vfs_races-2.6.18-vanilla.patch
 ext3-wantedi-misc-2.6-suse.patch 
 jbd-2.6.10-jcberr.patch
 nfs-cifs-intent-2.6-fc5.patch 
@@ -11,7 +11,6 @@ export_symbols-2.6.12.patch
 dev_read_only-2.6-fc5.patch 
 export-2.6-fc5.patch 
 lookup_bdev_init_intent.patch
-8kstack-2.6.12.patch
 remove-suid-2.6-suse.patch
 export-show_task-2.6-fc5.patch 
 sd_iostats-2.6-rhel4.patch 
index 4068bed..f2b2c7b 100644 (file)
@@ -9,3 +9,5 @@ fsprivate-2.6.patch
 dcache-qstr-api-fix-2.6-suse.patch 
 iallocsem_consistency.patch
 tcp-zero-copy-2.6.5-7.244.patch
+jbd-stats-2.6.5.patch 
+bitops_ext2_find_next_le_bit-2.6.patch
index 0ac21df..64a1e82 100644 (file)
@@ -4,17 +4,16 @@ vfs_nointent-2.6.18-vanilla.patch
 vfs_races-2.6.18-vanilla.patch
 ext3-wantedi-misc-2.6.18-vanilla.patch 
 jbd-jcberr-2.6.18-vanilla.patch
-nfs-cifs-intent-2.6.18-vanilla.patch 
 iopen-misc-2.6.18-vanilla.patch
 export-truncate-2.6.18-vanilla.patch 
 export_symbols-2.6.18-vanilla.patch 
 dev_read_only-2.6.18-vanilla.patch 
 export-2.6.18-vanilla.patch 
-lookup_bdev_init_intent.patch
+lookup_bdev_init_intent-2.6.18-vanilla.patch
 8kstack-2.6.12.patch
 remove-suid-2.6-suse.patch
 export-show_task-2.6.18-vanilla.patch 
 sd_iostats-2.6-rhel4.patch 
 export_symbol_numa-2.6-fc5.patch 
 tcp-zero-copy-2.6.18-vanilla.patch
-vfs_intent-2.6-fc5-fix.patch
+export-do_kern_mount.patch
index e4d8f5c..22a8b5c 100644 (file)
@@ -1,5 +1,5 @@
 lnxmaj="2.6.5"
-lnxrel="7.267"
+lnxrel="7.276"
 
 KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
 # they include our patches
index 5ccc999..1bdaff4 100644 (file)
@@ -324,7 +324,6 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
         /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */
         cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
         cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
-        cli->cl_sandev = to_kdev_t(0);
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
                 if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) {
@@ -1033,14 +1032,13 @@ void target_abort_recovery(void *data)
         target_cancel_recovery_timer(obd);
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
-        CERROR("%s: recovery period over; disconnecting unfinished clients.\n",
-               obd->obd_name);
+        LCONSOLE_WARN("%s: recovery period over; disconnecting unfinished "
+                      "clients.\n", obd->obd_name);
         class_disconnect_stale_exports(obd);
         abort_recovery_queue(obd);
 
         target_finish_recovery(obd);
-
-        ptlrpc_run_recovery_over_upcall(obd);
+        CDEBUG(D_HA, "%s: recovery complete\n", obd_uuid2str(&obd->obd_uuid));
         EXIT;
 }
 
@@ -1345,7 +1343,8 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
                 spin_unlock_bh(&obd->obd_processing_task_lock);
 
                 target_finish_recovery(obd);
-                ptlrpc_run_recovery_over_upcall(obd);
+                CDEBUG(D_HA, "%s: recovery complete\n",
+                       obd_uuid2str(&obd->obd_uuid));
         } else {
                 CWARN("%s: %d recoverable clients remain\n",
                        obd->obd_name, obd->obd_recoverable_clients);
@@ -1510,7 +1509,9 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
         struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_device *master_obd;
         struct lustre_quota_ctxt *qctxt;
-        struct qunit_data *qdata, *rep;
+        struct qunit_data *qdata;
+        void* rep;
+        struct qunit_data_old *qdata_old;
         int rc = 0;
         int repsize[2] = { sizeof(struct ptlrpc_body),
                            sizeof(struct qunit_data) };
@@ -1521,11 +1522,27 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
                 CERROR("packing reply failed!: rc = %d\n", rc);
                 RETURN(rc);
         }
-        rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*rep));
-        LASSERT(rep);
-        
-        qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata),
-                                   lustre_swab_qdata);
+        LASSERT(req->rq_export);
+
+        /* fixed for bug10707 */
+        if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
+            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+                rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, 
+                                     sizeof(struct qunit_data));
+                LASSERT(rep);
+                qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), 
+                                           lustre_swab_qdata);
+        } else {
+                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+                rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, 
+                                     sizeof(struct qunit_data_old));
+                LASSERT(rep);
+                qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old), 
+                                               lustre_swab_qdata_old);
+                qdata = lustre_quota_old_to_new(qdata_old);
+        }
+
         if (qdata == NULL) {
                 CERROR("Can't unpack qunit_data\n");
                 RETURN(-EPROTO);
@@ -1544,7 +1561,13 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
                        "dqacq failed! (rc:%d)\n", rc);
 
         /* the qd_count might be changed in lqc_handler */
-        memcpy(rep, qdata, sizeof(*rep));
+        if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
+            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+                memcpy(rep,qdata,sizeof(*qdata));
+        } else {
+                qdata_old = lustre_quota_new_to_old(qdata);
+                memcpy(rep,qdata_old,sizeof(*qdata_old));
+        }
         req->rq_status = rc;
         rc = ptlrpc_reply(req);
 
index fa4657b..16b65fb 100644 (file)
@@ -1401,3 +1401,134 @@ void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
 
         LDLM_LOCK_PUT(lock);
 }
+
+void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask,
+               const char *file, const char *func, const int line,
+               const char *fmt, va_list args);
+void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask,
+            const char *file, const char *func, const int line,
+            const char *fmt, ...);
+
+void
+ldlm_lock_debug(cfs_debug_limit_state_t *cdls,
+                __u32 level, struct ldlm_lock *lock,
+                const char *file, const char *func, const int line,
+                char *fmt, ...)
+{
+        va_list args;
+
+        va_start(args, fmt);
+        cdebug_va(cdls, level, file, func, line, fmt, args);
+        va_end(args);
+
+        if (lock->l_resource == NULL) {
+                cdebug(cdls, level, file, func, line,
+                       " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+                       "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "
+                       LPX64" expref: %d pid: %u\n", lock,
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+                       lock->l_readers, lock->l_writers,
+                       ldlm_lockname[lock->l_granted_mode],
+                       ldlm_lockname[lock->l_req_mode],
+                       lock->l_flags, lock->l_remote_handle.cookie,
+                       lock->l_export ?
+                       atomic_read(&lock->l_export->exp_refcount) : -99,
+                       lock->l_pid);
+                return;
+        }
+
+        switch (lock->l_resource->lr_type) {
+        case LDLM_EXTENT:
+                cdebug(cdls, level, file, func, line,
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " 
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64
+                       "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64
+                       " expref: %d pid: %u\n",
+                       lock->l_resource->lr_namespace->ns_name, lock,
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+                       lock->l_readers, lock->l_writers,
+                       ldlm_lockname[lock->l_granted_mode],
+                       ldlm_lockname[lock->l_req_mode],
+                       lock->l_resource->lr_name.name[0],
+                       lock->l_resource->lr_name.name[1],
+                       atomic_read(&lock->l_resource->lr_refcount),
+                       ldlm_typename[lock->l_resource->lr_type],
+                       lock->l_policy_data.l_extent.start,
+                       lock->l_policy_data.l_extent.end,
+                       lock->l_req_extent.start, lock->l_req_extent.end,
+                       lock->l_flags, lock->l_remote_handle.cookie,
+                       lock->l_export ?
+                       atomic_read(&lock->l_export->exp_refcount) : -99,
+                       lock->l_pid);
+                break;
+
+        case LDLM_FLOCK:
+                cdebug(cdls, level, file, func, line,
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "
+                       "["LPU64"->"LPU64"] flags: %x remote: "LPX64
+                       " expref: %d pid: %u\n",
+                       lock->l_resource->lr_namespace->ns_name, lock,
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
+                       lock->l_readers, lock->l_writers,
+                       ldlm_lockname[lock->l_granted_mode],
+                       ldlm_lockname[lock->l_req_mode],
+                       lock->l_resource->lr_name.name[0],
+                       lock->l_resource->lr_name.name[1],
+                       atomic_read(&lock->l_resource->lr_refcount),
+                       ldlm_typename[lock->l_resource->lr_type],
+                       lock->l_policy_data.l_flock.pid,
+                       lock->l_policy_data.l_flock.start,
+                       lock->l_policy_data.l_flock.end,
+                       lock->l_flags, lock->l_remote_handle.cookie,
+                       lock->l_export ?
+                       atomic_read(&lock->l_export->exp_refcount) : -99,
+                       lock->l_pid);
+                break;
+
+        case LDLM_IBITS:
+                cdebug(cdls, level, file, func, line,
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+                       "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "
+                       "flags: %x remote: "LPX64" expref: %d "
+                       "pid %u\n",
+                       lock->l_resource->lr_namespace->ns_name,
+                       lock, lock->l_handle.h_cookie,
+                       atomic_read (&lock->l_refc),
+                       lock->l_readers, lock->l_writers,
+                       ldlm_lockname[lock->l_granted_mode],
+                       ldlm_lockname[lock->l_req_mode],
+                       lock->l_resource->lr_name.name[0],
+                       lock->l_resource->lr_name.name[1],
+                       lock->l_policy_data.l_inodebits.bits,
+                       atomic_read(&lock->l_resource->lr_refcount),
+                       ldlm_typename[lock->l_resource->lr_type],
+                       lock->l_flags, lock->l_remote_handle.cookie,
+                       lock->l_export ?
+                       atomic_read(&lock->l_export->exp_refcount) : -99,
+                       lock->l_pid);
+                break;
+
+        default:
+                cdebug(cdls, level, file, func, line,
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "
+                       "remote: "LPX64" expref: %d pid: %u\n",
+                       lock->l_resource->lr_namespace->ns_name,
+                       lock, lock->l_handle.h_cookie,
+                       atomic_read (&lock->l_refc),
+                       lock->l_readers, lock->l_writers,
+                       ldlm_lockname[lock->l_granted_mode],
+                       ldlm_lockname[lock->l_req_mode],
+                       lock->l_resource->lr_name.name[0],
+                       lock->l_resource->lr_name.name[1],
+                       atomic_read(&lock->l_resource->lr_refcount),
+                       ldlm_typename[lock->l_resource->lr_type],
+                       lock->l_flags, lock->l_remote_handle.cookie,
+                       lock->l_export ?
+                       atomic_read(&lock->l_export->exp_refcount) : -99,
+                       lock->l_pid);
+                break;
+        }
+}
+EXPORT_SYMBOL(ldlm_lock_debug);
index b12a4bf..e683de7 100644 (file)
@@ -118,6 +118,7 @@ static int expired_lock_main(void *arg)
 {
         struct list_head *expired = &expired_lock_thread.elt_expired_locks;
         struct l_wait_info lwi = { 0 };
+        int do_dump;
 
         ENTRY;
         cfs_daemonize("ldlm_elt");
@@ -145,6 +146,8 @@ static int expired_lock_main(void *arg)
                         expired_lock_thread.elt_dump = 0;
                 }
 
+                do_dump = 0;
+
                 while (!list_empty(expired)) {
                         struct obd_export *export;
                         struct ldlm_lock *lock;
@@ -169,12 +172,18 @@ static int expired_lock_main(void *arg)
                         export = class_export_get(lock->l_export);
                         spin_unlock_bh(&waiting_locks_spinlock);
 
+                        do_dump++;
                         class_fail_export(export);
                         class_export_put(export);
                         spin_lock_bh(&waiting_locks_spinlock);
                 }
                 spin_unlock_bh(&waiting_locks_spinlock);
 
+                if (do_dump && obd_dump_on_eviction) {
+                        CERROR("dump the log upon eviction\n");
+                        libcfs_debug_dumplog();
+                }
+
                 if (expired_lock_thread.elt_state == ELT_TERMINATE)
                         break;
         }
index 9cc6806..9edfc92 100644 (file)
@@ -894,6 +894,7 @@ static int pages_io(int xfer, loff_t pos)
                 }
         }
 
+        unlink(path);
         t_touch(path);
 
         fd = t_open(path);
@@ -972,7 +973,8 @@ int t50(char *name)
         while (np <= _npages) {
                 printf("%3d per xfer(total %d)...\t", np, _npages);
                 fflush(stdout);
-                pages_io(np, offset);
+                if (pages_io(np, offset) != 0)
+                        return 1;
                 np += np;
         }
         LEAVE();
index ca33b76..9c40c36 100644 (file)
@@ -192,7 +192,6 @@ int ll_drop_dentry(struct dentry *dentry)
         if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
 #else
         if (!d_unhashed(dentry)) {
-                struct inode *inode = dentry->d_inode;
 #endif
                 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
                        "inode %p refc %d\n", dentry->d_name.len,
@@ -203,19 +202,19 @@ int ll_drop_dentry(struct dentry *dentry)
                  * sys_getcwd() could return -ENOENT -bzzz */
 #ifdef LUSTRE_KERNEL_VERSION
                 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-#else
-                if (!inode || !S_ISDIR(inode->i_mode))
-                        __d_drop(dentry);
-#endif
-
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 __d_drop(dentry);
-                if (inode) {
+                if (dentry->d_inode) {
                         /* Put positive dentries to orphan list */
-                        hlist_add_head(&dentry->d_hash,
-                                       &ll_i2sbi(inode)->ll_orphan_dentry_list);
+                        list_add(&dentry->d_hash,
+                                 &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list);
                 }
 #endif
+#else
+                if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
+                        __d_drop(dentry);
+#endif
+
         }
         unlock_dentry(dentry);
         return 0;
index 21052ba..e9e07c0 100644 (file)
@@ -648,7 +648,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         ldlm_policy_data_t tmpex;
         unsigned long start, end, count, skip, i, j;
         struct page *page;
-        int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA;
+        int rc, rc2, l_flags, discard = lock->l_flags & LDLM_FL_DISCARD_DATA;
         struct lustre_handle lockh;
         ENTRY;
 
@@ -709,7 +709,6 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
          * batching writeback under the lock explicitly. */
         for (i = start, j = start % count; i <= end;
              j++, i++, tmpex.l_extent.start += PAGE_CACHE_SIZE) {
-                int l_flags;
                 if (j == count) {
                         CDEBUG(D_PAGE, "skip index %lu to %lu\n", i, i + skip);
                         i += skip;
@@ -747,17 +746,13 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                         lock_page(page);
                 }
 
-                l_flags = LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING |
-                          LDLM_FL_TEST_LOCK;
-
                 tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1;
-                /* check to see if another DLM lock covers this page */
+                l_flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
+                /* check to see if another DLM lock covers this page b=2765 */
                 rc2 = obd_match(ll_s2dtexp(inode->i_sb), lsm, LDLM_EXTENT,
                                 &tmpex, LCK_PR | LCK_PW, &l_flags, inode,
                                 &lockh);
-                /* rc2 < 0 means some error occured, e.g. export was down.
-                 * rc2 == 0 means nothing was matched */
-                if (rc2 <= 0 && page->mapping != NULL) {
+                if (rc2 == 0 && page->mapping != NULL) {
                         struct ll_async_page *llap = llap_cast_private(page);
                         // checking again to account for writeback's lock_page()
                         LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
@@ -1290,6 +1285,7 @@ repeat:
         /* BUG: 5972 */
         file_accessed(file);
         retval = generic_file_read(file, buf, chunk, ppos);
+        ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 0);
 
         ll_tree_unlock(&tree);
 
@@ -1398,6 +1394,7 @@ repeat:
         CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
                inode->i_ino, chunk, *ppos);
         retval = generic_file_write(file, buf, chunk, ppos);
+        ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 1);
 
 out:
         ll_tree_unlock(&tree);
index 523ac02..db2094b 100644 (file)
 #endif
 
 #define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-#if !defined(LUSTRE_KERNEL_VERSION) || (LUSTRE_KERNEL_VERSION < 46)
 #define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#else
-#if (LUSTRE_KERNEL_VERSION < 46)
-#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#else
-#define LUSTRE_FPRIVATE(file) ((file)->fs_private)
-#endif
-#endif
 
 #ifdef LUSTRE_KERNEL_VERSION
 static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
@@ -165,6 +157,34 @@ struct ll_ra_info {
         unsigned long             ra_stats[_NR_RA_STAT];
 };
 
+/* LL_HIST_MAX=32 causes an overflow */
+#define LL_HIST_MAX 28
+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
+#define LL_PROCESS_HIST_MAX 10
+struct per_process_info {
+        pid_t pid;
+        struct obd_histogram pp_r_hist;
+        struct obd_histogram pp_w_hist;
+};
+
+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
+struct ll_rw_extents_info {
+        struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
+};
+
+#define LL_OFFSET_HIST_MAX 100
+struct ll_rw_process_info {
+        pid_t                     rw_pid;
+        int                       rw_op;
+        loff_t                    rw_range_start;
+        loff_t                    rw_range_end;
+        loff_t                    rw_last_file_pos;
+        loff_t                    rw_offset;
+        size_t                    rw_smallest_extent;
+        size_t                    rw_largest_extent;
+        struct file               *rw_last_file;
+};
+
 /* flags for sbi->ll_flags */
 #define LL_SBI_NOLCK            0x01 /* DLM locking disabled (directio-only) */
 #define LL_SBI_CHECKSUM         0x02 /* checksum each page as it's written */
@@ -188,7 +208,7 @@ struct ll_sb_info {
         struct list_head          ll_conn_chain; /* per-conn chain of SBs */
         struct lustre_client_ocd  ll_lco;
 
-        struct hlist_head         ll_orphan_dentry_list; /*please don't ask -p*/
+        struct list_head          ll_orphan_dentry_list; /*please don't ask -p*/
         struct ll_close_queue    *ll_lcq;
 
         struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
@@ -207,6 +227,12 @@ struct ll_sb_info {
         /* =0 - hold lock over whole read/write
          * >0 - max. chunk to be read/written w/o lock re-acquiring */
         unsigned long             ll_max_rw_chunk;
+        struct ll_rw_extents_info ll_rw_extents_info;
+        int                       ll_extent_process_count;
+        struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
+        unsigned int              ll_offset_process_count;
+        struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
+        unsigned int              ll_rw_offset_entry_count;
 };
 
 #define LL_DEFAULT_MAX_RW_CHUNK         (32 * 1024 * 1024)
@@ -442,6 +468,8 @@ int ll_release_openhandle(struct dentry *, struct lookup_intent *);
 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
                 struct file *file);
 int ll_md_real_close(struct inode *inode, int flags);
+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
+                               *file, size_t count, int rw);
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                struct lookup_intent *it, struct kstat *stat);
index d36139f..f4faaab 100644 (file)
@@ -54,6 +54,7 @@ static struct ll_sb_info *ll_init_sbi(void)
 {
         struct ll_sb_info *sbi = NULL;
         class_uuid_t uuid;
+        int i;
         ENTRY;
 
         OBD_ALLOC(sbi, sizeof(*sbi));
@@ -63,7 +64,6 @@ static struct ll_sb_info *ll_init_sbi(void)
         spin_lock_init(&sbi->ll_lock);
         spin_lock_init(&sbi->ll_lco.lco_lock);
         INIT_LIST_HEAD(&sbi->ll_pglist);
-        sbi->ll_pglist_gen = 0;
         if (num_physpages >> (20 - PAGE_SHIFT) < 512)
                 sbi->ll_async_page_max = num_physpages / 2;
         else
@@ -74,7 +74,7 @@ static struct ll_sb_info *ll_init_sbi(void)
                                            SBI_DEFAULT_READAHEAD_WHOLE_MAX;
 
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
-        INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
+        INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
 
         class_generate_random_uuid(uuid);
         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
@@ -86,6 +86,11 @@ static struct ll_sb_info *ll_init_sbi(void)
 
         INIT_LIST_HEAD(&sbi->ll_deathrow);
         spin_lock_init(&sbi->ll_deathrow_lock);
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { 
+                spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
+                spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
+        }
+
         RETURN(sbi);
 }
 
@@ -143,7 +148,8 @@ static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
         RETURN(rc);
 }
 
-int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
+static int client_common_fill_super(struct super_block *sb, 
+                                    char *md, char *dt)
 {
         struct inode *root = 0;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
@@ -151,16 +157,16 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         struct lu_fid rootfid;
         struct obd_statfs osfs;
         struct ptlrpc_request *request = NULL;
-        struct lustre_handle osc_conn = {0, };
+        struct lustre_handle dt_conn = {0, };
         struct lustre_handle md_conn = {0, };
         struct obd_connect_data *data = NULL;
-        struct lustre_md md;
+        struct lustre_md lmd;
         int err;
         ENTRY;
 
-        obd = class_name2obd(mdc);
+        obd = class_name2obd(md);
         if (!obd) {
-                CERROR("MDC %s: not setup or attached\n", mdc);
+                CERROR("MD %s: not setup or attached\n", md);
                 RETURN(-EINVAL);
         }
 
@@ -170,7 +176,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
 
         if (proc_lustre_fs_root) {
                 err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-                                                  osc, mdc);
+                                                  dt, md);
                 if (err < 0)
                         CERROR("could not register mount in /proc/lustre");
         }
@@ -197,19 +203,20 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
 
         err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data);
         if (err == -EBUSY) {
-                CERROR("An MDT (mdc %s) is performing recovery, of which this"
-                       " client is not a part.  Please wait for recovery to "
-                       "complete, abort, or time out.\n", mdc);
+                LCONSOLE_ERROR("An MDT (md %s) is performing recovery, of "
+                               "which this client is not a part.  Please wait "
+                               "for recovery to complete, abort, or "
+                               "time out.\n", md);
                 GOTO(out, err);
         } else if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
+                CERROR("cannot connect to %s: rc = %d\n", md, err);
                 GOTO(out, err);
         }
         sbi->ll_md_exp = class_conn2export(&md_conn);
 
         err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ);
         if (err)
-                GOTO(out_mdc, err);
+                GOTO(out_md, err);
 
         LASSERT(osfs.os_bsize);
         sb->s_blocksize = osfs.os_bsize;
@@ -243,6 +250,8 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
          * on all clients. */
         /* s_dev is also used in lt_compare() to compare two fs, but that is
          * only a node-local comparison. */
+        
+        /* XXX: this will not work with LMV */
         sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
                                  strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
 #endif
@@ -251,12 +260,12 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         err = ll_fid_md_init(sbi);
         if (err) {
                 CERROR("can't init FIDs framework, rc %d\n", err);
-                GOTO(out_mdc, err);
+                GOTO(out_md, err);
         }
 
-        obd = class_name2obd(osc);
+        obd = class_name2obd(dt);
         if (!obd) {
-                CERROR("OSC %s: not setup or attached\n", osc);
+                CERROR("DT %s: not setup or attached\n", dt);
                 GOTO(out_md_fid, err = -ENODEV);
         }
 
@@ -271,19 +280,18 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         obd->obd_upcall.onu_upcall = ll_ocd_update;
         data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << PAGE_SHIFT;
 
-
-        err = obd_connect(NULL, &osc_conn, obd, &sbi->ll_sb_uuid, data);
+        err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data);
         if (err == -EBUSY) {
-                CERROR("An OST (osc %s) is performing recovery, of which this"
-                       " client is not a part.  Please wait for recovery to "
-                       "complete, abort, or time out.\n", osc);
+                LCONSOLE_ERROR("An OST (dt %s) is performing recovery, of which this"
+                               " client is not a part.  Please wait for recovery to "
+                               "complete, abort, or time out.\n", dt);
                 GOTO(out, err);
         } else if (err) {
-                CERROR("cannot connect to %s: rc = %d\n", osc, err);
-                GOTO(out_mdc, err);
+                CERROR("cannot connect to %s: rc = %d\n", dt, err);
+                GOTO(out_md, err);
         }
 
-        sbi->ll_dt_exp = class_conn2export(&osc_conn);
+        sbi->ll_dt_exp = class_conn2export(&dt_conn);
 
         spin_lock(&sbi->ll_lco.lco_lock);
         sbi->ll_lco.lco_flags = data->ocd_connect_flags;
@@ -297,7 +305,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                 LCONSOLE_ERROR("There are no OST's in this filesystem. "
                                "There must be at least one active OST for "
                                "a client to start.\n");
-                GOTO(out_osc, err);
+                GOTO(out_dt, err);
         }
 
         if (!ll_async_page_slab) {
@@ -307,14 +315,14 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                                                        ll_async_page_slab_size,
                                                        0, 0, NULL, NULL);
                 if (!ll_async_page_slab)
-                        GOTO(out_osc, -ENOMEM);
+                        GOTO(out_dt, -ENOMEM);
         }
 
         /* init FIDs framework */
         err = ll_fid_dt_init(sbi);
         if (err) {
                 CERROR("can't init FIDs framework, rc %d\n", err);
-                GOTO(out_osc, err);
+                GOTO(out_dt, err);
         }
 
         err = md_getstatus(sbi->ll_md_exp, &rootfid);
@@ -338,24 +346,24 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                          0, &request);
         if (err) {
                 CERROR("md_getattr failed for root: rc = %d\n", err);
-                GOTO(out_osc, err);
+                GOTO(out_dt, err);
         }
 
         err = md_get_lustre_md(sbi->ll_md_exp, request, 
                                REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp, 
-                               &md);
+                               &lmd);
         if (err) {
                 CERROR("failed to understand root inode md: rc = %d\n", err);
                 ptlrpc_req_finished (request);
-                GOTO(out_osc, err);
+                GOTO(out_dt, err);
         }
 
         LASSERT(fid_is_sane(&sbi->ll_root_fid));
-        root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &md);
+        root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd);
         ptlrpc_req_finished(request);
 
         if (root == NULL || is_bad_inode(root)) {
-                md_free_lustre_md(sbi->ll_dt_exp, &md);
+                md_free_lustre_md(sbi->ll_dt_exp, &lmd);
                 CERROR("lustre_lite: bad iget4 for root\n");
                 GOTO(out_root, err = -EBADF);
         }
@@ -386,12 +394,12 @@ out_root:
                 iput(root);
 out_dt_fid:
         obd_fid_fini(sbi->ll_dt_exp);
-out_osc:
+out_dt:
         obd_disconnect(sbi->ll_dt_exp);
         sbi->ll_dt_exp = NULL;
 out_md_fid:
         obd_fid_fini(sbi->ll_md_exp);
-out_mdc:
+out_md:
         obd_disconnect(sbi->ll_md_exp);
         sbi->ll_md_exp = NULL;
 out:
@@ -461,12 +469,12 @@ void lustre_dump_dentry(struct dentry *dentry, int recur)
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 void lustre_throw_orphan_dentries(struct super_block *sb)
 {
-        struct hlist_node *tmp, *next;
+        struct dentry *dentry, *next;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
 
         /* Do this to get rid of orphaned dentries. That is not really trw. */
-        hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
-                struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
+        list_for_each_entry_safe(dentry, next, &sbi->ll_orphan_dentry_list,
+                                 d_hash) {
                 CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping "
                       "before and after shrink_dcache_parent\n",
                       dentry->d_name.len, dentry->d_name.name, dentry, next);
@@ -901,10 +909,14 @@ int ll_fill_super(struct super_block *sb)
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
 
+        cfs_module_get();
+
         /* client additional sb info */
         lsi->lsi_llsbi = sbi = ll_init_sbi();
-        if (!sbi)
+        if (!sbi) {
+                cfs_module_put();
                 RETURN(-ENOMEM);
+        }
 
         err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
         if (err) 
@@ -1042,6 +1054,9 @@ void ll_put_super(struct super_block *sb)
         lustre_common_put_super(sb);
 
         LCONSOLE_WARN("client %s umount complete\n", ll_instance);
+        
+        cfs_module_put();
+
         EXIT;
 } /* client_put_super */
 
@@ -1082,15 +1097,11 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
                         inode = igrab(lock->l_ast_data);
                 } else {
                         inode = lock->l_ast_data;
-                        if (inode->i_state & I_FREEING)
-                                __LDLM_DEBUG(D_INFO, lock,
-                                     "l_ast_data %p is bogus: magic %08x",
-                                     lock->l_ast_data, lli->lli_inode_magic);
-                        else
-                                __LDLM_DEBUG(D_WARNING, lock,
-                                     "l_ast_data %p is bogus: magic %08x",
-                                     lock->l_ast_data, lli->lli_inode_magic);
-
+                        ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
+                                                D_INFO : D_WARNING,
+                                        lock, __FILE__, __func__, __LINE__,
+                                        "l_ast_data %p is bogus: magic %08x",
+                                        lock->l_ast_data, lli->lli_inode_magic);
                         inode = NULL;
                 }
         }
index 4cc1777..8b03321 100644 (file)
@@ -35,6 +35,9 @@ struct proc_dir_entry *proc_lustre_fs_root;
 /* /proc/lustre/llite mount point registration */
 struct file_operations llite_dump_pgcache_fops;
 struct file_operations ll_ra_stats_fops;
+struct file_operations ll_rw_extents_stats_fops;
+struct file_operations ll_rw_extents_stats_pp_fops;
+struct file_operations ll_rw_offset_stats_fops;
 
 static int ll_rd_blksize(char *page, char **start, off_t off, int count,
                          int *eof, void *data)
@@ -483,12 +486,31 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         entry->proc_fops = &llite_dump_pgcache_fops;
         entry->data = sbi;
 
-        entry = create_proc_entry("read_ahead_stats", 0444, sbi->ll_proc_root);
+        entry = create_proc_entry("read_ahead_stats", 0644, sbi->ll_proc_root);
         if (entry == NULL)
                 GOTO(out, err = -ENOMEM);
         entry->proc_fops = &ll_ra_stats_fops;
         entry->data = sbi;
 
+        entry = create_proc_entry("extents_stats", 0644, sbi->ll_proc_root);
+        if (entry == NULL)
+                 GOTO(out, err = -ENOMEM);
+        entry->proc_fops = &ll_rw_extents_stats_fops;
+        entry->data = sbi;
+
+        entry = create_proc_entry("extents_stats_per_process", 0644,
+                                  sbi->ll_proc_root);
+        if (entry == NULL)
+                 GOTO(out, err = -ENOMEM);
+        entry->proc_fops = &ll_rw_extents_stats_pp_fops;
+        entry->data = sbi;
+
+        entry = create_proc_entry("offset_stats", 0644, sbi->ll_proc_root);
+        if (entry == NULL)
+                GOTO(out, err = -ENOMEM);
+        entry->proc_fops = &ll_rw_offset_stats_fops;
+        entry->data = sbi;
+
         svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES);
         if (svc_stats == NULL) {
                 err = -ENOMEM;
@@ -687,13 +709,13 @@ static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
         return dummy_llap;
 }
 
-static void llite_dump_pgcache_seq_stop(struct seq_file *seq, void *v)
+static void null_stop(struct seq_file *seq, void *v)
 {
 }
 
 struct seq_operations llite_dump_pgcache_seq_sops = {
         .start = llite_dump_pgcache_seq_start,
-        .stop = llite_dump_pgcache_seq_stop,
+        .stop = null_stop,
         .next = llite_dump_pgcache_seq_next,
         .show = llite_dump_pgcache_seq_show,
 };
@@ -801,24 +823,22 @@ static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
         return 0;
 }
 
-static void *ll_ra_stats_seq_start(struct seq_file *p, loff_t *pos)
+static void *ll_stats_seq_start(struct seq_file *p, loff_t *pos)
 {
         if (*pos == 0)
                 return (void *)1;
         return NULL;
 }
-static void *ll_ra_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *ll_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
 {
         ++*pos;
         return NULL;
 }
-static void ll_ra_stats_seq_stop(struct seq_file *p, void *v)
-{
-}
+
 struct seq_operations ll_ra_stats_seq_sops = {
-        .start = ll_ra_stats_seq_start,
-        .stop = ll_ra_stats_seq_stop,
-        .next = ll_ra_stats_seq_next,
+        .start = ll_stats_seq_start,
+        .stop = null_stop,
+        .next = ll_stats_seq_next,
         .show = ll_ra_stats_seq_show,
 };
 
@@ -859,5 +879,393 @@ struct file_operations ll_ra_stats_fops = {
         .release = seq_release,
 };
 
+/* Integer percentage of a out of b; evaluates to 0 when b is 0.  The
+ * arguments are parenthesized so that expression arguments expand with the
+ * intended precedence.  Note that b is still evaluated twice. */
+#define pct(a,b) ((b) ? (a) * 100 / (b) : 0)
+
+static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
+                                   struct seq_file *seq, int which)
+{
+        unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
+        unsigned long start, end, r, w;
+        char *unitp = "KMGTPEZY";
+        int i, units = 10;
+        struct per_process_info *pp_info = &io_extents->pp_extents[which];
+
+        read_cum = 0;
+        write_cum = 0;
+        start = 0;
+
+        for(i = 0; i < LL_HIST_MAX; i++) {
+                read_tot += pp_info->pp_r_hist.oh_buckets[i];
+                write_tot += pp_info->pp_w_hist.oh_buckets[i];
+        }
+
+        for(i = 0; i < LL_HIST_MAX; i++) {
+                r = pp_info->pp_r_hist.oh_buckets[i];
+                w = pp_info->pp_w_hist.oh_buckets[i];
+                read_cum += r;
+                write_cum += w;
+                end = 1 << (i + LL_HIST_START - units);
+                seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu  | "
+                           "%14lu %4lu %4lu\n", start, *unitp, end, *unitp,
+                           (i == LL_HIST_MAX - 1) ? '+' : ' ',
+                           r, pct(r, read_tot), pct(read_cum, read_tot),
+                           w, pct(w, write_tot), pct(write_cum, write_tot));
+                start = end;
+                if (start == 1<<10) {
+                        start = 1;
+                        units += 10;
+                        unitp++;
+                }
+                if (read_cum == read_tot && write_cum == write_tot)
+                        break;
+        }
+}
+
+static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+        int k;
+
+        do_gettimeofday(&now);
+
+        seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "%15s %19s       | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s   %14s %4s %4s  | %14s %4s %4s\n", 
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+        
+        spin_lock(&sbi->ll_lock);
+        for(k = 0; k < LL_PROCESS_HIST_MAX; k++) {
+                if(io_extents->pp_extents[k].pid != 0) {
+                        seq_printf(seq, "\nPID: %d\n",
+                                   io_extents->pp_extents[k].pid);
+                        ll_display_extents_info(io_extents, seq, k);
+                }
+        }
+        spin_unlock(&sbi->ll_lock);
+        
+        return 0;
+}
+
+struct seq_operations ll_rw_extents_stats_pp_seq_sops = {
+        .start = ll_stats_seq_start,
+        .stop  = null_stop,
+        .next  = ll_stats_seq_next,
+        .show  = ll_rw_extents_stats_pp_seq_show,
+};
+
+static int ll_rw_extents_stats_pp_seq_open(struct inode *inode,
+                                           struct file *file)
+{
+        struct proc_dir_entry *dp = PDE(inode);
+        struct seq_file *seq;
+        int rc;
+
+        rc = seq_open(file, &ll_rw_extents_stats_pp_seq_sops);
+        if(rc)
+                return rc;
+        seq = file->private_data;
+        seq->private = dp->data;
+        return 0;
+}
+
+static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
+                                                const char *buf, size_t len,
+                                                loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+        int i;
+
+        spin_lock(&sbi->ll_lock);
+        for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                io_extents->pp_extents[i].pid = 0;
+                lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
+                lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
+        }
+        spin_unlock(&sbi->ll_lock);
+        return len;
+}
+
+struct file_operations ll_rw_extents_stats_pp_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_extents_stats_pp_seq_open,
+        .read    = seq_read,
+        .write   = ll_rw_extents_stats_pp_seq_write,
+        .llseek  = seq_lseek,
+        .release = seq_release,
+};
+
+static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        do_gettimeofday(&now);
+
+        seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+
+        seq_printf(seq, "%15s %19s       | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s   %14s %4s %4s  | %14s %4s %4s\n", 
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+
+        spin_lock(&sbi->ll_lock);
+        ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+struct seq_operations ll_rw_extents_stats_seq_sops = {
+        .start = ll_stats_seq_start,
+        .stop  = null_stop,
+        .next  = ll_stats_seq_next,
+        .show  = ll_rw_extents_stats_seq_show,
+};
+
+static int ll_rw_extents_stats_seq_open(struct inode *inode, struct file *file)
+{
+        struct proc_dir_entry *dp = PDE(inode);
+        struct seq_file *seq;
+        int rc;
+
+        rc = seq_open(file, &ll_rw_extents_stats_seq_sops);
+        if(rc)
+                return rc;
+        seq = file->private_data;
+        seq->private = dp->data;
+        return 0;
+}
+
+/* Write handler of ll_rw_extents_stats_fops: any write resets the aggregate
+ * read and write extent histograms kept in the
+ * pp_extents[LL_PROCESS_HIST_MAX] slot.  The written data itself is ignored
+ * and the whole write is reported as consumed (returns len).
+ *
+ * The reset is done under sbi->ll_lock, the same lock that
+ * ll_rw_stats_tally() and the show handler hold while touching these
+ * buckets, so a reset cannot interleave with a bucket update. */
+static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf,
+                                        size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        spin_lock(&sbi->ll_lock);
+        lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist);
+        lprocfs_oh_clear(&io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist);
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+struct file_operations ll_rw_extents_stats_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_extents_stats_seq_open,
+        .read    = seq_read,
+        .write   = ll_rw_extents_stats_seq_write,
+        .llseek  = seq_lseek,
+        .release = seq_release,
+};
+
+/* Account one read or write of count bytes, issued by process pid on file,
+ * in the per-mount I/O statistics of sbi.  rw is 0 for a read and non-zero
+ * for a write.  Everything is updated under sbi->ll_lock.
+ *
+ * Extent statistics: the call is counted in the size histogram of the slot
+ * owned by pid (a slot is recycled round-robin when pid has none yet) and in
+ * the aggregate slot pp_extents[LL_PROCESS_HIST_MAX].
+ *
+ * Offset statistics: each pid has a "current run" record.  A call on a
+ * different file than the previous one restarts the run.  A call on the same
+ * file that does not start where the previous one ended first saves the
+ * finished run into the ll_rw_offset_info ring, then starts a new run whose
+ * rw_offset is the distance from the previous end position. */
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
+                               *file, size_t count, int rw)
+{
+        int i, cur = -1;
+        size_t left;
+        struct ll_rw_process_info *process;
+        struct ll_rw_process_info *offset;
+        int *off_count = &sbi->ll_rw_offset_entry_count;
+        int *process_count = &sbi->ll_offset_process_count;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        process = sbi->ll_rw_process_info;
+        offset = sbi->ll_rw_offset_info;
+
+        spin_lock(&sbi->ll_lock);
+        /* Extent statistics */
+        for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if(io_extents->pp_extents[i].pid == pid) {
+                        cur = i;
+                        break;
+                }
+        }
+
+        if (cur == -1) {
+                /* new process */
+                sbi->ll_extent_process_count =
+                        (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
+                cur = sbi->ll_extent_process_count;
+                io_extents->pp_extents[cur].pid = pid;
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
+        }
+
+        /* Pick the bucket: the first i with count < 2^(LL_HIST_START + i),
+         * capped at the last bucket.  count is shifted down rather than
+         * shifting 1 up, and the bound on i is tested first, so the
+         * comparison cannot overflow an int for the upper buckets. */
+        for (i = 0, left = count >> LL_HIST_START;
+             (i < (LL_HIST_MAX - 1)) && left != 0; i++)
+                left >>= 1;
+        if (rw == 0) {
+                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+        } else {
+                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+        }
+
+        /* Offset statistics */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid == pid) {
+                        if (process[i].rw_last_file != file) {
+                                /* different file: restart the run */
+                                process[i].rw_range_start = file->f_pos;
+                                process[i].rw_last_file_pos =
+                                                        file->f_pos + count;
+                                process[i].rw_smallest_extent = count;
+                                process[i].rw_largest_extent = count;
+                                process[i].rw_offset = 0;
+                                process[i].rw_last_file = file;
+                                spin_unlock(&sbi->ll_lock);
+                                return;
+                        }
+                        if (process[i].rw_last_file_pos != file->f_pos) {
+                                /* discontiguous access: save the finished
+                                 * run in the ring, then start a new one */
+                                *off_count =
+                                    (*off_count + 1) % LL_OFFSET_HIST_MAX;
+                                offset[*off_count].rw_op = process[i].rw_op;
+                                offset[*off_count].rw_pid = pid;
+                                offset[*off_count].rw_range_start =
+                                        process[i].rw_range_start;
+                                offset[*off_count].rw_range_end =
+                                        process[i].rw_last_file_pos;
+                                offset[*off_count].rw_smallest_extent =
+                                        process[i].rw_smallest_extent;
+                                offset[*off_count].rw_largest_extent =
+                                        process[i].rw_largest_extent;
+                                offset[*off_count].rw_offset =
+                                        process[i].rw_offset;
+                                process[i].rw_op = rw;
+                                process[i].rw_range_start = file->f_pos;
+                                process[i].rw_smallest_extent = count;
+                                process[i].rw_largest_extent = count;
+                                process[i].rw_offset = file->f_pos -
+                                        process[i].rw_last_file_pos;
+                        }
+                        if(process[i].rw_smallest_extent > count)
+                                process[i].rw_smallest_extent = count;
+                        if(process[i].rw_largest_extent < count)
+                                process[i].rw_largest_extent = count;
+                        process[i].rw_last_file_pos = file->f_pos + count;
+                        spin_unlock(&sbi->ll_lock);
+                        return;
+                }
+        }
+        /* pid has no run record yet: recycle the next slot round-robin */
+        *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
+        process[*process_count].rw_pid = pid;
+        process[*process_count].rw_op = rw;
+        process[*process_count].rw_range_start = file->f_pos;
+        process[*process_count].rw_last_file_pos = file->f_pos + count;
+        process[*process_count].rw_smallest_extent = count;
+        process[*process_count].rw_largest_extent = count;
+        process[*process_count].rw_offset = 0;
+        process[*process_count].rw_last_file = file;
+        spin_unlock(&sbi->ll_lock);
+}
+
+char lpszt[] = LPSZ;
+
+/* seq_file show handler for the offset statistics.  Prints a snapshot
+ * timestamp and a column header, then one line per saved discontiguous run
+ * (sbi->ll_rw_offset_info) followed by one line per process' current run
+ * (sbi->ll_rw_process_info).  Entries whose rw_pid is 0 are unused and are
+ * skipped.  Both tables are read under sbi->ll_lock.  Always returns 0. */
+static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
+        struct ll_rw_process_info *process = sbi->ll_rw_process_info;
+        char format[50];
+        int i;
+
+        do_gettimeofday(&now);
+
+        spin_lock(&sbi->ll_lock);
+
+        seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
+                   "R/W", "PID", "RANGE START", "RANGE END",
+                   "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
+        /* Build the per-line format at run time: lpszt + 1 snips the leading
+         * '%' off of LPSZ so that the field width 17 can be spliced in front
+         * of the rest of the conversion.  snprintf() bounds the write to the
+         * size of format[] whatever LPSZ expands to. */
+        snprintf(format, sizeof(format), "%s%s%s%s%s\n",
+                 "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1,
+                 " %14Ld");
+        /* We stored the discontiguous offsets here; print them first */
+        for(i = 0; i < LL_OFFSET_HIST_MAX; i++) {
+                if (offset[i].rw_pid != 0)
+                        seq_printf(seq, format,
+                                   offset[i].rw_op ? 'W' : 'R',
+                                   offset[i].rw_pid,
+                                   offset[i].rw_range_start,
+                                   offset[i].rw_range_end,
+                                   offset[i].rw_smallest_extent,
+                                   offset[i].rw_largest_extent,
+                                   offset[i].rw_offset);
+        }
+        /* Then print the current offsets for each process */
+        for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid != 0)
+                        seq_printf(seq, format,
+                                   process[i].rw_op ? 'W' : 'R',
+                                   process[i].rw_pid,
+                                   process[i].rw_range_start,
+                                   process[i].rw_last_file_pos,
+                                   process[i].rw_smallest_extent,
+                                   process[i].rw_largest_extent,
+                                   process[i].rw_offset);
+        }
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+struct seq_operations ll_rw_offset_stats_seq_sops = {
+        .start = ll_stats_seq_start,
+        .stop = null_stop,
+        .next = ll_stats_seq_next,
+        .show = ll_rw_offset_stats_seq_show,
+};
+
+static int ll_rw_offset_stats_seq_open(struct inode *inode, struct file *file)
+{
+        struct proc_dir_entry *dp = PDE(inode);
+        struct seq_file *seq;
+        int rc;
+
+        rc = seq_open(file, &ll_rw_offset_stats_seq_sops);
+        if (rc)
+                return rc;
+        seq = file->private_data;
+        seq->private = dp->data;
+        return 0;
+}
+
+static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf,
+                                       size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
+        struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
+
+        spin_lock(&sbi->ll_lock);
+        sbi->ll_offset_process_count = 0;
+        sbi->ll_rw_offset_entry_count = 0;
+        memset(process_info, 0, sizeof(struct ll_rw_process_info) *
+               LL_PROCESS_HIST_MAX);
+        memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
+               LL_OFFSET_HIST_MAX);
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+struct file_operations ll_rw_offset_stats_fops = {
+        .owner   = THIS_MODULE,
+        .open    = ll_rw_offset_stats_seq_open,
+        .read    = seq_read,
+        .write   = ll_rw_offset_stats_seq_write,
+        .llseek  = seq_lseek,
+        .release = seq_release,
+};
+
 LPROCFS_INIT_VARS(llite, NULL, lprocfs_obd_vars)
 #endif /* LPROCFS */
index a5ca4b2..1b1351c 100644 (file)
@@ -88,11 +88,10 @@ static int __init init_lustre_lite(void)
 static void __exit exit_lustre_lite(void)
 {
         int rc;
-        int rc;
 
         lustre_register_client_fill_super(NULL);
         lustre_register_client_process_config(NULL);
-        
+
         ll_unregister_cache(&ll_cache_definition);
 
         rc = kmem_cache_destroy(ll_file_data_slab);
index 0765586..e44dcbf 100644 (file)
@@ -159,8 +159,9 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 
         rc = vfs_follow_link(nd, symname);
         ptlrpc_req_finished(request);
+        EXIT;
  out:
-        RETURN(rc);
+        return rc;
 }
 
 struct inode_operations ll_fast_symlink_inode_operations = {
index 63fe107..1f4b2dc 100644 (file)
@@ -120,6 +120,10 @@ int ll_setxattr_common(struct inode *inode, const char *name,
         if (rc)
                 RETURN(rc);
 
+        /* b10667: ignore lustre special xattr for now */
+        if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0)
+                RETURN(0);
+
         rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
                          name, value, size, 0, flags, &req);
         if (rc) {
index 5d2763e..4e3763e 100644 (file)
@@ -487,7 +487,7 @@ int lmv_add_target(struct obd_device *obd, struct obd_uuid *tgt_uuid)
                         RETURN(-EINVAL);
                 }
 
-                rc = obd_llog_init(obd, mdc_obd, 0, NULL);
+                rc = obd_llog_init(obd, mdc_obd, 0, NULL, tgt_uuid);
                 if (rc) {
                         lmv_init_unlock(lmv);
                         CERROR("lmv failed to setup llogging subsystems\n");
@@ -2065,7 +2065,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static int lmv_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                         int count, struct llog_catid *logid)
+                         int count, struct llog_catid *logid, struct obd_uuid *uuid)
 {
         struct llog_ctxt *ctxt;
         int rc;
index 4a6a951..5ab5297 100755 (executable)
@@ -70,7 +70,7 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
         }
 
         if (lmm->lmm_stripe_size == 0 ||
-            (__u64)le32_to_cpu(lmm->lmm_stripe_size) * stripe_count > ~0UL) {
+            (__u64)le32_to_cpu(lmm->lmm_stripe_size)*stripe_count > 0xffffffff){
                 CERROR("bad stripe size %u\n",
                        le32_to_cpu(lmm->lmm_stripe_size));
                 lov_dump_lmm_v1(D_WARNING, lmm);
@@ -93,7 +93,7 @@ lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno,
                            obd_off *lov_off, unsigned long *swidth)
 {
         if (swidth)
-                *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+                *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
 }
 
 static void
@@ -101,7 +101,7 @@ lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
                            obd_off *lov_off, unsigned long *swidth)
 {
         if (swidth)
-                *swidth = lsm->lsm_stripe_size * lsm->lsm_stripe_count;
+                *swidth = (ulong)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
 }
 
 static obd_off
@@ -111,6 +111,13 @@ lsm_stripe_offset_by_index_plain(struct lov_stripe_md *lsm,
         return 0;
 }
 
+static obd_off
+lsm_stripe_offset_by_offset_plain(struct lov_stripe_md *lsm,
+                                  obd_off lov_off)
+{
+        return 0;
+}
+
 static int
 lsm_stripe_index_by_offset_plain(struct lov_stripe_md *lsm,
                                   obd_off lov_off)
@@ -193,8 +200,9 @@ struct lsm_operations lsm_plain_ops = {
         .lsm_stripe_by_index    = lsm_stripe_by_index_plain,
         .lsm_stripe_by_offset   = lsm_stripe_by_offset_plain,
         .lsm_revalidate         = lsm_revalidate_plain,
-        .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_plain,
-        .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_plain,
+        .lsm_stripe_offset_by_index  = lsm_stripe_offset_by_index_plain,
+        .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_plain,
+        .lsm_stripe_index_by_offset  = lsm_stripe_index_by_offset_plain,
         .lsm_lmm_verify         = lsm_lmm_verify_plain,
         .lsm_unpackmd           = lsm_unpackmd_plain,
 };
@@ -281,7 +289,7 @@ lsm_stripe_by_index_join(struct lov_stripe_md *lsm, int *stripeno,
         *stripeno -= le->le_loi_idx;
 
         if (swidth)
-                *swidth = lsm->lsm_stripe_size * le->le_stripe_count;
+                *swidth = (ulong)lsm->lsm_stripe_size * le->le_stripe_count;
 
         if (lov_off) {
                 struct lov_extent *lov_le = lovea_off2le(lsm, *lov_off);
@@ -314,7 +322,7 @@ lsm_stripe_by_offset_join(struct lov_stripe_md *lsm, int *stripeno,
                 *stripeno -= le->le_loi_idx;
 
         if (swidth)
-                *swidth = lsm->lsm_stripe_size * le->le_stripe_count;
+                *swidth = (ulong)lsm->lsm_stripe_size * le->le_stripe_count;
 }
 
 static obd_off
@@ -328,6 +336,17 @@ lsm_stripe_offset_by_index_join(struct lov_stripe_md *lsm,
         return le ? le->le_start : 0;
 }
 
+static obd_off
+lsm_stripe_offset_by_offset_join(struct lov_stripe_md *lsm,
+                                 obd_off lov_off)
+{
+        struct lov_extent *le;
+
+        le = lovea_off2le(lsm, lov_off);
+
+        return le ? le->le_start : 0;
+}
+
 static int
 lsm_stripe_index_by_offset_join(struct lov_stripe_md *lsm,
                                  obd_off lov_off)
@@ -541,8 +560,9 @@ struct lsm_operations lsm_join_ops = {
         .lsm_stripe_by_index  = lsm_stripe_by_index_join,
         .lsm_stripe_by_offset = lsm_stripe_by_offset_join,
         .lsm_revalidate       = lsm_revalidate_join,
-        .lsm_stripe_offset_by_index = lsm_stripe_offset_by_index_join,
-        .lsm_stripe_index_by_offset = lsm_stripe_index_by_offset_join,
+        .lsm_stripe_offset_by_index  = lsm_stripe_offset_by_index_join,
+        .lsm_stripe_offset_by_offset = lsm_stripe_offset_by_offset_join,
+        .lsm_stripe_index_by_offset  = lsm_stripe_index_by_offset_join,
         .lsm_lmm_verify         = lsm_lmm_verify_join,
         .lsm_unpackmd           = lsm_unpackmd_join,
 };
index 1b6f31b..5c0fa75 100644 (file)
@@ -215,13 +215,14 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 int lov_fini_statfs_set(struct lov_request_set *set);
 
 /* lov_obd.c */
+void lov_fix_desc(struct lov_desc *desc);
 int lov_get_stripecnt(struct lov_obd *lov, int stripe_count);
 void lov_getref(struct obd_device *obd);
 void lov_putref(struct obd_device *obd);
 
 /* lov_log.c */
 int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                  int count, struct llog_catid *logid);
+                  int count, struct llog_catid *logid, struct obd_uuid *uuid);
 int lov_llog_finish(struct obd_device *obd, int count);
 
 /* lov_pack.c */
index 5b07c30..4ce722c 100644 (file)
@@ -104,14 +104,9 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
 {
         struct obd_device *obd = ctxt->loc_obd;
         struct lov_obd *lov = &obd->u.lov;
-        int i, rc = 0;
+        int i, rc = 0, err = 0;
         ENTRY;
 
-        /* We might have added an osc and not told the mds yet */
-        if (count != lov->desc.ld_tgt_count)
-                CERROR("Origin connect mds cnt %d != lov cnt %d\n", count,
-                       lov->desc.ld_tgt_count);
-
         lov_getref(obd);
         for (i = 0; i < count; i++) {
                 struct obd_device *child;
@@ -119,20 +114,21 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
                 
                 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
                         continue;
-                child = lov->lov_tgts[i]->ltd_exp->exp_obd;
-                cctxt = llog_get_context(child, ctxt->loc_idx);
                 if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid))
                         continue;
-
+                CDEBUG(D_CONFIG, "connect %d/%d\n", i, count);
+                child = lov->lov_tgts[i]->ltd_exp->exp_obd;
+                cctxt = llog_get_context(child, ctxt->loc_idx);
                 rc = llog_connect(cctxt, 1, logid, gen, uuid);
                 if (rc) {
                         CERROR("error osc_llog_connect tgt %d (%d)\n", i, rc);
-                        break;
+                        if (!err) 
+                                err = rc;
                 }
         }
         lov_putref(obd);
 
-        RETURN(rc);
+        RETURN(err);
 }
 
 /* the replicators commit callback */
@@ -181,7 +177,7 @@ static struct llog_operations lov_size_repl_logops = {
 };
 
 int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                  int count, struct llog_catid *logid)
+                  int count, struct llog_catid *logid, struct obd_uuid *uuid)
 {
         struct lov_obd *lov = &obd->u.lov;
         struct obd_device *child;
@@ -198,23 +194,17 @@ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc)
                 RETURN(rc);
 
-        CDEBUG(D_CONFIG, "llog init with %d/%d targets\n",
-               count, lov->desc.ld_tgt_count);
-        /* count may not match ld_tgt_count during dynamic ost add */
-
         lov_getref(obd);
-        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+        /* count may not match lov->desc.ld_tgt_count during dynamic ost add */
+        for (i = 0; i < count; i++) {
                 if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
                         continue;
+                if (uuid && !obd_uuid_equals(uuid, &lov->lov_tgts[i]->ltd_uuid))
+                        continue;
+                CDEBUG(D_CONFIG, "init %d/%d\n", i, count);
                 LASSERT(lov->lov_tgts[i]->ltd_exp);
                 child = lov->lov_tgts[i]->ltd_exp->exp_obd;
-                if (lov->lov_tgts[i]->ltd_exp->exp_imp_reverse) {
-                        CERROR("NZR: idx %d import state %s\n", i,
-                               ptlrpc_import_state_name(lov->lov_tgts[i]->ltd_exp->exp_imp_reverse->imp_state));                
-                } else {
-                        CERROR("NZR: idx %d no import\n", i);
-                }
-                rc = obd_llog_init(child, tgt, 1, logid + i);
+                rc = obd_llog_init(child, tgt, 1, logid + i, uuid);
                 if (rc) {
                         CERROR("error osc_llog_init idx %d osc '%s' tgt '%s' "
                                "(rc=%d)\n", i, child->obd_name, tgt->obd_name,
index 48b7c32..c1f4c5d 100644 (file)
@@ -643,18 +643,18 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index)
         }
 }
 
-static void lov_fix_desc(struct lov_desc *desc)
+void lov_fix_desc(struct lov_desc *desc)
 {
         if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
-                CWARN("Increasing default_stripe_size "LPU64" to %u\n",
-                      desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
+                LCONSOLE_WARN("Increasing default stripe size to min %u\n",
+                              PTLRPC_MAX_BRW_SIZE);
                 desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
         } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
-                CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n",
-                      desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
                 desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
-                CWARN("changing to "LPU64"\n", desc->ld_default_stripe_size);
-       }
+                LCONSOLE_WARN("Changing default stripe size to "LPU64" (a "
+                              "multiple of %u)\n",
+                              desc->ld_default_stripe_size,LOV_MIN_STRIPE_SIZE);
+        }
 
         if (desc->ld_default_stripe_count == 0)
                 desc->ld_default_stripe_count = 1;
@@ -662,8 +662,7 @@ static void lov_fix_desc(struct lov_desc *desc)
         /* from lov_setstripe */
         if ((desc->ld_pattern != 0) &&
             (desc->ld_pattern != LOV_PATTERN_RAID0)) {
-                CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
-                       desc->ld_pattern);
+                LCONSOLE_WARN("Unknown stripe pattern: %#x\n",desc->ld_pattern);
                 desc->ld_pattern = 0;
         }
 }
@@ -708,9 +707,9 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
          * of 4GB or larger on 32-bit CPUs. */
         count = desc->ld_default_stripe_count;
         if ((count > 0 ? count : desc->ld_tgt_count) *
-            desc->ld_default_stripe_size > ~0UL) {
-                CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
-                       desc->ld_default_stripe_size, count, ~0UL);
+            desc->ld_default_stripe_size > 0xffffffff) {
+                CERROR("LOV: stripe width "LPU64"x%u > 4294967295 bytes\n",
+                       desc->ld_default_stripe_size, count);
                 RETURN(-EINVAL);
         }
 
@@ -836,53 +835,15 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf)
         }
         case LCFG_PARAM: {
                 struct lprocfs_static_vars lvars;
-                struct lov_obd *lov = &obd->u.lov;
-                struct lov_desc *desc = &(lov->desc);
-                int i;
+                struct lov_desc *desc = &(obd->u.lov.desc);
                 
                 if (!desc)
                         GOTO(out, rc = -EINVAL);
                 
                 lprocfs_init_vars(lov, &lvars);
                 
-                /* setparam 0:lov_mdsA 1:default_stripe_size=1048576 
-                   2:default_stripe_pattern=0 3:default_stripe_offset=0 */
-                for (i = 1; i < lcfg->lcfg_bufcount; i++) {
-                        char *key, *sval;
-                        long val;
-                        key = lustre_cfg_buf(lcfg, i);
-                        sval = strchr(key, '=');
-                        if (!sval || (*(sval + 1) == 0)) {
-                                CERROR("Can't parse param %s\n", key);
-                                rc = -EINVAL;
-                                /* continue parsing other params */
-                                continue;
-                        }
-                        val = simple_strtol(sval + 1, NULL, 0);
-                        rc = 0;
-                        /* LOV_STRIPE_* aren't settable in proc */
-                        if (class_match_param(key, 
-                                              PARAM_LOV_STRIPE_SIZE,0) == 0)
-                                desc->ld_default_stripe_size = val;
-                        else if (class_match_param(key, 
-                                              PARAM_LOV_STRIPE_COUNT, 0) == 0)
-                                desc->ld_default_stripe_count = val;
-                        else if (class_match_param(key, 
-                                              PARAM_LOV_STRIPE_OFFSET, 0) == 0)
-                                desc->ld_default_stripe_offset = val;
-                        else if (class_match_param(key, 
-                                              PARAM_LOV_STRIPE_PATTERN, 0) == 0)
-                                desc->ld_pattern = val;
-                        else 
-                                rc = class_process_proc_param(PARAM_LOV, 
-                                                              lvars.obd_vars,
-                                                              lcfg, obd);
-                        if (rc >= 0) {
-                                LCONSOLE_INFO("set %s to %ld\n", key, val);
-                                rc = 0;
-                        }
-                }
-                lov_fix_desc(desc);
+                rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars,
+                                              lcfg, obd);
                 GOTO(out, rc);
         }
         default: {
index 22af87e..87597b2 100644 (file)
@@ -119,6 +119,7 @@ int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
 {
         unsigned long ssize  = lsm->lsm_stripe_size;
         unsigned long swidth, stripe_off, this_stripe;
+        uint64_t l_off, s_off;
         int magic = lsm->lsm_magic;
         int ret = 0;
 
@@ -128,6 +129,23 @@ int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
         }
 
         LASSERT(lsm_op_find(magic) != NULL);
+        /* Check whether lov_off and stripeno fall in the same extent:
+         * 1) lov_off extent < stripeno extent: ret = -1, *obd_off = 0
+         * 2) lov_off extent > stripeno extent: ret = 1,
+         *    *obd_off = start offset of the extent containing lov_off
+         */
+        l_off = lsm_op_find(magic)->lsm_stripe_offset_by_index(lsm, stripeno);
+        s_off = lsm_op_find(magic)->lsm_stripe_offset_by_offset(lsm, lov_off);
+        if (s_off < l_off) {
+                ret = -1;
+                *obd_off = 0;
+                return ret;
+        } else if (s_off > l_off) {
+                ret = 1;
+                *obd_off = s_off;
+                return ret;
+        }
+        /* Both are in the same extent: continue with the original logic */
         lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off,
                                                 &swidth);
        
index 2107483..29f3746 100644 (file)
@@ -687,7 +687,7 @@ int lov_update_create_set(struct lov_request_set *set,
         loi->loi_id = req->rq_oi.oi_oa->o_id;
         loi->loi_gr = req->rq_oi.oi_oa->o_gr;
         loi->loi_ost_idx = req->rq_idx;
-        CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPX64" at idx %d\n",
+        CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPU64" at idx %d\n",
                lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
         loi_init(loi);
 
@@ -1426,8 +1426,8 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
                         do_div(osfs->os_ffree, expected_stripes);
 
                 spin_lock(&obd->obd_osfs_lock);
-                memcpy(&obd->obd_osfs, osfs, sizeof(osfs));
-                obd->obd_osfs_age = cfs_time_current_64();
+                memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
+                obd->obd_osfs_age = get_jiffies_64();
                 spin_unlock(&obd->obd_osfs_lock);
                 RETURN(0);
         }
@@ -1458,8 +1458,8 @@ void lov_update_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                        struct obd_statfs *lov_sfs, int success)
 {
         spin_lock(&obd->obd_osfs_lock);
-        memcpy(&obd->obd_osfs, lov_sfs, sizeof(osfs));
-        obd->obd_osfs_age = cfs_time_current_64();
+        memcpy(&obd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
+        obd->obd_osfs_age = get_jiffies_64();
         spin_unlock(&obd->obd_osfs_lock);
 
         if (success == 0) {
index 8930167..2fca4b1 100644 (file)
@@ -31,6 +31,7 @@
 #include <lprocfs_status.h>
 #include <obd_class.h>
 #include <linux/seq_file.h>
+#include "lov_internal.h"
 
 #ifdef LPROCFS
 static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
@@ -45,6 +46,25 @@ static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
 }
 
+static int lov_wr_stripesize(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc;
+        __u64 val;
+        int rc;
+        
+        LASSERT(dev != NULL);
+        desc = &dev->u.lov.desc;
+        rc = lprocfs_write_u64_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        desc->ld_default_stripe_size = val;
+        lov_fix_desc(desc);
+        return count;
+}
+
 static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
                                int *eof, void *data)
 {
@@ -57,6 +77,25 @@ static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
         return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
 }
 
+static int lov_wr_stripeoffset(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc;
+        __u64 val;
+        int rc;
+        
+        LASSERT(dev != NULL);
+        desc = &dev->u.lov.desc;
+        rc = lprocfs_write_u64_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        desc->ld_default_stripe_offset = val;
+        lov_fix_desc(desc);
+        return count;
+}
+
 static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
                              int *eof, void *data)
 {
@@ -69,6 +108,24 @@ static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%u\n", desc->ld_pattern);
 }
 
+static int lov_wr_stripetype(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc;
+        int val, rc;
+        
+        LASSERT(dev != NULL);
+        desc = &dev->u.lov.desc;
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        desc->ld_pattern = val;
+        lov_fix_desc(desc);
+        return count;
+}
+
 static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
                               int *eof, void *data)
 {
@@ -81,6 +138,24 @@ static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
 }
 
+static int lov_wr_stripecount(struct file *file, const char *buffer,
+                              unsigned long count, void *data)
+{
+        struct obd_device *dev = (struct obd_device *)data;
+        struct lov_desc *desc;
+        int val, rc;
+        
+        LASSERT(dev != NULL);
+        desc = &dev->u.lov.desc;
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        desc->ld_default_stripe_count = val;
+        lov_fix_desc(desc);
+        return count;
+}
+
 static int lov_rd_numobd(char *page, char **start, off_t off, int count,
                          int *eof, void *data)
 {
@@ -238,24 +313,22 @@ static int lov_target_seq_open(struct inode *inode, struct file *file)
 
 struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",         lprocfs_rd_uuid,        0, 0 },
-        /* If you change the stripe* names, 
-           make sure lustre_param.h is updated */
-        { "stripesize",   lov_rd_stripesize,      0, 0 },
-        { "stripeoffset", lov_rd_stripeoffset,    0, 0 },
-        { "stripecount",  lov_rd_stripecount,     0, 0 },
-        { "stripetype",   lov_rd_stripetype,      0, 0 },
+        { "stripesize",   lov_rd_stripesize,      lov_wr_stripesize, 0 },
+        { "stripeoffset", lov_rd_stripeoffset,    lov_wr_stripeoffset, 0 },
+        { "stripecount",  lov_rd_stripecount,     lov_wr_stripecount, 0 },
+        { "stripetype",   lov_rd_stripetype,      lov_wr_stripetype, 0 },
         { "numobd",       lov_rd_numobd,          0, 0 },
         { "activeobd",    lov_rd_activeobd,       0, 0 },
         { "filestotal",   lprocfs_rd_filestotal,  0, 0 },
         { "filesfree",    lprocfs_rd_filesfree,   0, 0 },
-        /*{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },*/
+        /*{ "filegroups", lprocfs_rd_filegroups,  0, 0 },*/
         { "blocksize",    lprocfs_rd_blksize,     0, 0 },
         { "kbytestotal",  lprocfs_rd_kbytestotal, 0, 0 },
         { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
         { "kbytesavail",  lprocfs_rd_kbytesavail, 0, 0 },
         { "desc_uuid",    lov_rd_desc_uuid,       0, 0 },
-        { "qos_prio_free", lov_rd_qos_priofree, lov_wr_qos_priofree, 0 },
-        { "qos_maxage",   lov_rd_qos_maxage, lov_wr_qos_maxage, 0 },
+        { "qos_prio_free",lov_rd_qos_priofree,    lov_wr_qos_priofree, 0 },
+        { "qos_maxage",   lov_rd_qos_maxage,      lov_wr_qos_maxage, 0 },
         { 0 }
 };
 
index 06296b8..760bb78 100644 (file)
@@ -1433,8 +1433,10 @@ struct chk_dqblk{
         __u32                   dqb_valid;       /* flag for above fields */
 };
 
-static inline unsigned int const
-chkquot_hash(qid_t id, int type)
+static inline unsigned int chkquot_hash(qid_t id, int type)
+                                        __attribute__((__const__));
+
+static inline unsigned int chkquot_hash(qid_t id, int type)
 {
         return (id * (MAXQUOTAS - type)) % NR_DQHASH;
 }
index a4d3811..171bd18 100644 (file)
@@ -1274,7 +1274,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
         lprocfs_init_vars(mdc, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 
-        rc = obd_llog_init(obd, obd, 0, NULL);
+        rc = obd_llog_init(obd, obd, 0, NULL, NULL);
         if (rc) {
                 mdc_cleanup(obd);
                 CERROR("failed to setup llogging subsystems\n");
@@ -1350,7 +1350,8 @@ static int mdc_cleanup(struct obd_device *obd)
 
 
 static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                         int count, struct llog_catid *logid)
+                         int count, struct llog_catid *logid, 
+                         struct obd_uuid *uuid)
 {
         struct llog_ctxt *ctxt;
         int rc;
@@ -1457,7 +1458,8 @@ int __init mdc_init(void)
         int rc;
         struct lprocfs_static_vars lvars;
         lprocfs_init_vars(mdc, &lvars);
-
+        
+        request_module("lquota");
         quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface);
         init_obd_quota_ops(quota_interface, &mdc_obd_ops);
 
index 1196a49..b0ff285 100644 (file)
@@ -261,7 +261,7 @@ static int mds_connect_internal(struct obd_export *exp,
 {
         struct obd_device *obd = exp->exp_obd;
         if (data != NULL) {
-                data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED;
+                data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED;
                 data->ocd_ibits_known &= MDS_INODELOCK_FULL;
 
                 /* If no known bits (which should not happen, probably,
@@ -982,6 +982,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset)
         int rc = 0;
         ENTRY;
 
+        OBD_COUNTER_INCREMENT(obd, getattr);
+
         body = lustre_swab_reqbuf(req, offset, sizeof(*body),
                                   lustre_swab_mds_body);
         if (body == NULL)
@@ -1043,6 +1045,7 @@ static int mds_statfs(struct ptlrpc_request *req)
         /* This will trigger a watchdog timeout */
         OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
                          (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1);
+        OBD_COUNTER_INCREMENT(obd, statfs);
 
         rc = lustre_pack_reply(req, 2, size, NULL);
         if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) {
@@ -1981,7 +1984,31 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                 GOTO(err_qctxt, rc);
 
         lprocfs_init_vars(mds, &lvars);
-        lprocfs_obd_setup(obd, lvars.obd_vars);
+        if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 &&
+            lprocfs_alloc_obd_stats(obd, LPROC_MDS_LAST) == 0) {
+                /* Init private stats here */
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_OPEN,
+                                     /*LPROCFS_CNTR_AVGMINMAX*/0,
+                                     "open", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_CLOSE,
+                                     0, "close", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKNOD,
+                                     0, "mknod", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_LINK,
+                                     0, "link", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_UNLINK,
+                                     0, "unlink", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_MKDIR,
+                                     0, "mkdir", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RMDIR,
+                                     0, "rmdir", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_RENAME,
+                                     0, "rename", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_GETXATTR,
+                                     0, "getxattr", "reqs");
+                lprocfs_counter_init(obd->obd_stats, LPROC_MDS_SETXATTR,
+                                     0, "setxattr", "reqs");
+        }
 
         uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb);
         if (uuid_ptr != NULL) {
@@ -2217,6 +2244,7 @@ static int mds_cleanup(struct obd_device *obd)
                    we just need to drop our ref */
                 class_export_put(mds->mds_osc_exp);
 
+        lprocfs_free_obd_stats(obd);
         lprocfs_obd_cleanup(obd);
 
         lquota_cleanup(quota_interface, obd);
@@ -2390,6 +2418,8 @@ static int mds_intent_policy(struct ldlm_namespace *ns,
         switch ((long)it->opc) {
         case IT_OPEN:
         case IT_CREAT|IT_OPEN:
+                lprocfs_counter_incr(req->rq_export->exp_obd->obd_stats,
+                                     LPROC_MDS_OPEN);
                 fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock, NULL,
                                             &lockh);
                 /* XXX swab here to assert that an mds_open reint
@@ -2416,6 +2446,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns,
                         getattr_part = MDS_INODELOCK_LOOKUP;
         case IT_GETATTR:
                         getattr_part |= MDS_INODELOCK_LOOKUP;
+                        OBD_COUNTER_INCREMENT(req->rq_export->exp_obd, getattr);
         case IT_READDIR:
                 fixup_handle_for_resent_req(req, DLM_LOCKREQ_OFF, lock,
                                             &new_lock, &lockh);
@@ -2714,6 +2745,7 @@ static __attribute__((unused)) int __init mds_init(void)
         int rc;
         struct lprocfs_static_vars lvars;
 
+        request_module("lquota");
         quota_interface = PORTAL_SYMBOL_GET(mds_quota_interface);
         rc = lquota_init(quota_interface);
         if (rc) {
index 308102a..ab74030 100644 (file)
@@ -177,7 +177,7 @@ int mds_cleanup_pending(struct obd_device *obd);
 
 /* mds/mds_log.c */
 int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int count,
-                  struct llog_catid *logid);
+                  struct llog_catid *logid, struct obd_uuid *uuid);
 int mds_llog_finish(struct obd_device *obd, int count);
 
 /* mds/mds_lov.c */
@@ -256,4 +256,19 @@ static inline int mds_fid2str(char *str, __u64 id, __u32 generation)
         return sprintf(str, "%llx:%08x", (unsigned long long)id, generation);
 }
 
+/* mds/lproc_mds.c */
+enum {
+        LPROC_MDS_OPEN = 0,
+        LPROC_MDS_CLOSE,
+        LPROC_MDS_MKNOD,
+        LPROC_MDS_LINK,
+        LPROC_MDS_UNLINK,
+        LPROC_MDS_MKDIR,
+        LPROC_MDS_RMDIR,
+        LPROC_MDS_RENAME,
+        LPROC_MDS_GETXATTR,
+        LPROC_MDS_SETXATTR,
+        LPROC_MDS_LAST,
+};
+
 #endif /* _MDS_INTERNAL_H */
index a0f0a7a..c9d33f4 100644 (file)
@@ -363,7 +363,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req,
 
         LASSERT(body != NULL); /* previously verified & swabbed by caller */
 
-#if CRAY_XT3
+#ifdef CRAY_XT3
         if (req->rq_uid != LNET_UID_ANY) {
                 /* Non-root local cluster client */
                 LASSERT (req->rq_uid != 0);
@@ -386,7 +386,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req,
                 return rc;
         }
 
-#if CRAY_XT3
+#ifdef CRAY_XT3
         if (ucred->luc_uce)
                 ucred->luc_fsgid = ucred->luc_uce->ue_primary;
 #endif
index 67403eb..fa31b5f 100644 (file)
@@ -178,7 +178,7 @@ static struct llog_operations mds_size_repl_logops = {
 };
 
 int mds_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                  int count, struct llog_catid *logid)
+                  int count, struct llog_catid *logid, struct obd_uuid *uuid)
 {
         struct obd_device *lov_obd = obd->u.mds.mds_osc_obd;
         int rc;
@@ -194,9 +194,9 @@ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt,
         if (rc)
                 RETURN(rc);
 
-        rc = obd_llog_init(lov_obd, tgt, count, logid);
+        rc = obd_llog_init(lov_obd, tgt, count, logid, uuid);
         if (rc)
-                CERROR("error lov_llog_init\n");
+                CERROR("lov_llog_init err %d\n", rc);
 
         RETURN(rc);
 }
index cc92186..eb2845d 100644 (file)
@@ -70,8 +70,8 @@ static int mds_lov_read_objids(struct obd_device *obd)
         LASSERT(!mds->mds_lov_objids_size);
         LASSERT(!mds->mds_lov_objids_dirty);
 
-        /* Read everything in the file, even if our current lov desc 
-           has fewer targets. Old targets not in the lov descriptor 
+        /* Read everything in the file, even if our current lov desc
+           has fewer targets. Old targets not in the lov descriptor
            during mds setup may still have valid objids. */
         size = mds->mds_lov_objid_filp->f_dentry->d_inode->i_size;
         if (size == 0)
@@ -88,9 +88,9 @@ static int mds_lov_read_objids(struct obd_device *obd)
                 CERROR("Error reading objids %d\n", rc);
                 RETURN(rc);
         }
-                
-        mds->mds_lov_objids_in_file = size / sizeof(*ids); 
-        
+
+        mds->mds_lov_objids_in_file = size / sizeof(*ids);
+
         for (i = 0; i < mds->mds_lov_objids_in_file; i++) {
                 CDEBUG(D_INFO, "read last object "LPU64" for idx %d\n",
                        mds->mds_lov_objids[i], i);
@@ -102,7 +102,7 @@ int mds_lov_write_objids(struct obd_device *obd)
 {
         struct mds_obd *mds = &obd->u.mds;
         loff_t off = 0;
-        int i, rc, tgts; 
+        int i, rc, tgts;
         ENTRY;
 
         if (!mds->mds_lov_objids_dirty)
@@ -170,9 +170,9 @@ int mds_lov_set_nextid(struct obd_device *obd)
                                 KEY_NEXT_ID,
                                 mds->mds_lov_desc.ld_tgt_count,
                                 mds->mds_lov_objids, NULL);
-        
-        if (rc) 
-                CERROR ("%s: mds_lov_set_nextid failed (%d)\n", 
+
+        if (rc)
+                CERROR ("%s: mds_lov_set_nextid failed (%d)\n",
                         obd->obd_name, rc);
 
         RETURN(rc);
@@ -182,7 +182,7 @@ int mds_lov_set_nextid(struct obd_device *obd)
 static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
 {
         struct mds_obd *mds = &obd->u.mds;
-        struct lov_desc *ld; 
+        struct lov_desc *ld;
         __u32 size, stripes, valsize = sizeof(mds->mds_lov_desc);
         int rc = 0;
         ENTRY;
@@ -198,13 +198,13 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
 
         /* The size of the LOV target table may have increased. */
         size = ld->ld_tgt_count * sizeof(obd_id);
-        if ((mds->mds_lov_objids_size == 0) || 
+        if ((mds->mds_lov_objids_size == 0) ||
             (size > mds->mds_lov_objids_size)) {
                 obd_id *ids;
-                
+
                 /* add room by powers of 2 */
                 size = 1;
-                while (size < ld->ld_tgt_count) 
+                while (size < ld->ld_tgt_count)
                         size = size << 1;
                 size = size * sizeof(obd_id);
 
@@ -214,7 +214,7 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
                 memset(ids, 0, size);
                 if (mds->mds_lov_objids_size) {
                         obd_id *old_ids = mds->mds_lov_objids;
-                        memcpy(ids, mds->mds_lov_objids, 
+                        memcpy(ids, mds->mds_lov_objids,
                                mds->mds_lov_objids_size);
                         mds->mds_lov_objids = ids;
                         OBD_FREE(old_ids, mds->mds_lov_objids_size);
@@ -229,9 +229,9 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
         CDEBUG(D_CONFIG, "updated lov_desc, tgt_count: %d\n",
                mds->mds_lov_desc.ld_tgt_count);
 
-        stripes = min((__u32)LOV_MAX_STRIPE_COUNT, 
-                      max(mds->mds_lov_desc.ld_tgt_count,
-                          mds->mds_lov_objids_in_file));
+        stripes = min_t(__u32, LOV_MAX_STRIPE_COUNT,
+                        max(mds->mds_lov_desc.ld_tgt_count,
+                            mds->mds_lov_objids_in_file));
         mds->mds_max_mdsize = lov_mds_md_size(stripes);
         mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie);
         CDEBUG(D_CONFIG, "updated max_mdsize/max_cookiesize: %d/%d\n",
@@ -246,9 +246,9 @@ out:
 #define MDSLOV_NO_INDEX -1
 
 /* Inform MDS about new/updated target */
-static int mds_lov_update_mds(struct obd_device *obd,   
-                              struct obd_device *watched, 
-                              __u32 idx)
+static int mds_lov_update_mds(struct obd_device *obd,
+                              struct obd_device *watched,
+                              __u32 idx, struct obd_uuid *uuid)
 {
         struct mds_obd *mds = &obd->u.mds;
         int old_count;
@@ -261,23 +261,23 @@ static int mds_lov_update_mds(struct obd_device *obd,
                 RETURN(rc);
 
         CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
-               idx, obd->obd_recovering, obd->obd_async_recov, old_count, 
+               idx, obd->obd_recovering, obd->obd_async_recov, old_count,
                mds->mds_lov_desc.ld_tgt_count);
 
         /* idx is set as data from lov_notify. */
         if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) {
                 if (idx >= mds->mds_lov_desc.ld_tgt_count) {
-                        CERROR("index %d > count %d!\n", idx, 
+                        CERROR("index %d > count %d!\n", idx,
                                mds->mds_lov_desc.ld_tgt_count);
                         RETURN(-EINVAL);
                 }
-                
+
                 if (idx >= mds->mds_lov_objids_in_file) {
                         /* We never read this lastid; ask the osc */
                         obd_id lastid;
                         __u32 size = sizeof(lastid);
                         rc = obd_get_info(watched->obd_self_export,
-                                          strlen("last_id"), 
+                                          strlen("last_id"),
                                           "last_id", &size, &lastid);
                         if (rc)
                                 RETURN(rc);
@@ -286,10 +286,10 @@ static int mds_lov_update_mds(struct obd_device *obd,
                         mds_lov_write_objids(obd);
                 } else {
                         /* We have read this lastid from disk; tell the osc.
-                           Don't call this during recovery. */ 
+                           Don't call this during recovery. */
                         rc = mds_lov_set_nextid(obd);
                 }
-        
+
                 CDEBUG(D_CONFIG, "last object "LPU64" from OST %d\n",
                       mds->mds_lov_objids[idx], idx);
         }
@@ -298,7 +298,9 @@ static int mds_lov_update_mds(struct obd_device *obd,
         /* We only _need_ to do this at first add (idx), or the first time
            after recovery.  However, it should now be safe to call anytime. */
         CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx);
-        llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
+        mutex_down(&obd->obd_dev_sem);
+        llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, uuid);
+        mutex_up(&obd->obd_dev_sem);
 
         RETURN(rc);
 }
@@ -329,7 +331,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
         if (data == NULL)
                 RETURN(-ENOMEM);
         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX |
-                                  OBD_CONNECT_REQPORTAL;
+                                  OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64;
         data->ocd_version = LUSTRE_VERSION_CODE;
         data->ocd_group = mds->mds_id +  FILTER_GROUP_MDS0;
         /* NB: lov_connect() needs to fill in .ocd_index for each OST */
@@ -360,7 +362,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
                 GOTO(err_reg, rc);
 
         /* tgt_count may be 0! */
-        rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
+        rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
         if (rc) {
                 CERROR("failed to initialize catalog %d\n", rc);
                 GOTO(err_reg, rc);
@@ -589,8 +591,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
                 rc = llog_ioctl(ctxt, cmd, data);
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
-                llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
-
+                llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
                 group = FILTER_GROUP_MDS0 + mds->mds_id;
                 rc2 = obd_set_info_async(mds->mds_osc_exp,
                                          strlen(KEY_MDS_CONN), KEY_MDS_CONN,
@@ -631,9 +632,9 @@ struct mds_lov_sync_info {
 };
 
 /* We only sync one osc at a time, so that we don't have to hold
-   any kind of lock on the whole mds_lov_desc, which may change 
+   any kind of lock on the whole mds_lov_desc, which may change
    (grow) as a result of mds_lov_add_ost.  This also avoids any
-   kind of mismatch between the lov_desc and the mds_lov_desc, 
+   kind of mismatch between the lov_desc and the mds_lov_desc,
    which are not in lock-step during lov_add_obd */
 static int __mds_lov_synchronize(void *data)
 {
@@ -654,7 +655,7 @@ static int __mds_lov_synchronize(void *data)
         uuid = &watched->u.cli.cl_target_uuid;
         LASSERT(uuid);
 
-        rc = mds_lov_update_mds(obd, watched, idx);
+        rc = mds_lov_update_mds(obd, watched, idx, uuid);
         if (rc != 0)
                 GOTO(out, rc);
         group = FILTER_GROUP_MDS0 + mds->mds_id;
@@ -666,7 +667,7 @@ static int __mds_lov_synchronize(void *data)
         rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
                           mds->mds_lov_desc.ld_tgt_count,
                           NULL, NULL, uuid);
-        
+
         if (rc != 0) {
                 CERROR("%s: failed at llog_origin_connect: %d\n",
                        obd->obd_name, rc);
@@ -705,7 +706,7 @@ int mds_lov_synchronize(void *data)
         char name[20];
 
         if (mlsi->mlsi_index == MDSLOV_NO_INDEX)
-                /* There is still a watched target, 
+                /* There is still a watched target,
                 but we don't know its index */
                 sprintf(name, "ll_sync_tgt");
         else
@@ -715,7 +716,7 @@ int mds_lov_synchronize(void *data)
         RETURN(__mds_lov_synchronize(data));
 }
 
-int mds_lov_start_synchronize(struct obd_device *obd, 
+int mds_lov_start_synchronize(struct obd_device *obd,
                               struct obd_device *watched,
                               void *data, int nonblock)
 {
@@ -732,7 +733,7 @@ int mds_lov_start_synchronize(struct obd_device *obd,
 
         mlsi->mlsi_obd = obd;
         mlsi->mlsi_watched = watched;
-        if (data) 
+        if (data)
                 mlsi->mlsi_index = *(__u32 *)data;
         else
                 mlsi->mlsi_index = MDSLOV_NO_INDEX;
@@ -794,9 +795,9 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched,
 
         if (obd->obd_recovering) {
                 CWARN("MDS %s: in recovery, not resetting orphans on %s\n",
-                      obd->obd_name, 
+                      obd->obd_name,
                       obd_uuid2str(&watched->u.cli.cl_target_uuid));
-                /* We still have to fix the lov descriptor for ost's added 
+                /* We still have to fix the lov descriptor for ost's added
                    after the mdt in the config log.  They didn't make it into
                    mds_lov_connect. */
                 rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp);
@@ -804,11 +805,11 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched,
         }
 
         LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
-        rc = mds_lov_start_synchronize(obd, watched, data, 
+        rc = mds_lov_start_synchronize(obd, watched, data,
                                        !(ev == OBD_NOTIFY_SYNC));
-        
+
         lquota_recovery(quota_interface, obd);
-                
+
         RETURN(rc);
 }
 
@@ -832,14 +833,14 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
         int rc, err;
         ENTRY;
 
-        if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC || 
-            le32_to_cpu(lmm->lmm_magic == LOV_MAGIC_JOIN))
+        if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC ||
+            le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_JOIN)
                 RETURN(0);
 
         CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n",
                inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic),
                LOV_MAGIC);
-       
+
         rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size);
         if (rc < 0)
                 GOTO(conv_end, rc);
index d7e7645..ec405c1 100644 (file)
@@ -881,6 +881,7 @@ int mds_open(struct mds_update_record *rec, int offset,
         int lock_flags = 0;
         ENTRY;
 
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_OPEN);
         OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE,
                          (obd_timeout + 1) / 4);
 
@@ -1202,6 +1203,9 @@ found_child:
                 else
                         ptlrpc_save_lock(req, &parent_lockh, parent_mode);
         }
+        /* trigger dqacq on the owner of child and parent */
+        lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
+
         /* If we have not taken the "open" lock, we may not return 0 here,
            because caller expects 0 to mean "lock is taken", and it needs
            nonzero return here for caller to return EDLM_LOCK_ABORTED to
@@ -1211,8 +1215,6 @@ found_child:
         if ((cleanup_phase != 3) && !rc)
                 rc = ENOLCK;
 
-        /* trigger dqacq on the owner of child and parent */
-        lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
         RETURN(rc);
 }
 
@@ -1436,6 +1438,7 @@ int mds_close(struct ptlrpc_request *req, int offset)
         CDEBUG(D_HA, "close req->rep_len %d mdsize %d cookiesize %d\n",
                req->rq_replen,
                obd->u.mds.mds_max_mdsize, obd->u.mds.mds_max_cookiesize);
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_CLOSE);
 
         body = lustre_swab_reqbuf(req, offset, sizeof(*body),
                                   lustre_swab_mds_body);
index 4e605a9..e857405 100644 (file)
@@ -501,6 +501,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
 
         DEBUG_REQ(D_INODE, req, "setattr "LPU64"/%u %x", rec->ur_fid1->id,
                   rec->ur_fid1->generation, rec->ur_iattr.ia_valid);
+        OBD_COUNTER_INCREMENT(obd, setattr);
 
         MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req));
 
@@ -810,6 +811,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = ll_vfs_create(dir, dchild, rec->ur_mode, NULL);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
                 EXIT;
                 break;
         }
@@ -818,6 +820,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mkdir(dir, dchild, rec->ur_mode);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKDIR);
                 EXIT;
                 break;
         }
@@ -829,6 +832,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                         rc = -EINVAL;           /* -EPROTO? */
                 else
                         rc = ll_vfs_symlink(dir, dchild, rec->ur_tgt, S_IALLUGO);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
                 EXIT;
                 break;
         }
@@ -841,6 +845,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_MKNOD);
                 EXIT;
                 break;
         }
@@ -1634,6 +1639,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_rmdir(dparent->d_inode, dchild);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RMDIR);
                 break;
         case S_IFREG: {
                 struct lov_mds_md *lmm = lustre_msg_buf(req->rq_repmsg,
@@ -1644,6 +1650,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_unlink(dparent->d_inode, dchild);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK);
                 break;
         }
         case S_IFLNK:
@@ -1656,6 +1663,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 if (IS_ERR(handle))
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 rc = vfs_unlink(dparent->d_inode, dchild);
+                lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_UNLINK);
                 break;
         default:
                 CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
@@ -1769,6 +1777,7 @@ static int mds_reint_link(struct mds_update_record *rec, int offset,
         DEBUG_REQ(D_INODE, req, "original "LPU64"/%u to "LPU64"/%u %s",
                   rec->ur_fid1->id, rec->ur_fid1->generation,
                   rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_name);
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_LINK);
 
         MDS_CHECK_RESENT(req, mds_reconstruct_generic(req));
 
@@ -2112,7 +2121,8 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset,
         DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u %s to "LPU64"/%u %s",
                   rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name,
                   rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_tgt);
-
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_RENAME);
+        
         MDS_CHECK_RESENT(req, mds_reconstruct_generic(req));
 
         rc = mds_get_parents_children_locked(obd, mds, rec->ur_fid1, &de_srcdir,
@@ -2301,7 +2311,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
         int rc;
         ENTRY;
 
-#if CRAY_XT3
+#ifdef CRAY_XT3
         if (req->rq_uid != LNET_UID_ANY) {
                 /* non-root local cluster client
                  * NB root's creds are believed... */
@@ -2326,7 +2336,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset,
         /* checked by unpacker */
         LASSERT(rec->ur_opcode < REINT_MAX && reinters[rec->ur_opcode] != NULL);
 
-#if CRAY_XT3
+#ifdef CRAY_XT3
         if (rec->ur_uc.luc_uce)
                 rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary;
 #endif
index b60d429..ca46092 100644 (file)
@@ -174,6 +174,8 @@ int mds_getxattr(struct ptlrpc_request *req)
         int rc = 0;
         ENTRY;
 
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_GETXATTR);
+
         body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
                                   lustre_swab_mds_body);
         if (body == NULL)
@@ -334,6 +336,8 @@ int mds_setxattr(struct ptlrpc_request *req)
         int rc;
         ENTRY;
 
+        lprocfs_counter_incr(obd->obd_stats, LPROC_MDS_SETXATTR);
+
         body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
                                   lustre_swab_mds_body);
         if (body == NULL)
index a268422..40959c1 100644 (file)
@@ -64,7 +64,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         if (rc)
                 GOTO(err_decref, rc);
 
-        rc = obd_llog_init(obd, obd, 0, NULL);
+        rc = obd_llog_init(obd, obd, 0, NULL, NULL);
         if (rc) {
                 CERROR("failed to setup llogging subsystems\n");
                 GOTO(err_cleanup, rc);
@@ -80,7 +80,8 @@ err_decref:
 }
 
 static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                         int count, struct llog_catid *logid)
+                         int count, struct llog_catid *logid, 
+                         struct obd_uuid *uuid)
 {
         struct llog_ctxt *ctxt;
         int rc;
index fe912bf..4af0211 100644 (file)
@@ -390,7 +390,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         if (rc)
                 GOTO(err_decref, rc);
 
-        rc = obd_llog_init(obd, obd, 0, NULL);
+        rc = obd_llog_init(obd, obd, 0, NULL, NULL);
         if (rc) {
                 CERROR("failed to setup llogging subsystems\n");
                 GOTO(err_cleanup, rc);
@@ -788,7 +788,8 @@ static int mgc_import_event(struct obd_device *obd,
 }
 
 static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                         int count, struct llog_catid *logid)
+                         int count, struct llog_catid *logid,
+                         struct obd_uuid *uuid)
 {
         struct llog_ctxt *ctxt;
         int rc;
index 9148e3b..34c6459 100644 (file)
@@ -417,7 +417,6 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
                 mti->mti_flags |= LDD_F_UPDATE;
         }
 
-
         if (mti->mti_flags & LDD_F_UPDATE) {
                 CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, 
                        mti->mti_stripe_index);
index dfca3ee..3a3477d 100644 (file)
@@ -1626,11 +1626,19 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
         /* We also have to update the other logs where this osc is part of 
            the lov */
 
-        /* Append ost info to mdt log */
         if (mti->mti_flags & LDD_F_UPGRADE14) 
                 /* If we're upgrading, the old mdt log already has our
                    entry. Let's do a fake one for fun. */
                 flags = CM_SKIP | CM_UPGRADE146;
+        
+        if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
+                /* If the update flag isn't set, don't update client/mdt
+                   logs. */
+                flags |= CM_SKIP;
+                LCONSOLE_WARN("Client log for %s was not updated; writeconf "
+                              "the MDT first to regenerate it.\n",
+                              mti->mti_svname);
+        }
 
         // for_all_existing_mdt
         for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
@@ -1951,10 +1959,15 @@ int mgs_write_log_target(struct obd_device *obd,
                                       mti->mti_stripe_index, mti->mti_svname);
                         /* FIXME mark old log sections as invalid, 
                            inc config ver #, add new log sections.
-                           Make sure to update client and mds logs too
+                           Make sure to update client and mdt logs too
                            if needed */
-                        /* in the mean time, assume all logs were lost
-                           (writeconf), and recreate this one */
+                        /* In the meantime, if we found the index in the 
+                           client log, we can't add it again. So recreate
+                           the target log, but do _not_ update the client/mdt
+                           logs. For "full" writeconf, the client log won't
+                           have an entry for this target, so we won't get
+                           here. */
+                        mti->mti_flags &= ~LDD_F_UPDATE;
                 }
         }
 
@@ -2089,12 +2102,12 @@ int mgs_erase_logs(struct obd_device *obd, char *fsname)
                 RETURN(rc);
         }
                                                                                 
-        /* Delete the fs db */
         down(&mgs->mgs_sem);
+        
+        /* Delete the fs db */
         fsdb = mgs_find_fsdb(obd, fsname);
         if (fsdb) 
                 mgs_free_fsdb(fsdb);
-        up(&mgs->mgs_sem);
 
         list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
                 list_del(&dirent->lld_list);
@@ -2105,6 +2118,8 @@ int mgs_erase_logs(struct obd_device *obd, char *fsname)
                 OBD_FREE(dirent, sizeof(*dirent));
         }
         
+        up(&mgs->mgs_sem);
+
         RETURN(rc);
 }
 
index 36f49d4..53b9644 100644 (file)
@@ -62,10 +62,10 @@ int obd_memmax;
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_fail_loc;
 unsigned int obd_dump_on_timeout;
+unsigned int obd_dump_on_eviction;
 unsigned int obd_timeout = 100; /* seconds */
 unsigned int ldlm_timeout = 20; /* seconds */
 unsigned int obd_health_check_timeout = 120; /* seconds */
-char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall  */
 
 cfs_waitq_t obd_race_waitq;
 int obd_race_state;
@@ -383,10 +383,10 @@ EXPORT_SYMBOL(obd_print_fail_loc);
 EXPORT_SYMBOL(obd_race_waitq);
 EXPORT_SYMBOL(obd_race_state);
 EXPORT_SYMBOL(obd_dump_on_timeout);
+EXPORT_SYMBOL(obd_dump_on_eviction);
 EXPORT_SYMBOL(obd_timeout);
 EXPORT_SYMBOL(ldlm_timeout);
 EXPORT_SYMBOL(obd_health_check_timeout);
-EXPORT_SYMBOL(obd_lustre_upcall);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
 
 EXPORT_SYMBOL(proc_lustre_root);
@@ -570,7 +570,9 @@ int init_obdclass(void)
         if (err)
                 return err;
         err = class_procfs_init();
-        lustre_register_fs();
+        if (err)
+                return err;
+        err = lustre_register_fs();
 #endif
 
         return err;
index 59b7e45..3443d83 100644 (file)
@@ -26,7 +26,6 @@ extern unsigned int obd_fail_loc;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_timeout;
 extern unsigned int ldlm_timeout;
-extern char obd_lustre_upcall[128];
 extern unsigned int obd_sync_filter;
 extern atomic_t obd_memory;
 
@@ -50,9 +49,6 @@ SYSCTL_PROC(_lustre,          OID_AUTO,       lustre_kernel_version,
 SYSCTL_INT(_lustre,            OID_AUTO,       dump_on_timeout, 
           CTLTYPE_INT | CTLFLAG_RW,            &obd_dump_on_timeout,
           0,           "lustre_dump_on_timeout");
-SYSCTL_STRING(_lustre,         OID_AUTO,       upcall, 
-          CTLTYPE_STRING | CTLFLAG_RW,         obd_lustre_upcall,
-          128,         "lustre_upcall");
 SYSCTL_INT(_lustre,            OID_AUTO,       memused, 
           CTLTYPE_INT | CTLFLAG_RW,            (int *)&obd_memory.counter,
           0,           "lustre_memory_used");
index 6eb062b..09a24c9 100644 (file)
@@ -63,6 +63,7 @@
 #include <obd_support.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
+#include <lustre_ver.h>
 #ifdef __KERNEL__
 #include <linux/lustre_build_version.h>
 #include <linux/lustre_version.h>
@@ -218,18 +219,14 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count,
                           int *eof, void *data)
 {
         *eof = 1;
-        return snprintf(page, count, "%s\n", BUILD_VERSION);
-}
-
-int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count,
-                                 int *eof, void *data)
-{
-        *eof = 1;
+        /* Each branch supplies the conversion that matches its argument. */
+        return snprintf(page, count, "lustre: %s\nkernel: "
 #ifdef LUSTRE_KERNEL_VERSION
-        return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION);
+            "%u\nbuild:  %s\n", LUSTRE_VERSION_STRING, LUSTRE_KERNEL_VERSION,
 #else
-        return snprintf(page, count, "%u\n", "patchless");
+            "%s\nbuild:  %s\n", LUSTRE_VERSION_STRING, "patchless",
 #endif
+                        BUILD_VERSION);
 }
 
 int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
@@ -312,7 +309,6 @@ struct proc_dir_entry *proc_lustre_root = NULL;
 
 struct lprocfs_vars lprocfs_base[] = {
         { "version", obd_proc_read_version, NULL, NULL },
-        { "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
         { "pinger", obd_proc_read_pinger, NULL, NULL },
         { "health_check", obd_proc_read_health, NULL, NULL },
         { "health_check_timeout", obd_proc_rd_health_timeout,
index 169aecb..fe5cd34 100644 (file)
@@ -54,10 +54,10 @@ enum {
         OBD_FAIL_LOC = 1,       /* control test failures instrumentation */
         OBD_TIMEOUT,            /* RPC timeout before recovery/intr */
-        OBD_DUMP_ON_TIMEOUT,    /* dump kernel debug log upon eviction */
+        OBD_DUMP_ON_TIMEOUT,    /* dump kernel debug log upon timeout */
-        OBD_UPCALL,             /* path to recovery upcall */
         OBD_MEMUSED,            /* bytes currently OBD_ALLOCated */
         OBD_SYNCFILTER,         /* XXX temporary, as we play with sync osts.. */
         OBD_LDLM_TIMEOUT,       /* LDLM timeout for ASTs before client eviction */
+        OBD_DUMP_ON_EVICTION,   /* dump kernel debug log upon eviction */
 };
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
@@ -102,9 +102,8 @@ static ctl_table obd_table[] = {
                 &proc_set_timeout},
         {OBD_DUMP_ON_TIMEOUT, "dump_on_timeout", &obd_dump_on_timeout,
                 sizeof(int), 0644, NULL, &proc_dointvec},
-        /* XXX need to lock so we avoid update races with recovery upcall! */
-        {OBD_UPCALL, "upcall", obd_lustre_upcall, 128, 0644, NULL,
-                &proc_dostring, &sysctl_string },
+        {OBD_DUMP_ON_EVICTION, "dump_on_eviction", &obd_dump_on_eviction,
+                sizeof(int), 0644, NULL, &proc_dointvec},
         {OBD_MEMUSED, "memused", (int *)&obd_memory.counter,
                 sizeof(int), 0644, NULL, &proc_dointvec},
         {OBD_LDLM_TIMEOUT, "ldlm_timeout", &ldlm_timeout, sizeof(int), 0644,
index 3912558..9cad6f4 100644 (file)
@@ -313,22 +313,18 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt,
 }
 EXPORT_SYMBOL(llog_obd_origin_add);
 
-int llog_cat_initialize(struct obd_device *obd, int count)
+int llog_cat_initialize(struct obd_device *obd, int count,
+                        struct obd_uuid *uuid)
 {
+        char name[32] = CATLIST;
         struct llog_catid *idarray;
         int size = sizeof(*idarray) * count;
-        char name[32] = CATLIST;
         int rc;
         ENTRY;
 
-        /* We don't want multiple mdt threads here at once */
-        mutex_down(&obd->obd_dev_sem);
-
         OBD_ALLOC(idarray, size);
-        if (!idarray) {
-                mutex_up(&obd->obd_dev_sem);
+        if (!idarray) 
                 RETURN(-ENOMEM);
-        }
 
         rc = llog_get_cat_list(obd, obd, name, count, idarray);
         if (rc) {
@@ -336,7 +332,7 @@ int llog_cat_initialize(struct obd_device *obd, int count)
                 GOTO(out, rc);
         }
 
-        rc = obd_llog_init(obd, obd, count, idarray);
+        rc = obd_llog_init(obd, obd, count, idarray, uuid);
         if (rc) {
                 CERROR("rc: %d\n", rc);
                 GOTO(out, rc);
@@ -350,20 +346,19 @@ int llog_cat_initialize(struct obd_device *obd, int count)
 
  out:
         OBD_FREE(idarray, size);
-        mutex_up(&obd->obd_dev_sem);
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_cat_initialize);
 
 int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
-                  int count, struct llog_catid *logid)
+                  int count, struct llog_catid *logid, struct obd_uuid *uuid)
 {
         int rc;
         ENTRY;
         OBD_CHECK_DT_OP(obd, llog_init, 0);
         OBD_COUNTER_INCREMENT(obd, llog_init);
 
-        rc = OBP(obd, llog_init)(obd, disk_obd, count, logid);
+        rc = OBP(obd, llog_init)(obd, disk_obd, count, logid, uuid);
         RETURN(rc);
 }
 EXPORT_SYMBOL(obd_llog_init);
index 94edfc9..aedaa30 100644 (file)
@@ -598,7 +598,8 @@ static int llog_run_tests(struct obd_device *obd)
 
 
 static int llog_test_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                               int count, struct llog_catid *logid)
+                               int count, struct llog_catid *logid,
+                               struct obd_uuid *uuid)
 {
         int rc;
         ENTRY;
@@ -651,7 +652,7 @@ static int llog_test_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 RETURN(-EINVAL);
         }
 
-        rc = obd_llog_init(obd, tgt, 0, NULL);
+        rc = obd_llog_init(obd, tgt, 0, NULL, NULL);
         if (rc)
                 RETURN(rc);
 
index f4d8a50..ce75703 100644 (file)
@@ -384,6 +384,12 @@ static const char *obd_connect_names[] = {
         "join_file",
         "getattr_by_fid",
         "no_oh_for_devices",
+        "local_1.8_client",
+        "remote_1.8_client",
+        "max_byte_per_rpc",
+        "64bit_qdata",
+        "fid_capability",
+        "oss_capability",
         NULL
 };
 
@@ -640,7 +646,7 @@ int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
         struct proc_dir_entry *entry;
         LASSERT(root != NULL);
 
-        entry = create_proc_entry(name, 0444, root);
+        entry = create_proc_entry(name, 0644, root);
         if (entry == NULL)
                 return -ENOMEM;
         entry->proc_fops = &lprocfs_stats_seq_fops;
@@ -745,7 +751,6 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
-        LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
@@ -976,6 +981,7 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
                               __u64 *val, int mult)
 {
         char kernbuf[22], *end, *pbuf;
+        __u64 whole, frac = 0, frac_d = 1, units;
 
         if (count > (sizeof(kernbuf) - 1) )
                 return -EINVAL;
@@ -985,32 +991,42 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
 
         kernbuf[count] = '\0';
         pbuf = kernbuf;
-        if (*pbuf == '-') {
-                mult = -mult;
-                pbuf++;
-        }
+        if (*pbuf == '-') 
+                return -ERANGE;
 
-        *val = simple_strtoull(pbuf, &end, 10) * mult;
+        whole = simple_strtoull(pbuf, &end, 10);
         if (pbuf == end)
                 return -EINVAL;
 
         if (end != NULL && *end == '.') {
-                int temp_val;
-                int i, pow = 1;
-
+                int i;
                 pbuf = end + 1;
-                if (strlen(pbuf) > 10)
-                        pbuf[10] = '\0';
-
-                temp_val = (int)simple_strtoull(pbuf, &end, 10) * mult;
-
-                if (pbuf < end) {
-                        for (i = 0; i < (end - pbuf); i++)
-                                pow *= 10;
+                frac = simple_strtoull(pbuf, &end, 10);
+                /* count decimal places */
+                for (i = 0; i < (end - pbuf); i++)
+                        frac_d *= 10;
+        }
 
-                        *val += (__u64)(temp_val / pow);
-                }
+        units = 1;
+        switch(*end) {
+        case 'p': case 'P':
+                units <<= 10;
+        case 't': case 'T':
+                units <<= 10;
+        case 'g': case 'G':
+                units <<= 10;
+        case 'm': case 'M':
+                units <<= 10;
+        case 'k': case 'K':
+                units <<= 10;
         }
+        /* Only an explicit suffix (units > 1) replaces the caller's mult.
+         * Keep the factor in __u64: the 'T'/'P' values overflow int mult. */
+        if (units == 1)
+                units = mult;
+        frac = frac * units;
+        do_div(frac, frac_d);
+        *val = whole * units + frac;
         return 0;
 }
 
index be6efef..6e9ff1b 100644 (file)
@@ -40,6 +40,7 @@ struct uuid_nid_data {
         struct list_head un_list;
         lnet_nid_t       un_nid;
         char            *un_uuid;
+        int              un_count;  /* nid/uuid pair refcount */
 };
 
 /* FIXME: This should probably become more elegant than a global linked list */
@@ -85,8 +86,10 @@ int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index)
    LNET will choose the best one. */
 int class_add_uuid(const char *uuid, __u64 nid)
 {
-        struct uuid_nid_data *data;
+        struct list_head *tmp, *n;
+        struct uuid_nid_data *data, *entry;
         int nob = strnlen (uuid, PAGE_SIZE) + 1;
+        int found = 0;
 
         LASSERT(nid != 0);  /* valid newconfig NID is never zero */
 
@@ -103,16 +106,34 @@ int class_add_uuid(const char *uuid, __u64 nid)
                 return -ENOMEM;
         }
 
-        CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
         memcpy(data->un_uuid, uuid, nob);
         data->un_nid = nid;
+        data->un_count = 1;
 
         spin_lock (&g_uuid_lock);
 
-        list_add(&data->un_list, &g_uuid_list);
+        list_for_each_safe(tmp, n, &g_uuid_list) {
+                entry = list_entry(tmp, struct uuid_nid_data, un_list);
+                if (entry->un_nid == nid && 
+                    (strcmp(entry->un_uuid, uuid) == 0)) {
+                        found++;
+                        entry->un_count++;
+                        break;
+                }
+        }
+        if (!found) 
+                list_add(&data->un_list, &g_uuid_list);
 
         spin_unlock (&g_uuid_lock);
 
+        if (found) {
+                CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid, 
+                       libcfs_nid2str(nid), entry->un_count);
+                OBD_FREE(data->un_uuid, nob);
+                OBD_FREE(data, sizeof(*data));
+        } else {
+                CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid));
+        }
         return 0;
 }
 
@@ -131,11 +152,16 @@ int class_del_uuid(const char *uuid)
         list_for_each_safe(tmp, n, &g_uuid_list) {
                 data = list_entry(tmp, struct uuid_nid_data, un_list);
 
-                if (uuid == NULL || strcmp(data->un_uuid, uuid) == 0) {
+                if (uuid == NULL) {
                         list_del (&data->un_list);
                         list_add (&data->un_list, &deathrow);
-                        if (uuid)
-                                break;
+                } else if (strcmp(data->un_uuid, uuid) == 0) {
+                        --data->un_count;
+                        if (data->un_count <= 0) {
+                                list_del (&data->un_list);
+                                list_add (&data->un_list, &deathrow);
+                        }
+                        break;
                 }
         }
 
@@ -151,7 +177,8 @@ int class_del_uuid(const char *uuid)
                 data = list_entry(deathrow.next, struct uuid_nid_data, un_list);
 
                 list_del (&data->un_list);
-                CDEBUG(D_INFO, "del uuid %s\n", data->un_uuid);
+                CDEBUG(D_INFO, "del uuid %s %s\n", data->un_uuid,
+                       libcfs_nid2str(data->un_nid));
 
                 OBD_FREE(data->un_uuid, strlen(data->un_uuid) + 1);
                 OBD_FREE(data, sizeof(*data));
index f0c4ad8..6e8bf9f 100644 (file)
@@ -756,12 +756,8 @@ int class_process_config(struct lustre_cfg *lcfg)
                 GOTO(out, err = 0);
         }
         case LCFG_SET_UPCALL: {
-                CDEBUG(D_IOCTL, "setting lustre ucpall to: %s\n",
-                       lustre_cfg_string(lcfg, 1));
-                if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof obd_lustre_upcall)
-                        GOTO(out, err = -EINVAL);
-                strncpy(obd_lustre_upcall, lustre_cfg_string(lcfg, 1),
-                        sizeof (obd_lustre_upcall));
+                LCONSOLE_ERROR("recovery upcall is deprecated\n");
+                /* COMPAT_146 Don't fail on old configs */
                 GOTO(out, err = 0);
         }
         case LCFG_MARKER: {
@@ -774,7 +770,7 @@ int class_process_config(struct lustre_cfg *lcfg)
         case LCFG_PARAM: {
                 /* llite has no obd */
                 if ((class_match_param(lustre_cfg_string(lcfg, 1), 
-                                      PARAM_LLITE, 0) == 0) &&
+                                       PARAM_LLITE, 0) == 0) &&
                     client_process_config) {
                         err = (*client_process_config)(lcfg);
                         GOTO(out, err);
@@ -839,6 +835,7 @@ out:
 int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, 
                              struct lustre_cfg *lcfg, void *data)
 {
+#ifdef __KERNEL__
         struct lprocfs_vars *var;
         char *key, *sval;
         int i, vallen;
@@ -875,9 +872,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
                         if (class_match_param(key, (char *)var->name, 0) == 0) {
                                 matched++;
                                 rc = -EROFS;
-                                if (var->write_fptr) 
+                                if (var->write_fptr) {
+                                        mm_segment_t oldfs;
+                                        oldfs = get_fs();
+                                        set_fs(KERNEL_DS);
                                         rc = (var->write_fptr)(NULL, sval,
                                                                vallen, data);
+                                        set_fs(oldfs);
+                                }
                                 if (rc < 0) 
                                         CERROR("writing proc entry %s err %d\n", 
                                                var->name, rc);
@@ -898,6 +900,10 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
         if (rc > 0) 
                 rc = 0;
         RETURN(rc);
+#else
+        CDEBUG(D_CONFIG, "liblustre can't process params.\n");
+        return -ENOSYS;
+#endif
 }
 
 int class_config_dump_handler(struct llog_handle * handle,
index 83062ac..4f24d10 100644 (file)
@@ -214,15 +214,17 @@ int server_put_mount(const char *name, struct vfsmount *mnt)
 
 static void ldd_print(struct lustre_disk_data *ldd)
 {
-        PRINT_CMD(PRINT_MASK, "  disk data:\n");
-        PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
-        PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
+        PRINT_CMD(PRINT_MASK, "  disk data:\n"); 
         PRINT_CMD(PRINT_MASK, "server:  %s\n", ldd->ldd_svname);
+        PRINT_CMD(PRINT_MASK, "uuid:    %s\n", (char *)ldd->ldd_uuid);
+        PRINT_CMD(PRINT_MASK, "fs:      %s\n", ldd->ldd_fsname);
         PRINT_CMD(PRINT_MASK, "index:   %04x\n", ldd->ldd_svindex);
+        PRINT_CMD(PRINT_MASK, "config:  %d\n", ldd->ldd_config_ver);
         PRINT_CMD(PRINT_MASK, "flags:   %#x\n", ldd->ldd_flags);
         PRINT_CMD(PRINT_MASK, "diskfs:  %s\n", MT_STR(ldd));
         PRINT_CMD(PRINT_MASK, "options: %s\n", ldd->ldd_mount_opts);
-        PRINT_CMD(PRINT_MASK, "params: %s\n", ldd->ldd_params);
+        PRINT_CMD(PRINT_MASK, "params:  %s\n", ldd->ldd_params);
+        PRINT_CMD(PRINT_MASK, "comment: %s\n", ldd->ldd_userdata);
 }
 
 static int ldd_parse(struct lvfs_run_ctxt *mount_ctxt,
@@ -508,6 +510,8 @@ static int server_stop_mgs(struct super_block *sb)
         RETURN(rc);
 }
 
+DECLARE_MUTEX(mgc_start_lock);
+
 /* Set up a mgcobd to process startup logs */
 static int lustre_start_mgc(struct super_block *sb)
 {
@@ -562,6 +566,8 @@ static int lustre_start_mgc(struct super_block *sb)
                 GOTO(out_free, rc = -ENOMEM);
         sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
 
+        mutex_down(&mgc_start_lock);
+
         obd = class_name2obd(mgcname);
         if (obd) {
                 /* Re-using an existing MGC */
@@ -702,6 +708,8 @@ out:
            to the same mgc.*/
         lsi->lsi_mgc = obd;
 out_free:
+        mutex_up(&mgc_start_lock);
+
         if (mgcname) 
                 OBD_FREE(mgcname, len);
         if (niduuid) 
@@ -714,7 +722,7 @@ static int lustre_stop_mgc(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device *obd;
         char *niduuid, *ptr = 0;
-        int i, rc, len;
+        int i, rc = 0, len;
         ENTRY;
 
         if (!lsi)
@@ -724,12 +732,13 @@ static int lustre_stop_mgc(struct super_block *sb)
                 RETURN(-ENOENT);
 
         lsi->lsi_mgc = NULL;
+        mutex_down(&mgc_start_lock);
         if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
                 /* This is not fatal, every client that stops
                    will call in here. */
                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
                        atomic_read(&obd->u.cli.cl_mgc_refcount));
-                RETURN(-EBUSY);
+                GOTO(out, rc = -EBUSY); 
         }
 
         /* MGC must always stop */
@@ -753,7 +762,7 @@ static int lustre_stop_mgc(struct super_block *sb)
 
         rc = class_manual_cleanup(obd);
         if (rc) 
-                RETURN(rc);
+                GOTO(out, rc);
 
         /* Clean the nid uuids */
         if (!niduuid) 
@@ -769,7 +778,9 @@ static int lustre_stop_mgc(struct super_block *sb)
         OBD_FREE(niduuid, len);
         /* class_import_put will get rid of the additional connections */
         
-        RETURN(0);
+out:
+        mutex_up(&mgc_start_lock);
+        RETURN(rc);
 }
 
 /* Since there's only one mgc per node, we have to change it's fs to get
@@ -806,6 +817,8 @@ static int server_mgc_clear_fs(struct obd_device *mgc)
         RETURN(rc);
 }
 
+DECLARE_MUTEX(server_start_lock);
+
 /* Stop MDS/OSS if nobody is using them */
 static int server_stop_servers(int lddflags, int lsiflags)
 {
@@ -814,8 +827,9 @@ static int server_stop_servers(int lddflags, int lsiflags)
         int rc = 0;
         ENTRY;
 
-        /* Either an MDT or an OST or neither  */
+        mutex_down(&server_start_lock);
 
+        /* Either an MDT or an OST or neither  */
         /* if this was an MDT, and there are no more MDT's, clean up the MDS */
         if ((lddflags & LDD_F_SV_TYPE_MDT) && 
             (obd = class_name2obd(LUSTRE_MDS_OBDNAME))) {
@@ -837,6 +851,8 @@ static int server_stop_servers(int lddflags, int lsiflags)
                         rc = err;
         }
 
+        mutex_up(&server_start_lock);
+
         RETURN(rc);
 }
 
@@ -1053,6 +1069,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
         /* If we're an MDT, make sure the global MDS is running */
         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
                 /* make sure the MDS is started */
+                mutex_down(&server_start_lock);
                 obd = class_name2obd(LUSTRE_MDS_OBDNAME);
                 if (!obd) {
                         rc = lustre_start_simple(LUSTRE_MDS_OBDNAME, 
@@ -1061,10 +1078,12 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
                                                  LUSTRE_MDS_OBDNAME"_uuid",
                                                  0, 0);
                         if (rc) {
+                                mutex_up(&server_start_lock);
                                 CERROR("failed to start MDS: %d\n", rc);
                                 RETURN(rc);
                         }
                 }
+                mutex_up(&server_start_lock);
         }
         /* If we're an MDT, make sure the global MDS is running */
         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_MDT) {
@@ -1087,6 +1106,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
         /* If we're an OST, make sure the global OSS is running */
         if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
                 /* make sure OSS is started */
+                mutex_down(&server_start_lock);
                 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
                 if (!obd) {
                         rc = lustre_start_simple(LUSTRE_OSS_OBDNAME, 
@@ -1094,10 +1114,12 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
                                                  LUSTRE_OSS_OBDNAME"_uuid", 
                                                  0, 0);
                         if (rc) {
+                                mutex_up(&server_start_lock);
                                 CERROR("failed to start OSS: %d\n", rc);
                                 RETURN(rc);
                         }
                 }
+                mutex_up(&server_start_lock);
         }
 
         /* Set the mgc fs to our server disk.  This allows the MGC
@@ -1569,10 +1591,8 @@ static int server_fill_super(struct super_block *sb)
         /* start MGS before MGC */
         if (IS_MGS(lsi->lsi_ldd)) {
                 rc = server_start_mgs(sb);
-                if (rc) {
-                        CERROR("ignoring Failed MGS start!!\n");
-                        //GOTO(out_mnt, rc);
-                }
+                if (rc) 
+                        GOTO(out_mnt, rc);
         }
 
         rc = lustre_start_mgc(sb);
@@ -1712,11 +1732,16 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
 {
         char *s1 = ptr, *s2;
         __u32 index, *exclude_list;
-        int rc = 0;
+        int rc = 0, devmax;
         ENTRY;
+        
+        /* The shortest an ost name can be is 8 chars: -OST0000.
+           We don't actually know the fsname at this time, so in fact 
+           a user could specify any fsname. */
+        devmax = strlen(ptr) / 8 + 1;
 
         /* temp storage until we figure out how many we have */
-        OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+        OBD_ALLOC(exclude_list, sizeof(index) * devmax);
         if (!exclude_list)
                 RETURN(-ENOMEM);
 
@@ -1735,8 +1760,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
                 s1 = s2;
                 /* now we are pointing at ':' (next exclude)
                    or ',' (end of excludes) */
-
-                if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES)
+                if (lmd->lmd_exclude_count >= devmax)
                         break;
         }
         if (rc >= 0) /* non-err */
@@ -1754,7 +1778,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
                         lmd->lmd_exclude_count = 0;
                 }
         }
-        OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES);
+        OBD_FREE(exclude_list, sizeof(index) * devmax); 
         RETURN(rc);
 }
 
index c1a170a..1429bdf 100644 (file)
@@ -571,6 +571,8 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
 
 #ifdef __KERNEL__
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#include <linux/iobuf.h>
+
 static int echo_client_ubrw(struct obd_device *obd, int rw,
                             struct obdo *oa, struct lov_stripe_md *lsm,
                             obd_off offset, obd_size count, char *buffer,
index 0f25c77..8305eb5 100644 (file)
@@ -1,6 +1,6 @@
 MODULES := obdfilter
 
-obdfilter-objs := filter.o filter_io.o filter_log.o filter_san.o
+obdfilter-objs := filter.o filter_io.o filter_log.o
 obdfilter-objs += lproc_obdfilter.o filter_lvb.o
 
 ifeq ($(PATCHLEVEL),4)
index a380f19..12e8070 100644 (file)
@@ -1968,7 +1968,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
         ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
                            "filter_ldlm_cb_client", &obd->obd_ldlm_client);
 
-        rc = llog_cat_initialize(obd, 1);
+        rc = llog_cat_initialize(obd, 1, NULL);
         if (rc) {
                 CERROR("failed to setup llogging subsystems\n");
                 GOTO(err_post, rc);
@@ -2077,7 +2077,8 @@ static struct llog_operations filter_size_orig_logops = {
 };
 
 static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                            int count, struct llog_catid *catid)
+                            int count, struct llog_catid *catid,
+                            struct obd_uuid *uuid)
 {
         struct llog_ctxt *ctxt;
         int rc;
@@ -2222,6 +2223,9 @@ static int filter_connect_internal(struct obd_export *exp,
 
         data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
         exp->exp_connect_flags = data->ocd_connect_flags;
+        if (exp->exp_imp_reverse)
+                exp->exp_imp_reverse->imp_connect_data.ocd_connect_flags 
+                        = data->ocd_connect_flags;
         data->ocd_version = LUSTRE_VERSION_CODE;
 
         if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
@@ -3712,36 +3716,6 @@ static struct obd_ops filter_obd_ops = {
         .o_process_config = filter_process_config,
 };
 
-static struct obd_ops filter_sanobd_ops = {
-        .o_owner          = THIS_MODULE,
-        .o_get_info       = filter_get_info,
-        .o_set_info_async = filter_set_info_async,
-        .o_setup          = filter_san_setup,
-        .o_precleanup     = filter_precleanup,
-        .o_cleanup        = filter_cleanup,
-        .o_connect        = filter_connect,
-        .o_reconnect      = filter_reconnect,
-        .o_disconnect     = filter_disconnect,
-        .o_ping           = filter_ping,
-        .o_init_export    = filter_init_export,
-        .o_destroy_export = filter_destroy_export,
-        .o_statfs         = filter_statfs,
-        .o_getattr        = filter_getattr,
-        .o_unpackmd       = filter_unpackmd,
-        .o_create         = filter_create,
-        .o_setattr        = filter_setattr,
-        .o_destroy        = filter_destroy,
-        .o_brw            = filter_brw,
-        .o_punch          = filter_truncate,
-        .o_sync           = filter_sync,
-        .o_preprw         = filter_preprw,
-        .o_commitrw       = filter_commitrw,
-        .o_san_preprw     = filter_san_preprw,
-        .o_llog_init      = filter_llog_init,
-        .o_llog_finish    = filter_llog_finish,
-        .o_iocontrol      = filter_iocontrol,
-};
-
 quota_interface_t *quota_interface;
 extern quota_interface_t filter_quota_interface;
 
@@ -3754,6 +3728,7 @@ static int __init obdfilter_init(void)
 
         lprocfs_init_vars(filter, &lvars);
 
+        request_module("lquota");
         OBD_ALLOC(obdfilter_created_scratchpad,
                   OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
                   sizeof(*obdfilter_created_scratchpad));
@@ -3768,20 +3743,12 @@ static int __init obdfilter_init(void)
 
         quota_interface = PORTAL_SYMBOL_GET(filter_quota_interface);
         init_obd_quota_ops(quota_interface, &filter_obd_ops);
-        init_obd_quota_ops(quota_interface, &filter_sanobd_ops);
 
         rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars,
                                  LUSTRE_OST_NAME, NULL);
-        if (rc)
-                GOTO(out_fmd, rc);
-
-        rc = class_register_type(&filter_sanobd_ops, NULL, lvars.module_vars,
-                                 LUSTRE_OSTSAN_NAME, NULL);
         if (rc) {
                 int err;
 
-                class_unregister_type(LUSTRE_OST_NAME);
-out_fmd:
                 err = kmem_cache_destroy(ll_fmd_cachep);
                 LASSERTF(err == 0, "Cannot destroy ll_fmd_cachep: rc %d\n",err);
                 ll_fmd_cachep = NULL;
@@ -3808,7 +3775,6 @@ static void __exit obdfilter_exit(void)
                 ll_fmd_cachep = NULL;
         }
 
-        class_unregister_type(LUSTRE_OSTSAN_NAME);
         class_unregister_type(LUSTRE_OST_NAME);
         OBD_FREE(obdfilter_created_scratchpad,
                  OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
index f8ff9d5..4a3516f 100644 (file)
@@ -169,11 +169,6 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
 int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
                                struct llog_rec_hdr *rec, void *data);
 
-/* filter_san.c */
-int filter_san_setup(struct obd_device *obd, struct lustre_cfg *cfg);
-int filter_san_preprw(int cmd, struct obd_export *, struct obdo *, int objcount,
-                      struct obd_ioobj *, int niocount, struct niobuf_remote *);
-
 #ifdef LPROCFS
 void filter_tally_write(struct filter_obd *filter, struct page **pages,
                         int nr_pages, unsigned long *blocks,
index 6b4811a..b2be0d8 100644 (file)
@@ -664,6 +664,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
 
         iattr_from_obdo(&iattr, oa, i);
         if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
+                unsigned int save;
+
                 CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n",
                        (unsigned long)oa->o_uid, (unsigned long)oa->o_gid);
 
@@ -680,10 +682,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
 
                 /* To avoid problems with quotas, UID and GID must be set
                  * in the inode before filter_direct_io() - see bug 10357. */
-                if (iattr.ia_valid & ATTR_UID)
-                        inode->i_uid = iattr.ia_uid;
-                if (iattr.ia_valid & ATTR_GID)
-                        inode->i_gid = iattr.ia_gid;
+                save = iattr.ia_valid;
+                iattr.ia_valid &= (ATTR_UID | ATTR_GID);
+                rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0);
+                CDEBUG(D_QUOTA, "set uid(%u)/gid(%u) to ino(%lu). rc(%d)\n", 
+                                iattr.ia_uid, iattr.ia_gid, inode->i_ino, rc);
+                iattr.ia_valid = save & ~(ATTR_UID | ATTR_GID);
         }
 
         /* filter_direct_io drops i_mutex */
@@ -735,7 +739,7 @@ cleanup:
         err = lquota_adjust(quota_interface, obd, qcids, NULL, rc,
                             FSFILT_OP_CREATE);
         CDEBUG(err ? D_ERROR : D_QUOTA,
-               "error filter adjust qunit! (rc:%d)\n", err);
+               "filter adjust qunit! (rc:%d)\n", err);
 
         RETURN(rc);
 }
diff --git a/lustre/obdfilter/filter_san.c b/lustre/obdfilter/filter_san.c
deleted file mode 100644 (file)
index c679b3e..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  linux/fs/obdfilter/filter_san.c
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *   Author: Peter Braam <braam@clusterfs.com>
- *   Author: Andreas Dilger <adilger@clusterfs.com>
- *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
- *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
- *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
- */
-
-#define DEBUG_SUBSYSTEM S_FILTER
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
-#include <linux/version.h>
-
-#include <obd_class.h>
-#include <lustre_fsfilt.h>
-#include "filter_internal.h"
-
-/* sanobd setup methods - use a specific mount option */
-int filter_san_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
-{
-        unsigned long page;
-        int rc;
-
-        if (lcfg->lcfg_bufcount < 3 || LUSTRE_CFG_BUFLEN(lcfg, 2) < 1)
-                RETURN(-EINVAL);
-
-        /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
-        page = get_zeroed_page(GFP_KERNEL);
-        if (!page)
-                RETURN(-ENOMEM);
-
-        /* for ext3/ldiskfs filesystem, we must mount in 'writeback' mode */
-        if (!strcmp(lustre_cfg_string(lcfg, 2), "ldiskfs"))
-                strcpy((void *)page, "data=writeback");
-        else if (!strcmp(lustre_cfg_string(lcfg, 2), "ext3"))
-                strcpy((void *)page, "data=writeback,asyncdel");
-        else
-                LBUG(); /* just a reminder */
-
-        rc = filter_common_setup(obd, lcfg, (void *)page);
-        free_page(page);
-
-        return rc;
-}
-
-int filter_san_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
-                      int objcount, struct obd_ioobj *obj, int niocount,
-                      struct niobuf_remote *nb)
-{
-        struct obd_ioobj *o = obj;
-        struct niobuf_remote *rnb = nb;
-        int rc = 0;
-        int i;
-        ENTRY;
-        LASSERT(objcount == 1);
-
-        for (i = 0; i < objcount; i++, o++) {
-                struct dentry *dentry;
-                struct inode *inode;
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-                sector_t (*fs_bmap)(struct address_space *, sector_t);
-#else
-                int (*fs_bmap)(struct address_space *, long);
-#endif
-                int j;
-
-                dentry = filter_oa2dentry(exp->exp_obd, oa);
-                if (IS_ERR(dentry))
-                        GOTO(out, rc = PTR_ERR(dentry));
-
-                inode = dentry->d_inode;
-                fs_bmap = inode->i_mapping->a_ops->bmap;
-
-                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
-                        long block;
-
-                        block = rnb->offset >> inode->i_blkbits;
-
-                        if (cmd == OBD_BRW_READ) {
-                                block = fs_bmap(inode->i_mapping, block);
-                        } else {
-                                loff_t newsize = rnb->offset + rnb->len;
-                                /* fs_prep_san_write will also update inode
-                                 * size for us:
-                                 * (1) new alloced block
-                                 * (2) existed block but size extented
-                                 */
-                                /* FIXME We could call fs_prep_san_write()
-                                 * only once for all the blocks allocation.
-                                 * Now call it once for each block, for
-                                 * simplicity. And if error happens, we
-                                 * probably need to release previous alloced
-                                 * block */
-                                rc = fs_prep_san_write(exp->exp_obd, inode,
-                                                       &block, 1, newsize);
-                                if (rc)
-                                        break;
-                        }
-
-                        rnb->offset = block;
-                }
-                f_dput(dentry);
-        }
-out:
-        RETURN(rc);
-}
-
index 568a725..ce9107f 100644 (file)
@@ -1,4 +1,4 @@
 MODULES := osc
-osc-objs := osc_request.o lproc_osc.o osc_lib.o osc_create.o
+osc-objs := osc_request.o lproc_osc.o osc_create.o
 
 @INCLUDE_RULES@
index c9f2fbb..2b00785 100644 (file)
@@ -5,7 +5,7 @@
 
 if LIBLUSTRE
 noinst_LIBRARIES = libosc.a
-libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_internal.h
+libosc_a_SOURCES = osc_request.c osc_create.c osc_internal.h
 libosc_a_CPPFLAGS = $(LLCPPFLAGS)
 libosc_a_CFLAGS = $(LLCFLAGS)
 endif
diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c
deleted file mode 100644 (file)
index 39bd2f8..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Cluster File Systems, Inc.
- *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
- *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
- *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_OSC
-
-#ifdef __KERNEL__
-# include <linux/module.h>
-# include <obd.h>
-# include <obd_ost.h>
-# include <lustre_net.h>
-# include <lustre_dlm.h>
-# include <lustre_lib.h>
-# include <linux/lustre_compat25.h>
-
-/* convert a pathname into a kdev_t */
-static kdev_t path2dev(char *path)
-{
-        struct dentry *dentry;
-        struct nameidata nd;
-        kdev_t dev = KDEVT_INIT(0);
-
-        if (ll_path_lookup(path, LOOKUP_FOLLOW, &nd))
-                return val_to_kdev(0);
-
-        dentry = nd.dentry;
-        if (dentry->d_inode && !is_bad_inode(dentry->d_inode) &&
-            S_ISBLK(dentry->d_inode->i_mode))
-                dev = dentry->d_inode->i_rdev;
-        path_release(&nd);
-
-        return dev;
-}
-
-int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
-{
-        struct client_obd *cli = &obddev->u.cli;
-        ENTRY;
-
-        if (lcfg->lcfg_bufcount < 4 || LUSTRE_CFG_BUFLEN(lcfg, 3) < 1) {
-                CERROR("setup requires a SAN device pathname\n");
-                RETURN(-EINVAL);
-        }
-
-        client_obd_setup(obddev, lcfg);
-
-        cli->cl_sandev = path2dev(lustre_cfg_string(lcfg, 3));
-        if (!kdev_t_to_nr(cli->cl_sandev)) {
-                CERROR("%s seems not a valid SAN device\n",
-                       lustre_cfg_string(lcfg, 3));
-                RETURN(-EINVAL);
-        }
-
-        RETURN(0);
-}
-#endif
index 33a9710..13d7e03 100644 (file)
@@ -2484,298 +2484,6 @@ out:
         RETURN(rc);
 }
 
-/* Note: caller will lock/unlock, and set uptodate on the pages */
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct obd_export *exp, struct obd_info *oinfo,
-                           obd_count page_count, struct brw_page *pga)
-{
-        struct ptlrpc_request *req = NULL;
-        struct ost_body *body;
-        struct niobuf_remote *nioptr;
-        struct obd_ioobj *iooptr;
-        struct obd_import *imp = class_exp2cliimp(exp);
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body)};
-        int swab, mapped = 0, rc;
-        ENTRY;
-
-        /* XXX does not handle 'new' brw protocol */
-
-        size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj);
-        size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
-
-        req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_SAN_READ, 4, size, NULL);
-        if (!req)
-                RETURN(-ENOMEM);
-
-        /* FIXME bug 249 */
-        /* See bug 7198 */
-        if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL)
-                req->rq_request_portal = OST_IO_PORTAL;
-
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1,
-                                sizeof(*iooptr));
-        nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
-                                sizeof(*nioptr) * page_count);
-
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa));
-
-        obdo_to_ioobj(oinfo->oi_oa, iooptr);
-        iooptr->ioo_bufcnt = page_count;
-
-        for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
-                LASSERT(PageLocked(pga[mapped].pg));
-                LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off);
-
-                nioptr->offset = pga[mapped].off;
-                nioptr->len    = pga[mapped].count;
-                nioptr->flags  = pga[mapped].flag;
-        }
-
-        size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr);
-        ptlrpc_req_set_repsize(req, 3, size);
-
-        rc = ptlrpc_queue_wait(req);
-        if (rc)
-                GOTO(out_req, rc);
-
-        body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
-                                  lustre_swab_ost_body);
-        if (body == NULL) {
-                CERROR("Can't unpack body\n");
-                GOTO(out_req, rc = -EPROTO);
-        }
-
-        memcpy(oinfo->oi_oa, &body->oa, sizeof(*oinfo->oi_oa));
-
-        swab = lustre_msg_swabbed(req->rq_repmsg);
-        LASSERT_REPSWAB(req, REPLY_REC_OFF + 1);
-        nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-                                size[REPLY_REC_OFF + 1]);
-        if (!nioptr) {
-                /* nioptr missing or short */
-                GOTO(out_req, rc = -EPROTO);
-        }
-
-        /* actual read */
-        for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
-                struct page *page = pga[mapped].pg;
-                struct buffer_head *bh;
-                kdev_t dev;
-
-                if (swab)
-                        lustre_swab_niobuf_remote (nioptr);
-
-                /* got san device associated */
-                LASSERT(exp->exp_obd != NULL);
-                dev = exp->exp_obd->u.cli.cl_sandev;
-
-                /* hole */
-                if (!nioptr->offset) {
-                        CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n",
-                                        page->mapping->host->i_ino,
-                                        page->index);
-                        memset(page_address(page), 0, CFS_PAGE_SIZE);
-                        continue;
-                }
-
-                if (!page->buffers) {
-                        create_empty_buffers(page, dev, CFS_PAGE_SIZE);
-                        bh = page->buffers;
-
-                        clear_bit(BH_New, &bh->b_state);
-                        set_bit(BH_Mapped, &bh->b_state);
-                        bh->b_blocknr = (unsigned long)nioptr->offset;
-
-                        clear_bit(BH_Uptodate, &bh->b_state);
-
-                        ll_rw_block(READ, 1, &bh);
-                } else {
-                        bh = page->buffers;
-
-                        /* if buffer already existed, it must be the
-                         * one we mapped before, check it */
-                        LASSERT(!test_bit(BH_New, &bh->b_state));
-                        LASSERT(test_bit(BH_Mapped, &bh->b_state));
-                        LASSERT(bh->b_blocknr == (unsigned long)nioptr->offset);
-
-                        /* wait it's io completion */
-                        if (test_bit(BH_Lock, &bh->b_state))
-                                wait_on_buffer(bh);
-
-                        if (!test_bit(BH_Uptodate, &bh->b_state))
-                                ll_rw_block(READ, 1, &bh);
-                }
-
-
-                /* must do syncronous write here */
-                wait_on_buffer(bh);
-                if (!buffer_uptodate(bh)) {
-                        /* I/O error */
-                        rc = -EIO;
-                        goto out_req;
-                }
-        }
-
-out_req:
-        ptlrpc_req_finished(req);
-        RETURN(rc);
-}
-
-static int sanosc_brw_write(struct obd_export *exp, struct obd_info *oinfo,
-                            obd_count page_count, struct brw_page *pga)
-{
-        struct ptlrpc_request *req = NULL;
-        struct ost_body *body;
-        struct niobuf_remote *nioptr;
-        struct obd_ioobj *iooptr;
-        struct obd_import *imp = class_exp2cliimp(exp);
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
-        int swab, mapped = 0, rc;
-        ENTRY;
-
-        size[REQ_REC_OFF + 1] = sizeof(struct obd_ioobj);
-        size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
-
-        req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                                   OST_SAN_WRITE, 4, size, NULL,
-                                   imp->imp_rq_pool, NULL);
-        if (!req)
-                RETURN(-ENOMEM);
-
-        /* FIXME bug 249 */
-        /* See bug 7198 */
-        if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL)
-                req->rq_request_portal = OST_IO_PORTAL;
-
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        iooptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1,
-                                sizeof(*iooptr));
-        nioptr = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
-                                sizeof(*nioptr) * page_count);
-
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(body->oa));
-
-        obdo_to_ioobj(oinfo->oi_oa, iooptr);
-        iooptr->ioo_bufcnt = page_count;
-
-        /* pack request */
-        for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
-                LASSERT(PageLocked(pga[mapped].pg));
-                LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off);
-
-                nioptr->offset = pga[mapped].off;
-                nioptr->len    = pga[mapped].count;
-                nioptr->flags  = pga[mapped].flag;
-        }
-
-        size[REPLY_REC_OFF + 1] = page_count * sizeof(*nioptr);
-        ptlrpc_req_set_repsize(req, 3, size);
-
-        rc = ptlrpc_queue_wait(req);
-        if (rc)
-                GOTO(out_req, rc);
-
-        swab = lustre_msg_swabbed (req->rq_repmsg);
-        LASSERT_REPSWAB(req, REPLY_REC_OFF + 1);
-        nioptr = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-                                size[REPLY_REC_OFF + 1]);
-        if (!nioptr) {
-                CERROR("absent/short niobuf array\n");
-                GOTO(out_req, rc = -EPROTO);
-        }
-
-        /* actual write */
-        for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
-                struct page *page = pga[mapped].pg;
-                struct buffer_head *bh;
-                kdev_t dev;
-
-                if (swab)
-                        lustre_swab_niobuf_remote (nioptr);
-
-                /* got san device associated */
-                LASSERT(exp->exp_obd != NULL);
-                dev = exp->exp_obd->u.cli.cl_sandev;
-
-                if (!page->buffers) {
-                        create_empty_buffers(page, dev, CFS_PAGE_SIZE);
-                } else {
-                        /* checking */
-                        LASSERT(!test_bit(BH_New, &page->buffers->b_state));
-                        LASSERT(test_bit(BH_Mapped, &page->buffers->b_state));
-                        LASSERT(page->buffers->b_blocknr ==
-                                (unsigned long)nioptr->offset);
-                }
-                bh = page->buffers;
-
-                LASSERT(bh);
-
-                /* if buffer locked, wait it's io completion */
-                if (test_bit(BH_Lock, &bh->b_state))
-                        wait_on_buffer(bh);
-
-                clear_bit(BH_New, &bh->b_state);
-                set_bit(BH_Mapped, &bh->b_state);
-
-                /* override the block nr */
-                bh->b_blocknr = (unsigned long)nioptr->offset;
-
-                /* we are about to write it, so set it
-                 * uptodate/dirty
-                 * page lock should garentee no race condition here */
-                set_bit(BH_Uptodate, &bh->b_state);
-                set_bit(BH_Dirty, &bh->b_state);
-
-                ll_rw_block(WRITE, 1, &bh);
-
-                /* must do syncronous write here */
-                wait_on_buffer(bh);
-                if (!buffer_uptodate(bh) || test_bit(BH_Dirty, &bh->b_state)) {
-                        /* I/O error */
-                        rc = -EIO;
-                        goto out_req;
-                }
-        }
-
-out_req:
-        ptlrpc_req_finished(req);
-        RETURN(rc);
-}
-
-static int sanosc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
-                      obd_count page_count, struct brw_page *pga,
-                      struct obd_trans_info *oti)
-{
-        struct obd_import *imp = class_exp2cliimp(exp);
-        struct client_obd *cli = &imp->imp_obd->u.cli;
-        ENTRY;
-
-        while (page_count) {
-                obd_count pages_per_brw;
-                int rc;
-
-                if (page_count > cli->cl_max_pages_per_rpc)
-                        pages_per_brw = cli->cl_max_pages_per_rpc;
-                else
-                        pages_per_brw = page_count;
-
-                if (cmd & OBD_BRW_WRITE)
-                        rc = sanosc_brw_write(exp, oinfo, pages_per_brw, pga);
-                else
-                        rc = sanosc_brw_read(exp, oinfo, pages_per_brw, pga);
-
-                if (rc != 0)
-                        RETURN(rc);
-
-                page_count -= pages_per_brw;
-                pga += pages_per_brw;
-        }
-        RETURN(0);
-}
-#endif
-
 static void osc_set_data_with_check(struct lustre_handle *lockh, void *data,
                                     int flags)
 {
@@ -2930,12 +2638,13 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo,
                          * lov_enqueue() */
                 }
 
+                /* We already have a lock, and it's referenced */
+                oinfo->oi_cb_up(oinfo, ELDLM_OK);
+                
                 /* For async requests, decref the lock. */
                 if (einfo->ei_rqset)
                         ldlm_lock_decref(oinfo->oi_lockh, einfo->ei_mode);
 
-                /* We already have a lock, and it's referenced */
-                oinfo->oi_cb_up(oinfo, ELDLM_OK);
                 RETURN(ELDLM_OK);
         }
 
@@ -2965,8 +2674,8 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo,
                         osc_set_data_with_check(oinfo->oi_lockh,
                                                 einfo->ei_cbdata,
                                                 einfo->ei_flags);
-                        ldlm_lock_decref(oinfo->oi_lockh, LCK_PW);
                         oinfo->oi_cb_up(oinfo, ELDLM_OK);
+                        ldlm_lock_decref(oinfo->oi_lockh, LCK_PW);
                         RETURN(ELDLM_OK);
                 }
         }
@@ -3509,16 +3218,21 @@ static struct llog_operations osc_size_repl_logops = {
 
 static struct llog_operations osc_mds_ost_orig_logops;
 static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt,
-                        int count, struct llog_catid *catid)
+                         int count, struct llog_catid *catid, 
+                         struct obd_uuid *uuid)
 {
         int rc;
         ENTRY;
 
-        osc_mds_ost_orig_logops = llog_lvfs_ops;
-        osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup;
-        osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
-        osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add;
-        osc_mds_ost_orig_logops.lop_connect = llog_origin_connect;
+        spin_lock(&obd->obd_dev_lock);
+        if (osc_mds_ost_orig_logops.lop_setup != llog_obd_origin_setup) {
+                osc_mds_ost_orig_logops = llog_lvfs_ops;
+                osc_mds_ost_orig_logops.lop_setup = llog_obd_origin_setup;
+                osc_mds_ost_orig_logops.lop_cleanup = llog_obd_origin_cleanup;
+                osc_mds_ost_orig_logops.lop_add = llog_obd_origin_add;
+                osc_mds_ost_orig_logops.lop_connect = llog_origin_connect;
+        }
+        spin_unlock(&obd->obd_dev_lock);
 
         rc = llog_setup(obd, LLOG_MDS_OST_ORIG_CTXT, tgt, count,
                         &catid->lci_logid, &osc_mds_ost_orig_logops);
@@ -3821,59 +3535,17 @@ struct obd_ops osc_obd_ops = {
         .o_process_config       = osc_process_config,
 };
 
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct obd_ops sanosc_obd_ops = {
-        .o_owner                = THIS_MODULE,
-        .o_setup                = client_sanobd_setup,
-        .o_precleanup           = osc_precleanup,
-        .o_cleanup              = osc_cleanup,
-        .o_add_conn             = client_import_add_conn,
-        .o_del_conn             = client_import_del_conn,
-        .o_connect              = client_connect_import,
-        .o_reconnect            = osc_reconnect,
-        .o_disconnect           = client_disconnect_export,
-        .o_statfs               = osc_statfs,
-        .o_statfs_async         = osc_statfs_async,
-        .o_packmd               = osc_packmd,
-        .o_unpackmd             = osc_unpackmd,
-        .o_create               = osc_real_create,
-        .o_destroy              = osc_destroy,
-        .o_getattr              = osc_getattr,
-        .o_getattr_async        = osc_getattr_async,
-        .o_setattr              = osc_setattr,
-        .o_setattr_async        = osc_setattr_async,
-        .o_brw                  = sanosc_brw,
-        .o_punch                = osc_punch,
-        .o_sync                 = osc_sync,
-        .o_enqueue              = osc_enqueue,
-        .o_match                = osc_match,
-        .o_change_cbdata        = osc_change_cbdata,
-        .o_cancel               = osc_cancel,
-        .o_cancel_unused        = osc_cancel_unused,
-        .o_join_lru             = osc_join_lru,
-        .o_iocontrol            = osc_iocontrol,
-        .o_import_event         = osc_import_event,
-        .o_llog_init            = osc_llog_init,
-        .o_llog_finish          = osc_llog_finish,
-};
-#endif
-
 extern quota_interface_t osc_quota_interface;
 
 int __init osc_init(void)
 {
         struct lprocfs_static_vars lvars;
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        struct lprocfs_static_vars sanlvars;
-#endif
         int rc;
         ENTRY;
 
         lprocfs_init_vars(osc, &lvars);
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        lprocfs_init_vars(osc, &sanlvars);
-#endif
 
+        request_module("lquota");
         quota_interface = PORTAL_SYMBOL_GET(osc_quota_interface);
         lquota_init(quota_interface);
         init_obd_quota_ops(quota_interface, &osc_obd_ops);
@@ -3886,17 +3558,6 @@ int __init osc_init(void)
                 RETURN(rc);
         }
 
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        rc = class_register_type(&sanosc_obd_ops, NULL, sanlvars.module_vars,
-                                 LUSTRE_SANOSC_NAME, NULL);
-        if (rc) {
-                class_unregister_type(LUSTRE_OSC_NAME);
-                if (quota_interface)
-                        PORTAL_SYMBOL_PUT(osc_quota_interface);
-                RETURN(rc);
-        }
-#endif
-
         RETURN(rc);
 }
 
@@ -3907,9 +3568,6 @@ static void /*__exit*/ osc_exit(void)
         if (quota_interface)
                 PORTAL_SYMBOL_PUT(osc_quota_interface);
 
-#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        class_unregister_type(LUSTRE_SANOSC_NAME);
-#endif
         class_unregister_type(LUSTRE_OSC_NAME);
 }
 
index c2183cf..d883f47 100644 (file)
@@ -1074,95 +1074,6 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         RETURN(rc);
 }
 
-static int ost_san_brw(struct ptlrpc_request *req, int cmd)
-{
-        struct niobuf_remote *remote_nb, *res_nb, *pp_rnb = NULL;
-        struct obd_ioobj *ioo;
-        struct ost_body *body, *repbody;
-        int rc, i, objcount, niocount, npages, swab;
-        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
-        ENTRY;
-
-        /* XXX not set to use latest protocol */
-
-        swab = lustre_msg_swabbed(req->rq_reqmsg);
-        body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
-                                  lustre_swab_ost_body);
-        if (body == NULL) {
-                CERROR("Missing/short ost_body\n");
-                GOTO(out, rc = -EFAULT);
-        }
-
-        ioo = lustre_swab_reqbuf(req, REQ_REC_OFF + 1, sizeof(*ioo),
-                                 lustre_swab_obd_ioobj);
-        if (ioo == NULL) {
-                CERROR("Missing/short ioobj\n");
-                GOTO(out, rc = -EFAULT);
-        }
-        objcount = lustre_msg_buflen(req->rq_reqmsg, REQ_REC_OFF + 1) /
-                   sizeof(*ioo);
-        niocount = ioo[0].ioo_bufcnt;
-        for (i = 1; i < objcount; i++) {
-                if (swab)
-                        lustre_swab_obd_ioobj (&ioo[i]);
-                niocount += ioo[i].ioo_bufcnt;
-        }
-
-        remote_nb = lustre_swab_reqbuf(req, REQ_REC_OFF + 2,
-                                       niocount * sizeof(*remote_nb),
-                                       lustre_swab_niobuf_remote);
-        if (remote_nb == NULL) {
-                CERROR("Missing/short niobuf\n");
-                GOTO(out, rc = -EFAULT);
-        }
-        if (swab) {                             /* swab the remaining niobufs */
-                for (i = 1; i < niocount; i++)
-                        lustre_swab_niobuf_remote (&remote_nb[i]);
-        }
-
-        /*
-         * Per-thread array of struct niobuf_remote's was allocated by
-         * ost_thread_init().
-         */
-        pp_rnb = ost_tls(req)->remote;
-
-        /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
-        npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
-        if (npages < 0)
-                GOTO (out, rc = npages);
-
-        size[REPLY_REC_OFF + 1] = npages * sizeof(*pp_rnb);
-        rc = lustre_pack_reply(req, 3, size, NULL);
-        if (rc)
-                GOTO(out, rc);
-
-        req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
-                                        objcount, ioo, npages, pp_rnb);
-
-        if (req->rq_status)
-                GOTO(out, rc = 0);
-
-        repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-                                 sizeof(*repbody));
-        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-
-        res_nb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-                                size[REPLY_REC_OFF + 1]);
-        memcpy(res_nb, remote_nb, size[REPLY_REC_OFF + 1]);
-        rc = 0;
-out:
-        target_committed_to_req(req);
-        if (rc) {
-                req->rq_status = rc;
-                ptlrpc_error(req);
-        } else {
-                ptlrpc_reply(req);
-        }
-
-        return rc;
-}
-
-
 static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req)
 {
         char *key, *val = NULL;
@@ -1323,8 +1234,6 @@ int ost_msg_check_version(struct lustre_msg *msg)
         case OST_SETATTR:
         case OST_WRITE:
         case OST_READ:
-        case OST_SAN_READ:
-        case OST_SAN_WRITE:
         case OST_PUNCH:
         case OST_STATFS:
         case OST_SYNC:
@@ -1477,18 +1386,6 @@ static int ost_handle(struct ptlrpc_request *req)
                 LASSERT(current->journal_info == NULL);
                 /* ost_brw_read sends its own replies */
                 RETURN(rc);
-        case OST_SAN_READ:
-                CDEBUG(D_INODE, "san read\n");
-                OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
-                rc = ost_san_brw(req, OBD_BRW_READ);
-                /* ost_san_brw sends its own replies */
-                RETURN(rc);
-        case OST_SAN_WRITE:
-                CDEBUG(D_INODE, "san write\n");
-                OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
-                rc = ost_san_brw(req, OBD_BRW_WRITE);
-                /* ost_san_brw sends its own replies */
-                RETURN(rc);
         case OST_PUNCH:
                 CDEBUG(D_INODE, "punch\n");
                 OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
index 9b79a1e..bf5d9ba 100644 (file)
@@ -1035,6 +1035,9 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
         DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)",
                   (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent);
 
+        if (imp != NULL)
+                LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
+
         spin_lock(&req->rq_lock);
         req->rq_timedout = 1;
         req->rq_wait_ctx = 0;
index 8863dc2..e93e40b 100644 (file)
@@ -206,7 +206,7 @@ void request_in_callback(lnet_event_t *ev)
         req->rq_self = ev->target.nid;
         req->rq_rqbd = rqbd;
         req->rq_phase = RQ_PHASE_NEW;
-#if CRAY_XT3
+#ifdef CRAY_XT3
         req->rq_uid = ev->uid;
 #endif
 
index 600b514..740450c 100644 (file)
@@ -310,10 +310,12 @@ static int import_select_connection(struct obd_import *imp)
         class_export_put(dlmexp);
 
         if (imp->imp_conn_current != imp_conn) {
-                LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
-                              imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid,
-                              libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
-        imp->imp_conn_current = imp_conn;
+                if (imp->imp_conn_current)
+                        LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
+                                      imp->imp_obd->obd_name,
+                                      imp_conn->oic_uuid.uuid,
+                                      libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+                imp->imp_conn_current = imp_conn;
         }
 
         CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
@@ -824,6 +826,11 @@ static int ptlrpc_invalidate_import_thread(void *data)
 
         ptlrpc_invalidate_import(imp);
 
+        if (obd_dump_on_eviction) {
+                CERROR("dump the log upon eviction\n");
+                libcfs_debug_dumplog();
+        }
+
         IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
         ptlrpc_import_recovery_state_machine(imp);
 
index 0753225..2911944 100644 (file)
@@ -51,8 +51,8 @@ struct ll_rpc_opcode {
         { OST_OPEN,         "ost_open" },
         { OST_CLOSE,        "ost_close" },
         { OST_STATFS,       "ost_statfs" },
-        { OST_SAN_READ,     "ost_san_read" },
-        { OST_SAN_WRITE,    "ost_san_write" },
+        { 14,                NULL },
+        { 15,                NULL },
         { OST_SYNC,         "ost_sync" },
         { OST_SET_INFO,     "ost_set_info" },
         { OST_QUOTACHECK,   "ost_quotacheck" },
@@ -96,7 +96,7 @@ const char* ll_opcode2str(__u32 opcode)
          *        is missing from the table above.
          * or  2) The opcode space was renumbered or rearranged,
          *        and the opcode_offset() function in
-         *        ptlrpc_internals.h needs to be modified.
+         *        ptlrpc_internal.h needs to be modified.
          */
         __u32 offset = opcode_offset(opcode);
         LASSERT(offset < LUSTRE_MAX_OPCODES);
index 5724089..f88fa3f 100644 (file)
@@ -828,7 +828,7 @@ static inline void lustre_msg_set_buflen_v1(void *msg, int n, int len)
         m->lm_buflens[n] = len;
 }
 
-static inline int
+static inline void
 lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len)
 {
         if (n >= m->lm_bufcount)
@@ -2109,8 +2109,97 @@ int llog_log_swabbed(struct llog_log_hdr *hdr)
 void lustre_swab_qdata(struct qunit_data *d)
 {
         __swab32s (&d->qd_id);
+        __swab32s (&d->qd_flags);
+        __swab64s (&d->qd_count);
+}
+
+void lustre_swab_qdata_old(struct qunit_data_old *d)
+{
+        __swab32s (&d->qd_id);
         __swab32s (&d->qd_type);
         __swab32s (&d->qd_count);
         __swab32s (&d->qd_isblk);
 }
 
+#ifdef __KERNEL__
+/*
+ * Convert an old-format quota unit record into the new format, in place.
+ *
+ * The memory behind \a d is reused for the result: the old fields are
+ * copied to a local first, then rewritten through a struct qunit_data
+ * pointer to the same address.  qd_type and qd_isblk are folded into
+ * qd_flags as QUOTA_IS_GRP / QUOTA_IS_BLOCK; qd_id and qd_count are
+ * carried over.
+ *
+ * Returns the converted record (same address as \a d), or NULL when \a d
+ * is NULL.
+ *
+ * NOTE(review): relies on the buffer behind \a d being large enough to
+ * hold a struct qunit_data -- the struct layouts are not visible here,
+ * confirm against their definitions.
+ */
+struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d)
+{
+        struct qunit_data_old tmp;
+        struct qunit_data *ret;
+        ENTRY;
+
+        /* ENTRY has already been logged, so leave through RETURN() to
+         * keep the entry/exit trace balanced */
+        if (!d)
+                RETURN(NULL);
+
+        /* snapshot the old record: ret aliases d, so the stores below
+         * overwrite the fields we are reading from */
+        tmp = *d;
+        ret = (struct qunit_data *)d;
+        ret->qd_id = tmp.qd_id;
+        ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) |
+                        (tmp.qd_isblk ? QUOTA_IS_BLOCK : 0);
+        ret->qd_count = tmp.qd_count;
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lustre_quota_old_to_new);
+
+/*
+ * Convert a new-format quota unit record into the old format, in place.
+ *
+ * The memory behind \a d is reused for the result: the new fields are
+ * copied to a local first, then rewritten through a struct qunit_data_old
+ * pointer to the same address.  qd_flags is split back into qd_type
+ * (GRPQUOTA when QUOTA_IS_GRP is set, otherwise USRQUOTA) and qd_isblk
+ * (1 when QUOTA_IS_BLOCK is set, otherwise 0).
+ *
+ * qd_count is narrowed to 32 bits; the function asserts beforehand that
+ * it does not exceed MAX_QUOTA_COUNT32.
+ *
+ * Returns the converted record (same address as \a d), or NULL when \a d
+ * is NULL.
+ */
+struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d)
+{
+        struct qunit_data tmp;
+        struct qunit_data_old *ret;
+        ENTRY;
+
+        /* ENTRY has already been logged, so leave through RETURN() to
+         * keep the entry/exit trace balanced */
+        if (!d)
+                RETURN(NULL);
+
+        /* the old format only has a 32-bit count */
+        LASSERT(d->qd_count <= MAX_QUOTA_COUNT32);
+
+        /* snapshot the new record: ret aliases d, so the stores below
+         * overwrite the fields we are reading from */
+        tmp = *d;
+        ret = (struct qunit_data_old *)d;
+        ret->qd_id = tmp.qd_id;
+        ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA);
+        ret->qd_count = (__u32)tmp.qd_count;
+        ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 1 : 0);
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lustre_quota_new_to_old);
+#endif /* __KERNEL__ */
+
+
+void cdebug_va(cfs_debug_limit_state_t *cdls, __u32 mask,
+               const char *file, const char *func, const int line,
+               const char *fmt, va_list args);
+void cdebug(cfs_debug_limit_state_t *cdls, __u32 mask,
+            const char *file, const char *func, const int line,
+            const char *fmt, ...);
+/*
+ * Log a caller-supplied message followed by a summary of request \a req.
+ *
+ * Two records are emitted with the same \a cdls, \a level, \a file,
+ * \a func and \a line: first the caller's printf-style message (\a fmt
+ * plus the variadic arguments) through cdebug_va(), then a cdebug()
+ * record describing the request:
+ *   - the request pointer, rq_xid and rq_transno;
+ *   - the opcode of rq_reqmsg, or -1 when there is no request message;
+ *   - the target name: obd2cli_tgt() of the import's obd when rq_import
+ *     is set, else the export's client uuid, else "<?>";
+ *   - the remote uuid of the import's connection, else of the export's
+ *     connection, else "<?>";
+ *   - the request portal of the import's client, or -1 when there is no
+ *     import or no import client;
+ *   - rq_reqlen / rq_replen, the reference count and the request flags;
+ *   - the message flags of rq_reqmsg and rq_repmsg (0 when absent);
+ *   - rq_status and the status of rq_repmsg (0 when there is no reply).
+ *
+ * NOTE(review): imp_connection and exp_connection are dereferenced
+ * whenever rq_import / rq_export is non-NULL, with no NULL check of the
+ * connection itself -- confirm they are always set at every call site.
+ */
+void debug_req(cfs_debug_limit_state_t *cdls,
+               __u32 level, struct ptlrpc_request *req,
+               const char *file, const char *func, const int line,
+               const char *fmt, ...)
+{
+        va_list args;
+        
+        va_start(args, fmt);
+        cdebug_va(cdls, level, file, func, line, fmt, args);
+        va_end(args);
+
+        cdebug(cdls, level, file, func, line,
+               " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "
+               REQ_FLAGS_FMT"/%x/%x rc %d/%d\n",
+               req, req->rq_xid, req->rq_transno,
+               req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
+               req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :
+                  req->rq_export ?
+                  (char*)req->rq_export->exp_client_uuid.uuid : "<?>",
+               req->rq_import ?
+                  (char *)req->rq_import->imp_connection->c_remote_uuid.uuid :
+                  req->rq_export ?
+                  (char *)req->rq_export->exp_connection->c_remote_uuid.uuid : "<?>",
+               (req->rq_import && req->rq_import->imp_client) ?
+                  req->rq_import->imp_client->cli_request_portal : -1,
+               req->rq_reqlen, req->rq_replen, atomic_read(&req->rq_refcount),
+               DEBUG_REQ_FLAGS(req),
+               req->rq_reqmsg ? lustre_msg_get_flags(req->rq_reqmsg) : 0,
+               req->rq_repmsg ? lustre_msg_get_flags(req->rq_repmsg) : 0,
+               req->rq_status,
+               req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0);
+}
+EXPORT_SYMBOL(debug_req);
index be470c9..b1c852b 100644 (file)
@@ -243,6 +243,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
 EXPORT_SYMBOL(lustre_swab_ldlm_request);
 EXPORT_SYMBOL(lustre_swab_ldlm_reply);
 EXPORT_SYMBOL(lustre_swab_qdata);
+EXPORT_SYMBOL(lustre_swab_qdata_old);
 EXPORT_SYMBOL(lustre_msg_get_flags);
 EXPORT_SYMBOL(lustre_msg_add_flags);
 EXPORT_SYMBOL(lustre_msg_set_flags);
@@ -274,8 +275,6 @@ EXPORT_SYMBOL(lustre_swab_md_fld);
 EXPORT_SYMBOL(lustre_swab_generic_32s);
 
 /* recover.c */
-EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall);
-EXPORT_SYMBOL(ptlrpc_run_failed_import_upcall);
 EXPORT_SYMBOL(ptlrpc_disconnect_import);
 EXPORT_SYMBOL(ptlrpc_resend);
 EXPORT_SYMBOL(ptlrpc_wake_delayed);
index 1ca9a20..1849656 100644 (file)
 
 static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
 
-void ptlrpc_run_recovery_over_upcall(struct obd_device *obd)
-{
-        char *argv[4];
-        char *envp[3];
-        int rc;
-        ENTRY;
-
-        argv[0] = obd_lustre_upcall;
-        argv[1] = "RECOVERY_OVER";
-        argv[2] = obd->obd_uuid.uuid;
-        argv[3] = NULL;
-        
-        envp[0] = "HOME=/";
-        envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-        envp[2] = NULL;
-
-        rc = USERMODEHELPER(argv[0], argv, envp);
-        if (rc < 0) {
-                CERROR("Error invoking recovery upcall %s %s %s: %d; check "
-                       "/proc/sys/lustre/upcall\n",
-                       argv[0], argv[1], argv[2], rc);
-
-        } else {
-                CWARN("Invoked upcall %s %s %s\n",
-                      argv[0], argv[1], argv[2]);
-        }
-}
-
-void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
-{
-#ifdef __KERNEL__
-        char *argv[7];
-        char *envp[3];
-        int rc;
-        ENTRY;
-
-        spin_lock(&imp->imp_lock);
-        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
-                spin_unlock(&imp->imp_lock);
-                EXIT;
-                return;
-        }
-        spin_unlock(&imp->imp_lock);
-
-        argv[0] = obd_lustre_upcall;
-        argv[1] = "FAILED_IMPORT";
-        argv[2] = obd2cli_tgt(imp->imp_obd);
-        argv[3] = imp->imp_obd->obd_name;
-        argv[4] = imp->imp_connection->c_remote_uuid.uuid;
-        argv[5] = imp->imp_obd->obd_uuid.uuid;
-        argv[6] = NULL;
-
-        envp[0] = "HOME=/";
-        envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-        envp[2] = NULL;
-
-        rc = USERMODEHELPER(argv[0], argv, envp);
-        if (rc < 0) {
-                CERROR("Error invoking recovery upcall %s %s %s %s %s %s: %d; "
-                       "check /proc/sys/lustre/lustre_upcall\n",
-                       argv[0], argv[1], argv[2], argv[3], argv[4], argv[5],rc);
-        } else {
-                CWARN("Invoked upcall %s %s %s %s %s %s\n",
-                      argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
-        }
-#else
-        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
-                EXIT;
-                return;
-        }
-        ptlrpc_recover_import(imp, NULL);
-#endif
-}
-
-/* This might block waiting for the upcall to start, so it should
- * not be called from a thread that shouldn't block. (Like ptlrpcd) */
 void ptlrpc_initiate_recovery(struct obd_import *imp)
 {
         ENTRY;
 
-        LASSERT (obd_lustre_upcall != NULL);
-
-        if (strcmp(obd_lustre_upcall, "DEFAULT") == 0) {
-                CDEBUG(D_HA, "%s: starting recovery without upcall\n",
-                        obd2cli_tgt(imp->imp_obd));
-                ptlrpc_connect_import(imp, NULL);
-        } else if (strcmp(obd_lustre_upcall, "NONE") == 0) {
-                CDEBUG(D_HA, "%s: recovery disabled\n",
-                        obd2cli_tgt(imp->imp_obd));
-        } else {
-                CDEBUG(D_HA, "%s: calling upcall to start recovery\n",
-                        obd2cli_tgt(imp->imp_obd));
-                ptlrpc_run_failed_import_upcall(imp);
-        }
+        CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
+        ptlrpc_connect_import(imp, NULL);
 
         EXIT;
 }
index b9dddba..474a7b3 100644 (file)
@@ -933,11 +933,9 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
         /* special security flags accoding to opcode */
         switch (opcode) {
         case OST_READ:
-        case OST_SAN_READ:
                 req->rq_bulk_read = 1;
                 break;
         case OST_WRITE:
-        case OST_SAN_WRITE:
                 req->rq_bulk_write = 1;
                 break;
         case SEC_CTX_INIT:
@@ -1473,9 +1471,9 @@ int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
         if (rc == SECSVC_OK) {
                 __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
 
-                if (opc == OST_WRITE || opc == OST_SAN_WRITE)
+                if (opc == OST_WRITE)
                         req->rq_bulk_write = 1;
-                else if (opc == OST_READ || opc == OST_SAN_READ)
+                else if (opc == OST_READ)
                         req->rq_bulk_read = 1;
         }
 
index 47506c1..bd69097 100644 (file)
@@ -49,6 +49,38 @@ struct lustre_qunit {
         struct list_head lq_waiters;            /* All write threads waiting for this qunit */
 };
 
+/* Return 1 if quota data sent over @imp must use the old 32-bit qunit_data
+ * layout (OBD_CONNECT_QUOTA64 not set in the connect flags), 0 otherwise.
+ * Unset import flags are filled in from the export whose reverse import is
+ * @imp; 0 is returned if no such export exists. */
+int should_translate_quota (struct obd_import *imp)
+{
+        struct obd_device *obd;
+        struct obd_export *tmp;
+        ENTRY;
+
+        LASSERT(imp);
+        if (imp->imp_connect_data.ocd_connect_flags)
+                RETURN(!(imp->imp_connect_data.ocd_connect_flags &
+                         OBD_CONNECT_QUOTA64));
+
+        obd = imp->imp_obd;
+        spin_lock(&obd->obd_dev_lock);
+        list_for_each_entry(tmp, &obd->obd_exports, exp_obd_chain) {
+                if (tmp->exp_imp_reverse != imp)
+                        continue;
+                /* copy the flags while obd_dev_lock still protects tmp */
+                imp->imp_connect_data.ocd_connect_flags = tmp->exp_connect_flags;
+                spin_unlock(&obd->obd_dev_lock);
+                RETURN(!(imp->imp_connect_data.ocd_connect_flags &
+                         OBD_CONNECT_QUOTA64));
+        }
+        spin_unlock(&obd->obd_dev_lock);
+        CDEBUG(D_QUOTA, "don't find the corresponding export!\n");
+
+        RETURN(0);
+}
+
 void qunit_cache_cleanup(void)
 {
         int i;
@@ -87,11 +119,15 @@ int qunit_cache_init(void)
         RETURN(0);
 }
 
-static inline int const
+static inline int
+qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
+             __attribute__((__const__));
+
+static inline int
 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
 {
         unsigned int id = qdata->qd_id;
-        unsigned int type = qdata->qd_type;
+        unsigned int type = qdata->qd_flags & QUOTA_IS_GRP;
 
         unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id;
         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
@@ -110,8 +146,7 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent,
         list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) {
                 tmp = &qunit->lq_data;
                 if (qunit->lq_ctxt == qctxt &&
-                    qdata->qd_id == tmp->qd_id && qdata->qd_type == tmp->qd_type
-                    && qdata->qd_isblk == tmp->qd_isblk)
+                    qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags)
                         return qunit;
         }
         return NULL;
@@ -135,13 +170,15 @@ check_cur_qunit(struct obd_device *obd,
         __u64 usage, limit;
         struct obd_quotactl *qctl;
         int ret = 0;
+        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
         ENTRY;
 
         if (!sb_any_quota_enabled(sb))
                 RETURN(0);
 
         /* ignore root user */
-        if (qdata->qd_id == 0 && qdata->qd_type == USRQUOTA)
+        if (qdata->qd_id == 0 && qdata_type == USRQUOTA)
                 RETURN(0);
 
         OBD_ALLOC_PTR(qctl);
@@ -151,7 +188,7 @@ check_cur_qunit(struct obd_device *obd,
         /* get fs quota usage & limit */
         qctl->qc_cmd = Q_GETQUOTA;
         qctl->qc_id = qdata->qd_id;
-        qctl->qc_type = qdata->qd_type;
+        qctl->qc_type = qdata_type;
         ret = fsfilt_quotactl(obd, sb, qctl);
         if (ret) {
                 if (ret == -ESRCH)      /* no limit */
@@ -161,7 +198,7 @@ check_cur_qunit(struct obd_device *obd,
                 GOTO(out, ret);
         }
 
-        if (qdata->qd_isblk) {
+        if (is_blk) {
                 usage = qctl->qc_dqblk.dqb_curspace;
                 limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
                 qunit_sz = qctxt->lqc_bunit_sz;
@@ -180,8 +217,8 @@ check_cur_qunit(struct obd_device *obd,
                 GOTO(out, ret = 0);
 
         /* we don't count the MIN_QLIMIT */
-        if ((limit == MIN_QLIMIT && !qdata->qd_isblk) ||
-            (toqb(limit) == MIN_QLIMIT && qdata->qd_isblk))
+        if ((limit == MIN_QLIMIT && !is_blk) ||
+            (toqb(limit) == MIN_QLIMIT && is_blk))
                 limit = 0;
 
         LASSERT(qdata->qd_count == 0);
@@ -273,11 +310,6 @@ struct qunit_waiter {
         int qw_rc;
 };
 
-#define QDATA_DEBUG(qd, fmt, arg...)                                    \
-        CDEBUG(D_QUOTA, "id(%u) type(%u) count(%u) isblk(%u):"          \
-               fmt, qd->qd_id, qd->qd_type, qd->qd_count, qd->qd_isblk, \
-               ## arg);                                                 \
-
 #define INC_QLIMIT(limit, count) (limit == MIN_QLIMIT) ? \
                                  (limit = count) : (limit += count)
 
@@ -294,6 +326,35 @@ static int
 schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
                struct qunit_data *qdata, int opc, int wait);
 
+/* Schedule dqacq/dqrel for @qdata, first sending MAX_QUOTA_COUNT32-sized
+ * pieces while should_translate_quota() holds; returns first non-zero rc. */
+static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                                       struct qunit_data *qdata, int opc, int wait)
+{
+        struct qunit_data chunk;
+        int first_err = 0, err;
+        ENTRY;
+
+        LASSERT(qdata);
+        if (qctxt->lqc_import) {
+                while (should_translate_quota(qctxt->lqc_import) &&
+                       qdata->qd_count > MAX_QUOTA_COUNT32) {
+                        chunk = *qdata;
+                        chunk.qd_count = MAX_QUOTA_COUNT32;
+                        qdata->qd_count -= chunk.qd_count;
+                        err = schedule_dqacq(obd, qctxt, &chunk, opc, wait);
+                        first_err = first_err ? first_err : err;
+                }
+        }
+
+        if (qdata->qd_count != 0) {
+                err = schedule_dqacq(obd, qctxt, qdata, opc, wait);
+                first_err = first_err ? first_err : err;
+        }
+
+        RETURN(first_err);
+}
+
 static int
 dqacq_completion(struct obd_device *obd,
                  struct lustre_quota_ctxt *qctxt,
@@ -304,15 +365,18 @@ dqacq_completion(struct obd_device *obd,
         unsigned long qunit_sz;
         struct qunit_waiter *qw, *tmp;
         int err = 0;
+        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
         ENTRY;
 
         LASSERT(qdata);
-        qunit_sz = qdata->qd_isblk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
-        LASSERT(!(qdata->qd_count % qunit_sz));
+        qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
+        /* now qdata->qd_count is 64bit, we can't do it */
+        //LASSERT(!(qdata->qd_count % qunit_sz));
 
         /* update local operational quota file */
         if (rc == 0) {
-                __u32 count = QUSG(qdata->qd_count, qdata->qd_isblk);
+                __u32 count = QUSG(qdata->qd_count, is_blk);
                 struct obd_quotactl *qctl;
                 __u64 *hardlimit;
 
@@ -325,14 +389,14 @@ dqacq_completion(struct obd_device *obd,
                  * set fs quota limit */
                 qctl->qc_cmd = Q_GETQUOTA;
                 qctl->qc_id = qdata->qd_id;
-                qctl->qc_type = qdata->qd_type;
+                qctl->qc_type = qdata_type;
                 err = fsfilt_quotactl(obd, sb, qctl);
                 if (err) {
                         CERROR("error get quota fs limit! (rc:%d)\n", err);
                         GOTO(out_mem, err);
                 }
 
-                if (qdata->qd_isblk) {
+                if (is_blk) {
                         qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
                         hardlimit = &qctl->qc_dqblk.dqb_bhardlimit;
                 } else {
@@ -402,8 +466,8 @@ out:
          *   - local dqacq/dqrel.
          *   - local disk io failure.
          */
-        if (err || (rc && rc != -EBUSY) ||
-            is_master(obd, qctxt, qdata->qd_id, qdata->qd_type))
+        if (err || (rc && rc != -EBUSY) || 
+            is_master(obd, qctxt, qdata->qd_id, qdata_type))
                 RETURN(err);
 
         /* reschedule another dqacq/dqrel if needed */
@@ -412,7 +476,7 @@ out:
         if (rc > 0) {
                 int opc;
                 opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                rc = schedule_dqacq(obd, qctxt, qdata, opc, 0);
+                rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0);
                 QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc);
         }
         RETURN(err);
@@ -430,17 +494,28 @@ static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc)
         struct lustre_qunit *qunit = aa->aa_qunit;
         struct obd_device *obd = req->rq_import->imp_obd;
         struct qunit_data *qdata = NULL;
+        struct qunit_data_old *qdata_old = NULL;
         ENTRY;
 
-        qdata = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*qdata),
-                                   lustre_swab_qdata);
+        LASSERT(req);
+        LASSERT(req->rq_import);
+        if ((req->rq_import->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64)  &&
+            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
+                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+                qdata = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(*qdata), lustre_swab_qdata);
+        } else {
+                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+                qdata_old = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(struct qunit_data_old),
+                                               lustre_swab_qdata_old);
+                qdata = lustre_quota_old_to_new(qdata_old);
+        }
         if (qdata == NULL) {
                 DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data\n");
                 RETURN(-EPROTO);
         }
 
         LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
-                qdata->qd_type == qunit->lq_data.qd_type &&
+                (qdata->qd_flags & QUOTA_IS_GRP) == (qunit->lq_data.qd_flags & QUOTA_IS_GRP) &&
                 (qdata->qd_count == qunit->lq_data.qd_count ||
                  qdata->qd_count == 0));
 
@@ -506,7 +581,7 @@ schedule_dqacq(struct obd_device *obd,
         LASSERT(qunit);
 
         /* master is going to dqacq/dqrel from itself */
-        if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_type)) {
+        if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP)) {
                 int rc2;
                 QDATA_DEBUG(qdata, "local %s.\n",
                             opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
@@ -524,9 +599,26 @@ schedule_dqacq(struct obd_device *obd,
                 RETURN(-ENOMEM);
         }
 
-        reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF,
-                                 sizeof(*reqdata));
-        *reqdata = *qdata;
+        LASSERT(!should_translate_quota(qctxt->lqc_import) || 
+                qdata->qd_count <= MAX_QUOTA_COUNT32);
+        if (should_translate_quota(qctxt->lqc_import) ||
+            OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT))
+        {
+                struct qunit_data_old *reqdata_old, *tmp;
+                        
+                reqdata_old = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, 
+                                             sizeof(*reqdata_old));
+                tmp = lustre_quota_new_to_old(qdata);
+                *reqdata_old = *tmp;
+                size[1] = sizeof(*reqdata_old);
+                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+        } else {
+                reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF,
+                                         sizeof(*reqdata));
+                *reqdata = *qdata;
+                size[1] = sizeof(*reqdata);
+                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
+        }
         ptlrpc_req_set_repsize(req, 2, size);
 
         CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -568,8 +660,9 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
 
         for (i = 0; i < MAXQUOTAS; i++) {
                 qdata[i].qd_id = id[i];
-                qdata[i].qd_type = i;
-                qdata[i].qd_isblk = isblk;
+                qdata[i].qd_flags = 0;
+                qdata[i].qd_flags |= i;
+                qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;        
                 qdata[i].qd_count = 0;
 
                 ret = check_cur_qunit(obd, qctxt, &qdata[i]);
@@ -577,7 +670,8 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
                         int opc;
                         /* need acquire or release */
                         opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                        ret = schedule_dqacq(obd, qctxt, &qdata[i], opc, wait);
+                        ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i], 
+                                                          opc, wait);
                         if (!rc)
                                 rc = ret;
                 }
@@ -601,8 +695,9 @@ qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id,
         qw.qw_rc = 0;
 
         qdata.qd_id = id;
-        qdata.qd_type = type;
-        qdata.qd_isblk = isblk;
+        qdata.qd_flags = 0;
+        qdata.qd_flags |= type;
+        qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
         qdata.qd_count = 0;
 
         spin_lock(&qunit_hash_lock);
@@ -735,15 +830,16 @@ static int qslave_recovery_main(void *arg)
                                 goto free;
 
                         qdata.qd_id = dqid->di_id;
-                        qdata.qd_type = type;
-                        qdata.qd_isblk = 1;
+                        qdata.qd_flags = 0;
+                        qdata.qd_flags |= type;
+                        qdata.qd_flags |= QUOTA_IS_BLOCK;
                         qdata.qd_count = 0;
 
                         ret = check_cur_qunit(obd, qctxt, &qdata);
                         if (ret > 0) {
                                 int opc;
                                 opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                                rc = schedule_dqacq(obd, qctxt, &qdata, opc, 0);
+                                rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0);
                         } else
                                 rc = 0;
 
index b8a3516..69af027 100644 (file)
@@ -412,9 +412,10 @@ static struct list_head qinfo_hash[NR_DQHASH];
 /* SLAB cache for client quota context */
 cfs_mem_cache_t *qinfo_cachep = NULL;
 
-static inline int const hashfn(struct client_obd *cli,
-                               unsigned long id,
-                               int type)
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
+                         __attribute__((__const__));
+
+static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
 {
         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
index 6e8438e..5756e88 100644 (file)
                qinfo->qi_info[0].dqi_free_entry,                              \
                qinfo->qi_info[1].dqi_free_entry, ## arg);
 
+/* Log a qunit_data as id/type/count/isblk; qd may be any pointer expression */
+#define QDATA_DEBUG(qd, fmt, arg...)                                          \
+        CDEBUG(D_QUOTA, "id(%u) type(%lu) count(%llu) isblk(%lu):"            \
+               fmt, (qd)->qd_id, (qd)->qd_flags & QUOTA_IS_GRP,               \
+               (qd)->qd_count, ((qd)->qd_flags & QUOTA_IS_BLOCK) >> 1,        \
+               ## arg);
+
 /* quota_context.c */
 void qunit_cache_cleanup(void);
 int qunit_cache_init(void);
index 7332669..77d6699 100644 (file)
@@ -77,8 +77,12 @@ void lustre_dquot_exit(void)
         EXIT;
 }
 
-static inline int const dquot_hashfn(struct lustre_quota_info *info,
-                                     unsigned int id, int type)
+static inline int
+dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type)
+             __attribute__((__const__));
+
+static inline int
+dquot_hashfn(struct lustre_quota_info *info, unsigned int id, int type)
 {
         unsigned long tmp = ((unsigned long)info >> L1_CACHE_SHIFT) ^ id;
         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
@@ -201,6 +205,8 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
         struct lustre_dquot *dquot = NULL;
         __u64 *usage = NULL;
         __u32 hlimit = 0, slimit = 0;
+        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
+        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
         time_t *time = NULL;
         unsigned int grace = 0;
         int rc = 0;
@@ -209,9 +215,9 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
         OBD_FAIL_RETURN(OBD_FAIL_OBD_DQACQ, -EIO);
 
         /* slaves never acquires qunit for user root */
-        LASSERT(qdata->qd_id || qdata->qd_type == GRPQUOTA);
+        LASSERT(qdata->qd_id || qdata_type);
 
-        dquot = lustre_dqget(obd, info, qdata->qd_id, qdata->qd_type);
+        dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type);
         if (IS_ERR(dquot))
                 RETURN(PTR_ERR(dquot));
 
@@ -226,14 +232,14 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
                 GOTO(out, rc = -EBUSY);
         }
 
-        if (qdata->qd_isblk) {
-                grace = info->qi_info[qdata->qd_type].dqi_bgrace;
+        if (is_blk) {
+                grace = info->qi_info[qdata_type].dqi_bgrace;
                 usage = &dquot->dq_dqb.dqb_curspace;
                 hlimit = dquot->dq_dqb.dqb_bhardlimit;
                 slimit = dquot->dq_dqb.dqb_bsoftlimit;
                 time = &dquot->dq_dqb.dqb_btime;
         } else {
-                grace = info->qi_info[qdata->qd_type].dqi_igrace;
+                grace = info->qi_info[qdata_type].dqi_igrace;
                 usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes;
                 hlimit = dquot->dq_dqb.dqb_ihardlimit;
                 slimit = dquot->dq_dqb.dqb_isoftlimit;
@@ -250,11 +256,11 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
         switch (opc) {
         case QUOTA_DQACQ:
                 if (hlimit && 
-                    QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > hlimit)
+                    QUSG(*usage + qdata->qd_count, is_blk) > hlimit)
                         GOTO(out, rc = -EDQUOT);
 
                 if (slimit &&
-                    QUSG(*usage + qdata->qd_count, qdata->qd_isblk) > slimit) {
+                    QUSG(*usage + qdata->qd_count, is_blk) > slimit) {
                         if (*time && cfs_time_current_sec() >= *time)
                                 GOTO(out, rc = -EDQUOT);
                         else if (!*time)
@@ -272,7 +278,7 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
                         *usage -= qdata->qd_count;
 
                 /* (usage <= soft limit) but not (usage < soft limit) */
-                if (!slimit || QUSG(*usage, qdata->qd_isblk) <= slimit)
+                if (!slimit || QUSG(*usage, is_blk) <= slimit)
                         *time = 0;
                 break;
         default:
index 287691a..2588e6a 100644 (file)
@@ -6,11 +6,11 @@
 # These are scripts that are generated from .in files
 genscripts = lustre_config.sh lc_modprobe.sh lc_net.sh lc_hb.sh lc_cluman.sh lustre_createcsv.sh lc_md.sh lc_lvm.sh
 
-sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh
+sbin_SCRIPTS = $(genscripts) lc_servip.sh lustre_up14.sh lustre_rmmod.sh
 
 EXTRA_DIST = license-status maketags.sh version_tag.pl.in lc_common.sh \
             $(addsuffix .in,$(genscripts)) lc_mon.sh lc_servip.sh \
-            lustre_up14.sh
+            lustre_up14.sh lustre_rmmod.sh
 
 scriptlibdir = $(libdir)/@PACKAGE@
 scriptlib_DATA = lc_common.sh
index 14a690d..86959bb 100644 (file)
@@ -173,13 +173,13 @@ foreach my $mds (@{$objs{"mds"}}) {
     my $lov = $mds->{"lov"};
     my $mkfs_options="";
     if (defined($lov->{"stripe_sz"})) {
-        $mkfs_options .= "lov.stripe.size=" . $lov->{"stripe_sz"} . " ";
+        $mkfs_options .= "lov.stripesize=" . $lov->{"stripe_sz"} . " ";
     }
     if (defined($lov->{"stripe_cnt"})) {
-        $mkfs_options .= "lov.stripe.count=" . $lov->{"stripe_cnt"} . " ";
+        $mkfs_options .= "lov.stripecount=" . $lov->{"stripe_cnt"} . " ";
     }
     if (defined($lov->{"stripe_pattern"})) {
-        $mkfs_options .= "lov.stripe.pattern=" . $lov->{"stripe_pattern"} . " ";
+        $mkfs_options .= "lov.stripetype=" . $lov->{"stripe_pattern"} . " ";
     }
     chop($mkfs_options);
     if ($mkfs_options ne "") {
diff --git a/lustre/scripts/lustre_rmmod.sh b/lustre/scripts/lustre_rmmod.sh
new file mode 100755 (executable)
index 0000000..2f6b6c2
--- /dev/null
@@ -0,0 +1,12 @@
+#!/bin/sh
+# Remove all lustre modules.  Won't succeed if they're in use, or if you
+# manually did a 'lctl network up'.  Exits 0 if nothing is left to unload
+# after the first pass, otherwise with the status of the second pass.
+###############################################################################
+
+SRCDIR=$(dirname "$0")
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
+
+lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
+# do it again, in case we tried to unload the lnd's too early
+lsmod | grep lnet > /dev/null || exit 0
+lctl modules | awk '{ print $2 }' | xargs rmmod
index 0f8567f..ecd2c7c 100644 (file)
@@ -21,7 +21,7 @@ ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT $MOUNTCONFSKIP 23"
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 RLUSTRE=${RLUSTRE:-$LUSTRE}
 MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre}
@@ -82,7 +82,7 @@ stop_ost2() {
 
 mount_client() {
        local MOUNTPATH=$1
-       echo "mount lustre on ${MOUNTPATH}....."
+       echo "mount $FSNAME on ${MOUNTPATH}....."
        zconf_mount `hostname` $MOUNTPATH  || return 96
 }
 
@@ -856,6 +856,53 @@ test_23() {
 }
 run_test 23 "interrupt client during recovery mount delay"
 
+test_24a() {   # run a second filesystem, ${FSNAME}2, served from ${MDSDEV}_2
+       local fs2mds_HOST=$mds_HOST
+        add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --nomgs --mgsnode=$MGSNID --reformat ${MDSDEV}_2 || exit 10
+       # the fs2 OST uses the first OST device name with a "_2" suffix
+       local fs2ost_HOST=$ost_HOST
+       local fs2ostdev=$(ostdevname 1)_2
+       add fs2ost $OST_MKFS_OPTS --fsname=${FSNAME}2 --reformat $fs2ostdev || exit 10
+       # run setup, start both fs2 targets, then mount ${FSNAME}2 on $MOUNT2
+       setup
+       start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS
+       start fs2ost $fs2ostdev $OST_MOUNT_OPTS
+       mkdir -p $MOUNT2
+       mount -t lustre $MGSNID:/${FSNAME}2 $MOUNT2 || return 1
+       check_mount || return 2
+       sleep 5
+       cp /etc/passwd $MOUNT2/b || return 3   # basic create/unlink on fs2
+       rm $MOUNT2/b || return 4
+        grep $MOUNT2' ' /proc/mounts > /dev/null || return 5   # still mounted?
+       df
+       stop_mds        # called before the fs2 client and targets are stopped
+       umount $MOUNT2
+       stop fs2mds -f
+       stop fs2ost -f
+       cleanup || return 6
+}
+run_test 24a "Multiple MDTs on a single node"
+
+test_24b() {   # format a second MDT with --mgs and expect its start to fail
+       local fs2mds_HOST=$mds_HOST
+        add fs2mds $MDS_MKFS_OPTS --fsname=${FSNAME}2 --mgs --reformat ${MDSDEV}_2 || exit 10
+       setup
+       start fs2mds ${MDSDEV}_2 $MDS_MOUNT_OPTS && return 2   # success here is the failure case
+       cleanup || return 6
+}
+run_test 24b "Multiple MGSs on a single node (should return err)"
+
+test_25() {    # try to rmmod every module listed by "$LCTL modules"
+       setup
+       check_mount || return 2
+       local MODULES=$($LCTL modules | awk '{ print $2 }')    # 2nd column of each line
+       rmmod $MODULES 2>/dev/null || true      # tried between setup and cleanup; result ignored
+       cleanup || return 6     # the only check made after the rmmod attempt
+}
+run_test 25 "Verify modules are referenced"
+
+
+
 umount_client $MOUNT   
 cleanup_nocli
 cleanup_krb5_env
index fb9c99b..7e96ae6 100644 (file)
@@ -81,7 +81,8 @@ int main(int argc, char **argv)
 
                 rc = write(fd, wbuf, len);
                 if (rc != len) {
-                        printf("Write error %s (rc = %d)\n",strerror(errno),rc);
+                        printf("Write error %s (rc = %d, len = %ld)\n",
+                               strerror(errno), rc, len);
                         return 1;
                 }
         }
index 83f6564..460c8f1 100755 (executable)
@@ -14,7 +14,6 @@ ALWAYS_EXCEPT="10 $INSANITY_EXCEPT"
 
 SETUP=${SETUP:-"setup"}
 CLEANUP=${CLEANUP:-"cleanup"}
-UPCALL=${UPCALL:-DEFAULT}
 
 build_test_filter
 
index 35078e0..aae9161 100755 (executable)
@@ -49,6 +49,16 @@ if [ "$WAS_MOUNTED" ]; then
        MAX_ERR=4               # max expected error from e2fsck
 fi
 
+get_mnt_devs() {       # print one device per /proc/fs/lustre/$1/*/mntdev entry
+       DEVS=`cat /proc/fs/lustre/$1/*/mntdev`  # not local: DEVS and DEV are set in the caller's shell
+       for DEV in $DEVS; do
+               case $DEV in    # names containing "loop" are replaced by losetup's "(...)" text
+               *loop*) losetup $DEV | sed -e "s/.*(//" -e "s/).*//" ;;
+               *) echo $DEV ;;
+               esac
+       done
+}
+
 if [ "$LFSCK_SETUP" != "no" ]; then
        #Create test directory 
        rm -rf $DIR
@@ -112,7 +122,9 @@ if [ "$LFSCK_SETUP" != "no" ]; then
        done
        MDS_REMOVE=`echo $MDS_REMOVE | sed "s#$MOUNT/##g"`
 
-       OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev`
+       MDTDEVS=`get_mnt_devs mds`
+       OSTDEVS=`get_mnt_devs obdfilter`
+       OSTCOUNT=`echo $OSTDEVS | wc -w`
        sh llmountcleanup.sh || exit 40
 
        # Remove objects associated with files
@@ -153,8 +165,9 @@ if [ "$LFSCK_SETUP" != "no" ]; then
 
        do_umount
 else
-       OSTDEVS=`cat /proc/fs/lustre/obdfilter/*/mntdev`
-       OSTCOUNT=`$LFIND $MOUNT | grep -c "^[0-9]*: "`
+       MDTDEVS=`get_mnt_devs mds`
+       OSTDEVS=`get_mnt_devs obdfilter`
+       OSTCOUNT=`echo $OSTDEVS | wc -w`
 fi # LFSCK_SETUP
 
 # Run e2fsck to get mds and ost info
index 85685a6..a09f023 100644 (file)
@@ -203,7 +203,8 @@ int main(int argc, char **argv)
         }
 
         fname = strrchr(argv[2], '/');
-        fname++;
+        fname = (fname == NULL ? argv[2] : fname + 1);
+
         strncpy((char *)lum_file1, fname, lum_size);
         rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file1);
         if (rc) {
@@ -221,7 +222,7 @@ int main(int argc, char **argv)
                 }
 
                 fname = strrchr(argv[3], '/');
-                fname++;
+                fname = (fname == NULL ? argv[3] : fname + 1);
                 strncpy((char *)lum_file2, fname, lum_size);
                 rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file2);
                 if (rc) {
index a30a6b6..a345f4f 100644 (file)
@@ -12,7 +12,7 @@ if [ "$LUSTRE" ]; then
   lustre_opt="--lustre=$LUSTRE"
 fi
 
-sh -x $mkconfig $config || exit 1
+[ -f $config ] || sh -x $mkconfig $config || exit 1
 
 $LCONF $lustre_opt --reformat $@ $OPTS $config || exit 4
 
diff --git a/lustre/tests/llmount-upcall.sh b/lustre/tests/llmount-upcall.sh
deleted file mode 100755 (executable)
index f5a1ef7..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-LUSTRE=`dirname $0`/..
-exec >> /tmp/recovery-`hostname`.log
-exec 2>&1
-
-$LUSTRE/utils/lconf --recover --verbose --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $LUSTRE/tests/local.xml
index 8c3c625..c23a845 100755 (executable)
@@ -129,11 +129,11 @@ try_to_cleanup
 drop_request "statone /mnt/lustre/2" & wait_for_timeout
 try_to_cleanup
 
-do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf"
-drop_request "cat /mnt/lustre/resolv.conf > /dev/null" & wait_for_timeout
+do_client "cp /etc/inittab /mnt/lustre/inittab"
+drop_request "cat /mnt/lustre/inittab > /dev/null" & wait_for_timeout
 try_to_cleanup
 
-drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" & wait_for_timeout
+drop_request "mv /mnt/lustre/inittab /mnt/lustre/renamed" & wait_for_timeout
 try_to_cleanup
 
 drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" & wait_for_timeout
diff --git a/lustre/tests/recovery-small-upcall.sh b/lustre/tests/recovery-small-upcall.sh
deleted file mode 100755 (executable)
index b1ad60c..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-LUSTRE=`dirname $0`/..
-PATH=$LUSTRE/utils:$PATH
-lctl --device %$3 recover || logger -p kern.info recovery failed: $@
index 9f9b8e4..0f73e10 100755 (executable)
@@ -2,10 +2,10 @@
 
 set -e
 
-#         bug  5494 7288
-ALWAYS_EXCEPT="24   27 $RECOVERY_SMALL_EXCEPT"
+#         bug  5494 7288 5493
+ALWAYS_EXCEPT="24   27   52 $RECOVERY_SMALL_EXCEPT"
 
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
@@ -61,14 +61,14 @@ test_3() {
 run_test 3 "stat: drop req, drop rep"
 
 test_4() {
-    do_facet client "cp /etc/passwd $MOUNT/passwd" || return 1
-    drop_request "cat $MOUNT/passwd > /dev/null"   || return 2
-    drop_reply "cat $MOUNT/passwd > /dev/null"     || return 3
+    do_facet client "cp /etc/inittab $MOUNT/inittab" || return 1
+    drop_request "cat $MOUNT/inittab > /dev/null"   || return 2
+    drop_reply "cat $MOUNT/inittab > /dev/null"     || return 3
 }
 run_test 4 "open: drop req, drop rep"
 
 test_5() {
-    drop_request "mv $MOUNT/passwd $MOUNT/renamed" || return 1
+    drop_request "mv $MOUNT/inittab $MOUNT/renamed" || return 1
     drop_reint_reply "mv $MOUNT/renamed $MOUNT/renamed-again" || return 2
     do_facet client "checkstat -v $MOUNT/renamed-again"  || return 3
 }
index ea56524..3ec7485 100755 (executable)
@@ -2,10 +2,10 @@
 
 set -e
 
-# bug number:  6088 10124
-ALWAYS_EXCEPT="8    15c   $REPLAY_DUAL_EXCEPT"
+# bug number:  6088 10124 10800
+ALWAYS_EXCEPT="8    15c   17    $REPLAY_DUAL_EXCEPT"
 
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 
index 1c40ea2..0caba0b 100755 (executable)
@@ -2,7 +2,7 @@
 
 set -e
 
-PTLDEBUG=${PTLDEBUG:-1}
+PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
@@ -11,6 +11,10 @@ init_test_env $@
 ostfailover_HOST=${ostfailover_HOST:-$ost_HOST}
 #failover= must be defined in OST_MKFS_OPTIONS if ostfailover_HOST != ost_HOST
 
+# Tests that fail on uml
+CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
+[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 6"
+
 # Skip these tests
 # BUG NUMBER: 
 ALWAYS_EXCEPT="$REPLAY_OST_SINGLE_EXCEPT"
diff --git a/lustre/tests/replay-ost-upcall.sh b/lustre/tests/replay-ost-upcall.sh
deleted file mode 100755 (executable)
index 9f9efbf..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-
-
-TESTDIR=`dirname $0`
-LUSTRE=$TESTDIR/..
-
-exec >> $TESTDIR/recovery-`hostname`.log
-exec 2>&1
-
-set -xv
-
-failed_import() {
-#    $LUSTRE/utils/lctl --device %$3 recover ||
-#        logger -p kern.info recovery failed: $@
-
-    if [ -f $LUSTRE/tests/ostactive ] ; then
-       source $LUSTRE/tests/ostactive
-    else
-        ostactive=ost
-    fi
-
-    $LUSTRE/utils/lconf --verbose --recover --node client_facet  \
-      --select ost1=${ostactive}_facet\
-     --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $TESTDIR/replay-ost-single.xml
-
-}
-
-recovery_over() {
-    logger -p kern.info upcall: $@
-}
-
-
-case "$1" in
-FAILED_IMPORT) failed_import $@
-               ;;
-RECOVERY_OVER) recovery_over $@
-               ;;
-esac
diff --git a/lustre/tests/replay-single-upcall.sh b/lustre/tests/replay-single-upcall.sh
deleted file mode 100755 (executable)
index 59c1371..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/sh
-
-
-TESTDIR=`dirname $0`
-LUSTRE=$TESTDIR/..
-
-mkdir -p $TESTDIR/logs
-
-exec >> $TESTDIR/logs/recovery-`hostname`.log
-exec 2>&1
-
-echo ==========================================
-echo "start upcall: `date`"
-echo "command line: $0 $*"
-
-set -xv
-
-failed_import() {
-    if [ -f $TESTDIR/XMLCONFIG ] ; then
-       source $TESTDIR/XMLCONFIG
-       if [ ! -f $TESTDIR/XMLCONFIG ]; then
-           echo "config file not found: $XMLCONFIG"
-           exit 1
-        fi
-    else
-       echo "$TESTDIR/XMLCONFIG: not found"
-       exit 1
-    fi
-       
-    if [ -f $TESTDIR/mdsactive ] ; then
-        source $TESTDIR/mdsactive
-       MDSSELECT="--select mds_svc=${mdsactive}_facet"
-    fi
-
-    if [ -f $TESTDIR/ostactive ] ; then
-        source $TESTDIR/ostactive
-       OSTSELECT="--select ost_svc=${ostactive}_facet"
-    fi
-
-    $LUSTRE/utils/lconf --verbose --recover --node client_facet  \
-      $MDSSELECT $OSTSELECT \
-     --tgt_uuid $2 --client_uuid $3 --conn_uuid $4 $XMLCONFIG
-
-}
-
-recovery_over() {
-    logger -p kern.info upcall: $@
-}
-
-
-case "$1" in
-FAILED_IMPORT) failed_import $@
-               ;;
-RECOVERY_OVER) recovery_over $@
-               ;;
-esac
index 35c4961..443222f 100755 (executable)
@@ -14,8 +14,8 @@ init_test_env $@
 
 
 # Skip these tests
-# bug number: 2766
-ALWAYS_EXCEPT="0b   $REPLAY_SINGLE_EXCEPT"
+# bug number: 2766 4176
+ALWAYS_EXCEPT="0b  39   $REPLAY_SINGLE_EXCEPT"
 
 build_test_filter
 
@@ -442,6 +442,26 @@ test_20() {
 }
 run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
 
+# Start a dd into $DIR/$tfile in the background, unlink the file, evict the
+# client, fail the mds facet, and then compare the df "available" figure
+# taken before and after.
+# Returns 1 if getstripe fails, 2 if the unlink fails, 5 (after calling
+# error) if available space dropped by more than 20 blocks, 0 otherwise.
+test_20b() {
+    # column 4 of the last line of "df -P" output
+    BEFORESPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'`
+    dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
+    # NOTE(review): pid is recorded but not used again in this function
+    pid=$!
+    usleep 60                           # give dd a chance to start
+    lfs getstripe $DIR/$tfile || return 1
+    rm -f $DIR/$tfile   || return 2     # make it an orphan
+    # helper defined outside this hunk; presumably evicts this client on the MDS
+    mds_evict_client
+    df -P $DIR || df -P $DIR || true    # reconnect
+
+    fail mds                            # start orphan recovery
+    df -P $DIR || df -P $DIR || true    # reconnect
+    sleep 2
+    AFTERSPACE=`df -P $DIR | tail -1 | awk '{ print $4 }'`
+    # allow 20 blocks of slack before declaring that space was lost
+    [ $AFTERSPACE -lt $((BEFORESPACE - 20)) ] && \
+        error "after $AFTERSPACE < before $BEFORESPACE" && return 5
+    return 0
+}
+run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
+
 test_21() {
     replay_barrier mds
     multiop $DIR/$tfile O_tSc &
@@ -881,11 +901,10 @@ test_44() {
     mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
     [ "$mdcdev" ] || exit 2
     for i in `seq 1 10`; do
-       echo iteration $i
-        #define OBD_FAIL_TGT_CONN_RACE     0x701
-        do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
-        $LCTL --device $mdcdev recover
-        df $MOUNT
+       #define OBD_FAIL_TGT_CONN_RACE     0x701
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+       $LCTL --device $mdcdev recover
+       df $MOUNT
     done
     do_facet mds "sysctl -w lustre.fail_loc=0"
     return 0
@@ -896,11 +915,10 @@ test_44b() {
     mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
     [ "$mdcdev" ] || exit 2
     for i in `seq 1 10`; do
-       echo iteration $i
-        #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
-        do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
-        $LCTL --device $mdcdev recover
-        df $MOUNT
+       #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
+       $LCTL --device $mdcdev recover
+       df $MOUNT
     done
     do_facet mds "sysctl -w lustre.fail_loc=0"
     return 0
index ef1467a..c4fe5f4 100644 (file)
@@ -70,7 +70,7 @@ build_test_filter() {
         done
        # turn on/off quota tests must be included
        eval ONLY_0=true
-       eval ONLY_9=true
+       eval ONLY_99=true
 }
 
 _basetest() {
@@ -164,7 +164,7 @@ set_blk_tunesz() {
                echo $(($1 * $BLK_SZ)) > $i
        done
        # set btune size on mds
-       for i in `ls /proc/fs/lustre/mds/mds*/quota_btune_sz`; do
+       for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_btune_sz`; do
                echo $(($1 * $BLK_SZ)) > $i
        done
 }
@@ -173,7 +173,7 @@ set_blk_unitsz() {
        for i in `ls /proc/fs/lustre/obdfilter/*/quota_bunit_sz`; do
                echo $(($1 * $BLK_SZ)) > $i
        done
-       for i in `ls /proc/fs/lustre/mds/mds*/quota_bunit_sz`; do
+       for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_bunit_sz`; do
                echo $(($1 * $BLK_SZ)) > $i
        done
 }
@@ -184,7 +184,7 @@ set_file_tunesz() {
                echo $1 > $i
        done
        # set iunit and itune size on mds
-       for i in `ls /proc/fs/lustre/mds/mds*/quota_itune_sz`; do
+       for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_itune_sz`; do
                echo $1 > $i
        done
 
@@ -195,7 +195,7 @@ set_file_unitsz() {
        for i in `ls /proc/fs/lustre/obdfilter/*/quota_iunit_sz`; do
                echo $1 > $i
        done;
-       for i in `ls /proc/fs/lustre/mds/mds*/quota_iunit_sz`; do
+       for i in `ls /proc/fs/lustre/mds/lustre-MDT*/quota_iunit_sz`; do
                echo $1 > $i
        done
 }
@@ -210,7 +210,7 @@ pre_test() {
                # set block tunables
                set_blk_tunesz $BTUNE_SZ
                set_blk_unitsz $BUNIT_SZ
-               # set file tunaables
+               # set file tunables
                set_file_tunesz $ITUNE_SZ
                set_file_unitsz $IUNIT_SZ
        fi
@@ -361,7 +361,6 @@ test_block_soft() {
 
        echo "    Write before timer goes off"
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$BUNIT_SZ >/dev/null 2>&1 || error "write failure, but expect success"
-       sync; sleep 1; sync;
        echo "    Done"
        
        echo "    Sleep $GRACE seconds ..."
@@ -369,6 +368,7 @@ test_block_soft() {
 
        echo "    Write after timer goes off"
        # maybe cache write, ignore.
+       sync; sleep 1; sync;
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null
        sync; sleep 1; sync;
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=1 seek=$(($BUNIT_SZ * 3)) >/dev/null 2>&1 && error "write success, but expect EDQUOT"
@@ -538,9 +538,9 @@ test_6() {
 
        echo "  Exceed quota limit ..."
        $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ count=$(($LIMIT - $BUNIT_SZ * $OSTCOUNT)) >/dev/null 2>&1 || error "write fileb failure, but expect success"
-       sync; sleep 1; sync;
+       #sync; sleep 1; sync;
        $RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ seek=$LIMIT count=$BUNIT_SZ >/dev/null 2>&1 && error "write fileb success, but expect EDQUOT"
-       sync; sleep 1; sync;
+       #sync; sleep 1; sync;
        echo "  Write to OST0 return EDQUOT"
        # this write maybe cache write, ignore it's failure
        $RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ count=$(($BUNIT_SZ * 2)) >/dev/null 2>&1 || echo " " > /dev/null
@@ -589,7 +589,7 @@ test_7()
        echo 0 > /proc/sys/lustre/fail_loc
 
        echo "  Trigger recovery..."
-       OSC0_UUID="`$LCTL dl | awk '/.* *-osc-* / { print $1 }'`"
+       OSC0_UUID="`$LCTL dl | awk '$3 ~ /osc/ { print $1 }'`"
        for i in $OSC0_UUID; do
                $LCTL --device $i activate > /dev/null 2>&1 || error "activate osc failed!"
        done
@@ -603,8 +603,8 @@ test_7()
        [ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!"
        echo "  total limits = $TOTAL_LIMIT"
        
-       OST0_UUID=`$LCTL dl | awk '/.*OST_[^ ]+_UUID.* / { print $5 }'`
-       [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '/.*ost1_[^ ]*UUID.* / { print $5 }'`
+       OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'| head -n1`
+       [ -z "$OST0_UUID" ] && OST0_UUID=`$LCTL dl | awk '$3 ~ /obdfilter/ { print $5 }'|head -n1`
        OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $MOUNT | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`"
        [ $OST0_LIMIT -eq $BUNIT_SZ ] || error "high limits not released!"
        echo "  limits on $OST0_UUID = $OST0_LIMIT"
@@ -644,13 +644,143 @@ test_8() {
 }
 run_test 8 "Run dbench with quota enabled ==========="
 
+# Regression test for bug 10707, 64bit case.  As $TSTUSR, under generous
+# user and group quota limits, write and delete a file of 4.5G per OST and
+# then a 2G file.  Needs a lot of free space: returns 0 without testing when
+# df reports less than size_file blocks available.
+test_9() {
+        # third-from-last df column, compared against the dd block count
+        lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'`
+        size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT))
+        echo "lustrefs_size:$lustrefs_size  size_file:$size_file"
+        if [ $lustrefs_size -lt $size_file ]; then
+            echo "WARN: too few capacity, skip this test."
+            return 0;
+        fi
+
+        # set the D_QUOTA flag, remembering whether it must be cleared again
+        debug_flag=`cat /proc/sys/lnet/debug`
+        D_QUOTA_FLAG=67108864
+        set_flag=0
+        if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then
+            echo  $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug
+            set_flag=1
+        fi
+
+        TESTFILE="$TSTDIR/quota_tst90"
+
+        # limits are defined before they are reported, so the message shows
+        # the value actually passed to setquota
+        BLK_LIMIT=$((100 * 1024 * 1024)) # 100G
+        FILE_LIMIT=1000000
+        echo "  Set block limit $BLK_LIMIT kbytes to $TSTUSR.$TSTUSR"
+
+        echo "  Set enough high limit for user: $TSTUSR"
+        $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+        echo "  Set enough high limit for group: $TSTUSR"
+        $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+
+        echo "  Set stripe"
+        [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT
+        touch $TESTFILE
+        chown $TSTUSR.$TSTUSR $TESTFILE
+
+        echo "    Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..."
+        $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $(($OSTCOUNT * 9 / 2))G file failure, but expect success"
+
+        echo "    delete the big file of $(($OSTCOUNT * 9 / 2))G..."
+        $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+        echo "    write the big file of 2G..."
+        $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write 2G file failure, but expect success"
+
+        echo "    delete the big file of 2G..."
+        $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+        # NOTE(review): RC only reflects the final rm; the dd failures are
+        # reported through error above
+        RC=$?
+
+        # clear the flag
+        if [ $set_flag -eq 1 ]; then
+            echo  $debug_flag > /proc/sys/lnet/debug
+        fi
+
+        return $RC
+}
+run_test 9 "run for fixing bug10707(64bit) ==========="
+
+# Regression test for bug 10707, 32bit case.  Same write/delete sequence as
+# the 64bit test, but run with lustre.fail_loc=2560 set for the duration.
+# Needs a lot of free space and the mds and ost /proc directories on the
+# local machine; returns 0 without testing otherwise.
+test_10() {
+       # third-from-last df column, compared against the dd block count
+       lustrefs_size=`df | grep $MOUNT | awk '{print $(NF - 2)}'`
+       size_file=$((1024 * 1024 * 9 / 2 * $OSTCOUNT))
+       echo "lustrefs_size:$lustrefs_size  size_file:$size_file"
+       if [ $lustrefs_size -lt $size_file ]; then
+               echo "WARN: too few capacity, skip this test."
+               return 0;
+       fi
+
+       if [ ! -d /proc/fs/lustre/ost/ -o ! -d /proc/fs/lustre/mds ]; then
+           echo "WARN: mds or ost isn't on the local machine, skip this test."
+           return 0;
+       fi
+
+       sync; sleep 10; sync;
+
+       # set the D_QUOTA flag, remembering whether it must be cleared again
+       debug_flag=`cat /proc/sys/lnet/debug`
+       D_QUOTA_FLAG=67108864
+       set_flag=0
+       if [ $((debug_flag & D_QUOTA_FLAG)) -ne $D_QUOTA_FLAG ]; then
+           echo  $((debug_flag | D_QUOTA_FLAG)) > /proc/sys/lnet/debug
+           set_flag=1
+       fi
+
+       # make qd_count 32 bit
+       sysctl -w lustre.fail_loc=2560
+
+       TESTFILE="$TSTDIR/quota_tst100"
+
+       # limits are defined before they are reported, so the message shows
+       # the value actually passed to setquota
+       BLK_LIMIT=$((100 * 1024 * 1024)) # 100G
+       FILE_LIMIT=1000000
+       echo "  Set block limit $BLK_LIMIT kbytes to $TSTUSR.$TSTUSR"
+
+       echo "  Set enough high limit for user: $TSTUSR"
+       $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+       echo "  Set enough high limit for group: $TSTUSR"
+       $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $MOUNT
+
+       echo "  Set stripe"
+       [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE 65536 0 $OSTCOUNT
+       touch $TESTFILE
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       echo "    Write the big file of $(($OSTCOUNT * 9 / 2 ))G ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$BLK_SZ count=$size_file >/dev/null 2>&1 || error "(usr) write $(($OSTCOUNT * 9 / 2))G file failure, but expect success"
+
+       echo "    delete the big file of $(($OSTCOUNT * 9 / 2))G..."
+       $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+       echo "    write the big file of 2G..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$BLK_SZ count=$((1024 * 1024 * 2)) >/dev/null 2>&1 || error "(usr) write 2G file failure, but expect success"
+
+       echo "    delete the big file of 2G..."
+       $RUNAS rm -f $TESTFILE >/dev/null 2>&1
+
+       # NOTE(review): RC only reflects the final rm; the dd failures are
+       # reported through error above
+       RC=$?
+
+       # clear the flag
+       if [ $set_flag -eq 1 ]; then
+              echo  $debug_flag > /proc/sys/lnet/debug
+       fi
+
+       # make qd_count 64 bit
+       sysctl -w lustre.fail_loc=0
+
+       return $RC
+}
+run_test 10 "run for fixing bug10707(32bit) ==========="
+
+
 # turn off quota
-test_9()
+test_99()
 {
        $LFS quotaoff $MOUNT
        return 0
 }
-run_test 9 "Quota off ==============================="
+run_test 99 "Quota off ==============================="
 
 
 log "cleanup: ======================================================"
index b320fc4..f3b74c2 100644 (file)
@@ -114,10 +114,10 @@ trace() {
 TRACE=${TRACE:-""}
 
 check_kernel_version() {
-       VERSION_FILE=$LPROC/kernel_version
+       VERSION_FILE=$LPROC/version
        WANT_VER=$1
        [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1
-       GOT_VER=`cat $VERSION_FILE`
+       GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE)
        [ $GOT_VER -ge $WANT_VER ] && return 0
        log "test needs at least kernel version $WANT_VER, running $GOT_VER"
        return 1
@@ -1105,7 +1105,25 @@ test_27r() {
 
        reset_enospc
 }
-run_test 27r "stripe file with some full OSTs (shouldn't LBUG) ==="
+run_test 27r "stripe file with some full OSTs (shouldn't LBUG) ="
+
+# bug 10725: a striping request that large must be rejected.  The test
+# calls error if $LSTRIPE succeeds and otherwise ends with status 0.
+test_27s() {
+       mkdir -p $DIR/$tdir
+       # 2048 * 1024 * 1024 is 2^31; presumably the positional arguments are
+       # stripe size, start offset and stripe count - TODO confirm
+       $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \
+               error "stripe width >= 2^32 succeeded" || true
+}
+run_test 27s "lsm_xfersize overflow (should error) (bug 10725)"
+
+# Run "lfs getstripe" on a bare relative file name from inside $DIR, then
+# return to the directory the test started in.
+test_27t() { # bug 10864
+        WDIR=`pwd`
+        # resolve lfs to whatever "which" reports before changing directory
+        WLFS=`which lfs`
+        cd $DIR
+        touch $tfile
+        $WLFS getstripe $tfile
+        # NOTE(review): as the last command, this cd supplies the function's
+        # status, so a getstripe failure is only caught if errexit is in force
+        cd $WDIR
+}
+run_test 27t "check that utils parse path correctly"
+
 
 test_28() {
        mkdir $DIR/d28
@@ -2541,11 +2559,11 @@ run_test 65i "set non-default striping on root directory (bug 6367)="
 test_65j() { # bug6367
        return
        # if we aren't already remounting for each test, do so for this test
-       if [ "$CLEANUP" = ":" ]; then
+       if [ "$CLEANUP" = ":" -a "$I_MOUNTED" = "yes" ]; then
                cleanup -f || error "failed to unmount"
-               setup || error "failed to remount"
+               setup
        fi
-       $SETSTRIPE -d $MOUNT || true
+       $SETSTRIPE -d $MOUNT
 }
 run_test 65j "set default striping on root directory (bug 6367)="
 
@@ -2751,71 +2769,132 @@ test_74() { # bug 6149, 6184
 run_test 74 "ldlm_enqueue freed-export error path (shouldn't LBUG)"
 
 JOIN=${JOIN:-"lfs join"}
-test_75() {
+F75=$DIR/f75
+F128k=${F75}_128k
+FHEAD=${F75}_head
+FTAIL=${F75}_tail
+export T75_PREP=no
+# One-time setup for the test_75* join tests: creates the 128k source file
+# from /dev/urandom and sets T75_PREP=yes so that later calls return at once.
+test75_prep() {
+        [ $T75_PREP = "yes" ] && return
+        echo "using F75=$F75, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL"
+        # ${F75}_128k is the same path as $F128k (F128k=${F75}_128k above)
+        dd if=/dev/urandom of=${F75}_128k bs=128k count=1 || error "dd failed"
+        log "finished dd"
+        chmod 777 ${F128k}
+        T75_PREP=yes
+}
+test_75a() {
 #      skipped temporarily: we do not have join file currently
 #      please remove this when ready - huanghua
        return
-       F=$DIR/$tfile
-       F128k=${F}_128k
-       FHEAD=${F}_head
-       FTAIL=${F}_tail
-       echo "using F=$F, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL"
-       rm -f $F*
-
-       dd if=/dev/urandom of=${F}_128k bs=1024 count=128 || error "dd failed"
-       chmod 777 ${F128k}
-       cp -p ${F128k} ${FHEAD}
-       cp -p ${F128k} ${FTAIL}
-       cat ${F128k} ${F128k} > ${F}_sim_sim
-
-       $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       cmp ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ"
-       $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join"
-
-       cp -p ${F128k} ${FTAIL}
-       cat ${F}_sim_sim >> ${F}_join_sim
-       cat ${F128k} >> ${F}_join_sim
-       $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       cmp ${FHEAD} ${F}_join_sim || \
-               error "${FHEAD} ${F}_join_sim are different"
-       $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join"
-
-       cp -p ${F128k} ${FTAIL}
-       cat ${F128k} >> ${F}_sim_join
-       cat ${F}_join_sim >> ${F}_sim_join
-       $JOIN ${FTAIL} ${FHEAD} || error "join error"
-       cmp ${FTAIL} ${F}_sim_join || \
-               error "${FTAIL} ${F}_sim_join are different"
-       $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join"
-
-       cp -p ${F128k} ${FHEAD}
-       cp -p ${F128k} ${FHEAD}_tmp
-       cat ${F}_sim_sim >> ${F}_join_join
-       cat ${F}_sim_join >> ${F}_join_join
-       $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error"
-       $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       cmp ${FHEAD} ${F}_join_join || error "${FHEAD} ${F}_join_join differ"
-       $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join"
-       $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)"
-
-       rm -rf ${FHEAD} || error "delete join file error"
-       cp -p ${F128k} ${F}_join_10_compare
-       cp -p ${F128k} ${F}_join_10
-       for ((i = 0; i < 10; i++)); do
-               cat ${F128k} >> ${F}_join_10_compare
-               cp -p ${F128k} ${FTAIL}
-               $JOIN ${F}_join_10 ${FTAIL} || \
-                       error "join ${F}_join_10 ${FTAIL} error"
-               $CHECKSTAT -a ${FTAIL} || error "tail file exist after join"
-       done
-       cmp ${F}_join_10 ${F}_join_10_compare || \
-               error "files ${F}_join_10 ${F}_join_10_compare are different"
-       $LFS getstripe ${F}_join_10
-       $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open"
-
-       ls -l $F*
+        test75_prep
+        cp -p ${F128k} ${FHEAD}
+        log "finished cp to $FHEAD"
+        cp -p ${F128k} ${FTAIL}
+        log "finished cp to $FTAIL"
+        cat ${F128k} ${F128k} > ${F75}_sim_sim
+        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+        log "finished join $FHEAD to ${F75}_sim_sim"
+        cmp ${FHEAD} ${F75}_sim_sim || error "${FHEAD} ${F75}_sim_sim differ"
+        log "finished cmp $FHEAD to ${F75}_sim_sim"
+        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join"
+}
+run_test 75a "TEST join file ===================================="
+# Join test 2: join a fresh copy of the 128k file onto $FHEAD and compare the
+# result with an expected file built by cat.  Reads $FHEAD and
+# ${F75}_sim_sim, which this function does not create itself (test_75a
+# does).  Everything after the unconditional return is currently dead code.
+test_75b() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        test75_prep
+        cp -p ${F128k} ${FTAIL}
+        # expected content: previous expected file followed by one more 128k
+        cat ${F75}_sim_sim >> ${F75}_join_sim
+        cat ${F128k} >> ${F75}_join_sim
+        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+        cmp ${FHEAD} ${F75}_join_sim || \
+                error "${FHEAD} ${F75}_join_sim are different"
+        # presumably checks that the tail is gone after the join - TODO confirm
+        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join"
+}
+run_test 75b "TEST join file 2 =================================="
+# Join test 3: join in the opposite direction, with a fresh 128k $FTAIL as
+# the first argument and $FHEAD as the second.  Reads ${F75}_join_sim, which
+# this function does not create itself (test_75b does).  Everything after
+# the unconditional return is currently dead code.
+test_75c() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        test75_prep
+        cp -p ${F128k} ${FTAIL}
+        # expected content: one 128k block followed by the previous expected file
+        cat ${F128k} >> ${F75}_sim_join
+        cat ${F75}_join_sim >> ${F75}_sim_join
+        $JOIN ${FTAIL} ${FHEAD} || error "join error"
+        cmp ${FTAIL} ${F75}_sim_join || \
+                error "${FTAIL} ${F75}_sim_join are different"
+        # presumably checks that $FHEAD is gone after the join - TODO confirm
+        $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join"
+}
+run_test 75c "TEST join file 3 =================================="
+# Join test 4: join two plain 128k files into $FHEAD, then join the result
+# with $FTAIL, and compare against an expected file built by cat.  Reads
+# ${F75}_sim_sim and ${F75}_sim_join, which earlier test_75* functions
+# create.  Everything after the unconditional return is currently dead code.
+test_75d() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        test75_prep
+        cp -p ${F128k} ${FHEAD}
+        cp -p ${F128k} ${FHEAD}_tmp
+        cat ${F75}_sim_sim >> ${F75}_join_join
+        cat ${F75}_sim_join >> ${F75}_join_join
+        $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error"
+        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
+        cmp ${FHEAD} ${F75}_join_join || error "${FHEAD} ${F75}_join_join differ"
+        # this check must be its own statement; fused onto the cmp line it
+        # was passed to error as extra arguments and never executed
+        $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join"
+        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)"
+}
+run_test 75d "TEST join file 4 =================================="
+# Join test 5: remove the joined file $FHEAD and report a failure through
+# error if the removal fails.  Everything after the unconditional return is
+# currently dead code.
+test_75e() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        test75_prep
+        # the message has to be handed to error; a bare string after ||
+        # would be executed as a command name
+        rm -rf ${FHEAD} || error "delete join file error"
+}
+run_test 75e "TEST join file 5 (remove joined file) ============="
+# Join test 6: starting from one 128k file, join ten more 128k copies onto
+# ${F75}_join_10 one at a time while building the expected content with cat,
+# then compare the two.  Everything after the unconditional return is
+# currently dead code.
+test_75f() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        test75_prep
+        cp -p ${F128k} ${F75}_join_10_compare
+        cp -p ${F128k} ${F75}_join_10
+        for ((i = 0; i < 10; i++)); do
+                # grow the expected file and the joined file in lock step
+                cat ${F128k} >> ${F75}_join_10_compare
+                cp -p ${F128k} ${FTAIL}
+                $JOIN ${F75}_join_10 ${FTAIL} || \
+                        error "join ${F75}_join_10 ${FTAIL} error"
+                # presumably checks that the tail is gone - TODO confirm
+                $CHECKSTAT -a ${FTAIL} || error "tail file exist after join"
+        done
+        cmp ${F75}_join_10 ${F75}_join_10_compare || \
+                error "files ${F75}_join_10 ${F75}_join_10_compare differ"
+}
+run_test 75f "TEST join file 6 (join 10 files) =================="
+# Join test 7: show the striping of ${F75}_join_10 and run $OPENUNLINK on
+# it, then list the $F75* files.  Returns early if ${F75}_join_10 does not
+# exist.  Everything after the unconditional return is currently dead code.
+test_75g() {
+#      skipped temporarily: we do not have join file currently
+#      please remove this when ready - huanghua
+       return
+        # the joined file is produced by test_75f; nothing to do without it
+        [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return
+        $LFS getstripe ${F75}_join_10
+        $OPENUNLINK ${F75}_join_10 ${F75}_join_10 || error "files unlink open"
+        ls -l $F75*
 }
-run_test 75 "TEST join file ===================================="
+run_test 75g "TEST join file 7 (open unlink) ===================="
 
 num_inodes() {
        awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo
@@ -2988,6 +3067,8 @@ test_102() {
 
        [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
        [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
+       [ -z "$(which setfattr 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfattr)" && return
+
        echo "set/get xattr..."
         setfattr -n trusted.name1 -v value1 $testfile || error
         [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \
@@ -3018,8 +3099,9 @@ test_102() {
         getfattr -d -m user $testfile 2> /dev/null | \
         grep "user.author1" && error || true
 
-       echo "set lustre specific xattr (should be denied)..."
-       setfattr -n "trusted.lov" -v "invalid value" $testfile || true
+       # b10667: setting lustre special xattr be silently discarded
+       echo "set lustre special xattr ..."
+       setfattr -n "trusted.lov" -v "invalid value" $testfile || error
 
        rm -f $testfile
 }
index aa70f83..8a46e8d 100644 (file)
@@ -120,7 +120,7 @@ build_test_filter() {
         for O in $ONLY; do
             eval ONLY_${O}=true
         done
-        for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do
+        for E in $EXCEPT $ALWAYS_EXCEPT $SANITYN_EXCEPT; do
             eval EXCEPT_${E}=true
         done
 }
index c3e147f..637c2c6 100644 (file)
@@ -3,7 +3,7 @@
 
 set -e
 trap 'echo "test-framework exiting on error"' ERR
-#set -vx
+#set -x
 
 
 export REFORMAT=""
@@ -118,6 +118,7 @@ load_modules() {
     load_module fid/fid
     load_module fld/fld
     load_module lmv/lmv
+    load_module quota/lquota
     load_module mdc/mdc
     load_module osc/osc
     load_module lov/lov
@@ -147,6 +148,29 @@ load_modules() {
     [ -f $LUSTRE/utils/mount.lustre ] && cp $LUSTRE/utils/mount.lustre /sbin/. || true
 }
 
+# Keep retrying rmmod on whatever "$LCTL modules" still lists, sleeping 5
+# seconds before each attempt.
+# Returns 0 once the list is empty, or 3 (after printing lsmod) when modules
+# are still listed once MAX seconds have been spent waiting.
+# MODULES is deliberately not declared local here, as in the original.
+wait_for_lnet() {
+    local WAIT=0
+    local MAX=60
+    MODULES=$($LCTL modules | awk '{ print $2 }')
+    while [ -n "$MODULES" ]; do
+        sleep 5
+        rmmod $MODULES >/dev/null 2>&1 || true
+        MODULES=$($LCTL modules | awk '{ print $2 }')
+        if [ -z "$MODULES" ]; then
+            return 0
+        else
+            WAIT=$((WAIT + 5))
+            echo "waiting, $((MAX - WAIT)) secs left"
+        fi
+        # -ge rather than -eq, so the loop still ends if the 5 second step
+        # ever stops dividing MAX exactly
+        if [ $WAIT -ge $MAX ]; then
+            echo "LNET modules $MODULES will not unload"
+            lsmod
+            return 3
+        fi
+    done
+}
+
 unload_modules() {
     lsmod | grep lnet > /dev/null && $LCTL dl && $LCTL dk $TMP/debug
     local MODULES=$($LCTL modules | awk '{ print $2 }')
@@ -154,14 +178,20 @@ unload_modules() {
     rmmod $MODULES >/dev/null 2>&1 || true
      # do it again, in case we tried to unload ksocklnd too early
     MODULES=$($LCTL modules | awk '{ print $2 }')
-    [ -n "$MODULES" ] && rmmod $MODULES >/dev/null && sleep 2 || true
+    [ -n "$MODULES" ] && rmmod $MODULES >/dev/null || true
     MODULES=$($LCTL modules | awk '{ print $2 }')
     if [ -n "$MODULES" ]; then
-       echo "modules still loaded"
+       echo "Modules still loaded: "
        echo $MODULES 
-       cat $LPROC/devices || true
-       lsmod
-       return 2
+       if [ -e $LPROC ]; then
+           echo "Lustre still loaded"
+           cat $LPROC/devices || true
+           lsmod
+           return 2
+       else
+           echo "Lustre stopped, but LNET is still loaded"
+           wait_for_lnet || return 3
+       fi
     fi
     HAVE_MODULES=false
 
index 9548cdc..9c2728e 100644 (file)
@@ -10,7 +10,7 @@
  *
  * compile: mpicc -g -Wall -o write_disjoint write_disjoint.c
  * run:     mpirun -np N -machlist <hostlist file> write_disjoint
- *  or:     pdsh -w <N hosts> write_disjoint 
+ *  or:     pdsh -w <N hosts> write_disjoint
  *  or:     prun -n N [-N M] write_disjoint
  */
 #include <stdlib.h>
+/*
+ * Print "rank R, loop L: " followed by the printf-style message built from
+ * fmt and its arguments, then abort the MPI job with MPI_Abort().
+ */
 void rprintf(int rank, int loop, const char *fmt, ...)
 {
         va_list       ap;
+
         printf("rank %d, loop %d: ", rank, loop);
+
         va_start(ap, fmt);
+
         vprintf(fmt, ap);
-        MPI_Abort(MPI_COMM_WORLD, -1);
+        va_end(ap);     /* every va_start needs a matching va_end */
+
+        MPI_Abort(MPI_COMM_WORLD, -1); /* This will exit() according to man */
 }
 
+#define CHUNK_SIZE(n) chunk_size[(n) % 2]
+
 int main (int argc, char *argv[]) {
-         int i, n, fd, chunk_size, file_size;
-         int rank, noProcessors, done;
-         int error;
-         off_t offset;
-         char **chunk_buf;
-         char *read_buf, c;
-         struct stat stat_buf;
-         ssize_t ret;
-         char *filename = "/mnt/lustre/write_disjoint";
-         int numloops = 1000;
+        int i, n, fd;
+        unsigned long chunk_size[2];
+        int rank, noProcessors, done;
+        int error;
+        off_t offset;
+        char **chunk_buf;
+        char *read_buf, c;
+        struct stat stat_buf;
+        ssize_t ret;
+        char *filename = "/mnt/lustre/write_disjoint";
+        int numloops = 1000;
 
         error = MPI_Init(&argc, &argv);
         if (error != MPI_SUCCESS)
@@ -66,105 +69,116 @@ int main (int argc, char *argv[]) {
                 }
         }
 
-         MPI_Comm_size(MPI_COMM_WORLD, &noProcessors);
-         MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-                         
-         chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
-         for (i=0; i < noProcessors; i++) {
+        MPI_Comm_size(MPI_COMM_WORLD, &noProcessors);
+        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+        chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
+        for (i=0; i < noProcessors; i++) {
                 chunk_buf[i] = malloc(CHUNK_MAX_SIZE);
                 memset(chunk_buf[i], 'A'+ i, CHUNK_MAX_SIZE);
-         }
-         read_buf = malloc(noProcessors * CHUNK_MAX_SIZE);
-         
-         if (rank == 0) {
+        }
+        read_buf = malloc(noProcessors * CHUNK_MAX_SIZE);
+
+        if (rank == 0) {
                 fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
-                if (fd < 0) 
-                        rprintf(rank, -1, "open() returned %s\n", 
+                if (fd < 0)
+                        rprintf(rank, -1, "open() returned %s\n",
                                 strerror(errno));
-         }
-         MPI_Barrier(MPI_COMM_WORLD);
-
-         fd = open(filename, O_RDWR);
-         if (fd < 0)
-                 rprintf(rank, -1, "open() returned %s\n", strerror(errno));
-         
-         for (n=0; n < numloops; n++) {
-                 /* reset the environment */
-                 if (rank == 0) {
-                         ret = truncate(filename, 0);
-                         if (ret != 0)
-                                 rprintf(rank, n, "truncate() returned %s\n", 
-                                         strerror(errno) );
-                 }
-                 chunk_size = rand() % CHUNK_MAX_SIZE;
-
-                 if (n % 1000 == 0 && rank == 0)
-                         printf("loop %d: chunk_size %d\n", n, chunk_size);
-
-                 MPI_Barrier(MPI_COMM_WORLD);
-                 
-                 /* Do the race */
-                 offset = rank * chunk_size;
-                 lseek(fd, offset, SEEK_SET);
-
-                 done = 0;
-                 do {
-                        ret = write(fd, chunk_buf[rank]+done, chunk_size-done);
-                        if (ret < 0) 
-                                 rprintf(rank, n, "write() returned %s\n", 
-                                         strerror(errno));
+        }
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        fd = open(filename, O_RDWR);
+        if (fd < 0)
+                rprintf(rank, -1, "open() returned %s\n", strerror(errno));
+
+        for (n = 0; n < numloops; n++) {
+                /* reset the environment */
+                if (rank == 0) {
+                        ret = truncate(filename, 0);
+                        if (ret != 0)
+                                rprintf(rank, n, "truncate() returned %s\n",
+                                        strerror(errno) );
+                }
+                CHUNK_SIZE(n) = rand() % CHUNK_MAX_SIZE;
+
+                if (n % 1000 == 0 && rank == 0)
+                        printf("loop %d: chunk_size %lu\n", n, CHUNK_SIZE(n));
+
+                MPI_Barrier(MPI_COMM_WORLD);
+
+                /* Do the race */
+                offset = rank * CHUNK_SIZE(n);
+                lseek(fd, offset, SEEK_SET);
+
+                done = 0;
+                do {
+                        ret = write(fd, chunk_buf[rank] + done,
+                                    CHUNK_SIZE(n) - done);
+                        if (ret < 0)
+                                rprintf(rank, n, "write() returned %s\n",
+                                        strerror(errno));
                         done += ret;
-                 } while (done != chunk_size);
-
-                 MPI_Barrier(MPI_COMM_WORLD);
-
-                 /* Check the result */
-                 if (rank == 0) {
-                         lseek(fd, 0, SEEK_SET);
-                         
-                         /* quick check */
-                         stat(filename, &stat_buf);
-                         file_size = stat_buf.st_size;
-                         if (file_size != chunk_size * noProcessors)
-                                  rprintf(rank, n, "invalid file size %d"
-                                          " instead of %d\n", file_size, 
-                                          chunk_size * noProcessors);
+                } while (done != CHUNK_SIZE(n));
+
+                MPI_Barrier(MPI_COMM_WORLD);
+
+                /* Check the result */
+                if (rank == 0) {
+                        if (lseek(fd, 0, SEEK_SET) < 0)
+                                rprintf(rank, n, "error seeking to 0: %s\n",
+                                        strerror(errno));
+
+                        /* quick check */
+                        if (stat(filename, &stat_buf) < 0)
+                                rprintf(rank, n, "error stating %s: %s\n",
+                                        filename, strerror(errno));
+
+                        if (stat_buf.st_size != CHUNK_SIZE(n) * noProcessors)
+                                rprintf(rank, n, "invalid file size %lu"
+                                        " instead of %lu\n",
+                                        (unsigned long)stat_buf.st_size,
+                                        CHUNK_SIZE(n) * noProcessors);
 
                         done = 0;
                         do {
-                                ret = read(fd, read_buf + done, 
-                                           (chunk_size * noProcessors) - done);
-                                if (ret < 0) 
+                                ret = read(fd, read_buf + done,
+                                           CHUNK_SIZE(n) * noProcessors - done);
+                                if (ret < 0)
                                         rprintf(rank, n, "read returned %s\n",
                                                 strerror(errno));
 
                                 done += ret;
-                        } while (done != chunk_size * noProcessors);
+                        } while (done != CHUNK_SIZE(n) * noProcessors);
 
                         for (i = 0; i < noProcessors; i++) {
-                                char command[4096]; 
+                                char command[4096];
                                 int j;
-                                if (!memcmp(read_buf + (i * chunk_size), 
-                                           chunk_buf[i], chunk_size))
+                                if (!memcmp(read_buf + (i * CHUNK_SIZE(n)),
+                                            chunk_buf[i], CHUNK_SIZE(n)))
                                         continue;
 
-                                printf("rank %d, loop %d: chunk %d corrupted "
-                                       "with chunk_size %d, page_size %d\n",
-                                       rank, n, i, chunk_size, getpagesize());
-                                printf("(ranks: page boundry, chunk boundry, "
-                                       "page boundry)\n");
+                                /* print out previous chunk sizes */
+                                if (n > 0)
+                                        printf("loop %d: chunk_size %lu\n",
+                                               n - 1, CHUNK_SIZE(n - 1));
+
+                                printf("loop %d: chunk %d corrupted "
+                                       "with chunk_size %lu, page_size %d\n",
+                                       n, i, CHUNK_SIZE(n), getpagesize());
+                                printf("ranks:\tpage boundry\tchunk boundry\t"
+                                       "page boundry\n");
                                 for (j = 1 ; j < noProcessors; j++) {
-                                        int b = j * chunk_size;
-                                        printf("\t%c -> %c: %d %d %d\n", 
-                                               'A' + j - 1, 'A' + j, 
-                                               b & ~(getpagesize()-1), b, 
-                                               (b + getpagesize()) & ~(getpagesize()-1));
+                                        int b = j * CHUNK_SIZE(n);
+                                        printf("%c -> %c:\t%d\t%d\t%d\n",
+                                               'A' + j - 1, 'A' + j,
+                                               b & ~(getpagesize()-1), b,
+                                               (b + getpagesize()) &
+                                               ~(getpagesize()-1));
                                 }
 
                                 sprintf(command, "od -Ad -a %s", filename);
                                 system(command);
-                                MPI_Finalize();
-                                exit(1);
+                                rprintf(0, n, "data check error - exiting\n");
                         }
                 }
         }
index f1f7030..c888f7c 100644 (file)
@@ -7,26 +7,20 @@ Makefile.in
 .deps
 tags
 TAGS
-obdctl
-lctl
-obdstat
-obdio
-obdbarrier
-lload
-wirecheck
-lfs
 mkfs.lustre
-mkfs_lustre
 mount.lustre
-mount_lustre
 tunefs.lustre
-tunefs_lustre
-llog_reader
-llmount
-l_getgroups
+lctl
+lfs
+wirecheck
 wiretest
 llog_reader
-.*.cmd
-.*.d
+lr_reader
+obdio
+obdbarrier
+lload
 llverfs
 llverdev
+l_getgroups
+.*.cmd
+.*.d
index 8eb36df..e7aed66 100644 (file)
@@ -13,8 +13,8 @@ LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a
 sbin_scripts = llstat.pl llobdstat.pl lrun 
 
 if UTILS
-noinst_PROGRAMS = mount_lustre mkfs_lustre tunefs_lustre \
-       llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier
+noinst_PROGRAMS = llog_reader lr_reader wirecheck wiretest lload obdio obdbarrier
+
 # mount only finds helpers in /sbin
 rootsbin_PROGRAMS = mount.lustre
 sbin_PROGRAMS = mkfs.lustre tunefs.lustre lctl \
@@ -96,12 +96,3 @@ newwiretest: wirehdr.c wirecheck
        cp ../ptlrpc/wirehdr.c ../ptlrpc/wiretest.c
        ./wirecheck >> ../ptlrpc/wiretest.c
 
-# Apparently I can't use .'s in automake names
-mount.lustre$(EXEEXT): mount_lustre
-       cp $< $@
-
-mkfs.lustre$(EXEEXT): mkfs_lustre
-       cp $< $@
-
-tunefs.lustre$(EXEEXT): tunefs_lustre
-       cp $< $@
index 423e536..acba01a 100755 (executable)
@@ -854,8 +854,8 @@ def def_mount_options(fstype, target, blkdev):
                 # use internal journal
                 return mountfsoptions
         
-            # run blkid
-            blkid = "blkid -o device -t UUID='%s'" % (journal_UUID)
+            # run blkid, lookup highest-priority device with matching UUID
+            blkid = "blkid -o device -l -t UUID='%s'" % (journal_UUID)
             (ret, devname) = run(blkid)
             if ret or len(devname) == 0:
                 panic("cannot find external journal for ", blkdev)
index ad988a7..53f5c67 100644 (file)
@@ -69,7 +69,7 @@ static void err_msg(char *fmt, ...)
         fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
 }
 
-int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
+int llapi_file_create(const char *name, unsigned long stripe_size, int stripe_offset,
                       int stripe_count, int stripe_pattern)
 {
         struct lov_user_md lum = { 0 };
@@ -103,7 +103,7 @@ int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
                         "multiple of %d bytes", stripe_size, page_size);
                 goto out;
         }
-        if (stripe_offset < -1 || stripe_offset > 2048) {
+        if (stripe_offset < -1 || stripe_offset > MAX_OBD_DEVICES) {
                 errno = rc = -EINVAL;
                 err_msg("error: bad stripe offset %d", stripe_offset);
                 goto out;
@@ -113,10 +113,10 @@ int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
                 err_msg("error: bad stripe count %d", stripe_count);
                 goto out;
         }
-        if (stripe_count > 0 && (__u64)stripe_size * stripe_count > ~0UL) {
+        if (stripe_count > 0 && (__u64)stripe_size * stripe_count > 0xffffffff){
                 errno = rc = -EINVAL;
-                err_msg("error: stripe_size %ld * stripe_count %d "
-                        "exceeds %lu bytes", ~0UL);
+                err_msg("error: stripe_size %lu * stripe_count %u "
+                        "exceeds 4GB", stripe_size, stripe_count);
                 goto out;
         }
 
@@ -432,7 +432,8 @@ void llapi_lov_dump_user_lmm(struct find_param *param,
 
 int llapi_file_get_stripe(const char *path, struct lov_user_md *lum)
 {
-        char *dname, *fname;
+        const char *fname;
+        char *dname;
         int fd, rc = 0;
 
         fname = strrchr(path, '/');
@@ -724,7 +725,8 @@ static int cb_find_init(char *path, DIR *parent, DIR *dir, void *data)
                 ret = ioctl(dirfd(dir), LL_IOC_MDC_GETINFO,
                             (void *)param->lmd);
         } else if (!decision && parent) {
-                char *fname = strrchr(path, '/') + 1;
+                char *fname = strrchr(path, '/');
+                fname = (fname == NULL ? path : fname + 1);
 
                 /* retrieve needed file info */
                 strncpy((char *)param->lmd, fname, param->lumlen);
@@ -889,7 +891,8 @@ static int cb_getstripe(char *path, DIR *parent, DIR *d, void *data)
                 ret = ioctl(dirfd(d), LL_IOC_LOV_GETSTRIPE,
                             (void *)&param->lmd->lmd_lmm);
         } else if (parent) {
-                char *fname = strrchr(path, '/') + 1;
+                char *fname = strrchr(path, '/');
+                fname = (fname == NULL ? path : fname + 1);
 
                 strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen);
                 ret = ioctl(dirfd(parent), IOC_MDC_GETFILESTRIPE,
@@ -1193,7 +1196,8 @@ static int cb_quotachown(char *path, DIR *parent, DIR *d, void *data)
                 rc = ioctl(dirfd(d), LL_IOC_MDC_GETINFO,
                            (void *)param->lmd);
         } else if (parent) {
-                char *fname = strrchr(path, '/') + 1;
+                char *fname = strrchr(path, '/');
+                fname = (fname == NULL ? path : fname + 1);
 
                 strncpy((char *)param->lmd, fname, param->lumlen);
                 rc = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO,
index be8fba7..4feb3c1 100755 (executable)
@@ -1,4 +1,7 @@
 #!/usr/bin/perl
+# llobdstat.pl is a utility that parses obdfilter statistics files
+# found at /proc/fs/lustre/obdfilter/<ost_name>/stats.
+# It is mainly useful for watching the statistics change over time.
 
 my $pname = $0;
 
@@ -7,9 +10,11 @@ my $obdstats = "stats";
 
 sub usage()
 {
-    print STDERR "Usage: $pname <stats_file> [<interval>]\n";
-    print STDERR "example: $pname help (to get help message)\n";
-    print STDERR "example: $pname ost1 1 (monitor /proc/fs/lustre/obdfilter/ost1/stats\n";
+    print STDERR "Usage: $pname <ost_name> [<interval>]\n";
+    print STDERR "where  ost_name  : ost name under $defaultpath/obdfilter\n";
+    print STDERR "       interval  : sample interaval in seconds\n";
+    print STDERR "example: $pname lustre-OST0000 2\n";
+    print STDERR "Use CTRL + C to stop statistics printing\n";
     exit 1;
 }
 
@@ -46,12 +51,14 @@ print "$pname on $statspath\n";
 my %cur;
 my %last;
 my $mhz = 0;
-my ($read_bytes, $read, $write_bytes, $write, $getattr, $setattr, $open, $close,    $create, $destroy, $statfs, $punch, $snapshot_time) = 
-    ("read_bytes", "read", "write_bytes", "write", "getattr", "setattr", "open",    "close", "create", "destroy", "statfs", "punch", "snapshot_time"); 
 
-my @extinfo = ($setattr, $open, $close, $create, $destroy, $statfs, $punch);
-my %shortname = ($setattr => "sa", $open => "op", $close => "cl", 
-               $create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu");
+# Statistics such as open and close were removed because obdfilter does not provide them.
+# To add a statistic, add its name at the same position in both lists of the declaration below.
+my ($read_bytes, $write_bytes, $create, $destroy, $statfs, $punch, $snapshot_time) = 
+    ("read_bytes", "write_bytes", "create", "destroy", "statfs", "punch", "snapshot_time"); 
+
+my @extinfo = ($create, $destroy, $statfs, $punch);
+my %shortname = ($create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu");
 
 sub get_cpumhz()
 {
@@ -73,6 +80,8 @@ sub get_cpumhz()
 get_cpumhz();
 print "Processor counters run at $mhz MHz\n";
 
+# The readstat subroutine reads statistics from the obdfilter stats file.
+# It is called once per interval specified by the user.
 sub readstat()
 {
        my $prevcount;
@@ -101,35 +110,35 @@ sub readstat()
                }
        }
 }
-
+# process_stats subroutine processes stats information read from obdfilter stats file.
+# This subroutine gets called after every interval specified by user.     
 sub process_stats()
 {
        my $delta;
        my $data;
        my $last_time = $last{$snapshot_time};
        if (!defined($last_time)) {
-               printf "R %-g/%-g W %-g/%-g attr %-g/%-g open %-g/%-g create %-g/%-g stat %-g punch %-g\n",
-               $cur{$read_bytes}, $cur{$read}, 
-               $cur{$write_bytes}, $cur{$write}, 
-               $cur{$getattr}, $cur{$setattr}, 
-               $cur{$open}, $cur{$close}, 
+               printf "Read: %-g, Write: %-g, create/destroy: %-g/%-g, stat: %-g, punch: %-g\n",
+               $cur{$read_bytes}, $cur{$write_bytes},  
                $cur{$create}, $cur{$destroy}, 
                $cur{$statfs}, $cur{$punch}; 
+                if ($interval) {
+                        print "[NOTE: cx: create, dx: destroy, st: statfs, pu: punch ]\n\n";
+                        print "Timestamp   Read-delta  ReadRate  Write-delta  WriteRate\n";
+                        print "--------------------------------------------------------\n";
+                }
        }
        else {
                my $timespan = $cur{$snapshot_time} - $last{$snapshot_time};
-       
-               my $rdelta = $cur{$read} - $last{$read};
-               my $rvdelta = int ($rdelta / $timespan);
-               my $rrate = ($cur{$read_bytes} - $last{$read_bytes}) /
-                          ($timespan * ( 1 << 20 ));
-               my $wdelta = $cur{$write} - $last{$write};
-               my $wvdelta = int ($wdelta / $timespan);
-               my $wrate = ($cur{$write_bytes} - $last{$write_bytes}) /
-                          ($timespan * ( 1 << 20 ));
-               printf "R %6lu (%5lu %6.2fMB)/s W %6lu (%5lu %6.2fMB)/s",
-                       $rdelta, $rvdelta, $rrate,
-                       $wdelta, $wvdelta, $wrate;
+               my $rdelta = $cur{$read_bytes} - $last{$read_bytes};
+               my $rrate = ($rdelta) / ($timespan * ( 1 << 20 ));
+               my $wdelta = $cur{$write_bytes} - $last{$write_bytes};
+               my $wrate = ($wdelta) / ($timespan * ( 1 << 20 ));
+               $rdelta = ($rdelta) / (1024 * 1024);
+               $wdelta = ($wdelta) / (1024 * 1024);
+               # This print repeats after every interval.
+               printf "%10lu  %6.2fMB  %6.2fMB/s   %6.2fMB  %6.2fMB/s",
+                       $cur{$snapshot_time}, $rdelta, $rrate, $wdelta, $wrate;
 
                $delta = $cur{$getattr} - $last{$getattr};
                if ( $delta != 0 ) {
@@ -147,14 +156,15 @@ sub process_stats()
                $| = 1;
        }
 }
-
+#Open the obdfilter stat file with STATS
 open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
 do {
-       readstat();
-       process_stats();
-       if ($interval) { 
-               sleep($interval);
+       readstat();             # read the statistics from stat file.
+       process_stats();        
+       if ($interval) {        
+               sleep($interval); 
                %last = %cur;
        }
-} while ($interval);
+} while ($interval);   # Repeat the statistics printing after every "interval" specified in command line.
 close STATS;
+# llobdstat.pl ends here.
index 0305f3d..5706971 100755 (executable)
@@ -1,37 +1,60 @@
 #!/usr/bin/perl
-
+# llstat.pl is a utility that takes a stats file as input, with an optional clear flag (-c).
+# The clear flag clears the stats file before any statistics are printed.
+# Lustre stats files are generally located under /proc/fs/lustre/.
+# llstat.pl first reads the required statistics from the specified stats file,
+# then processes them and prints the output after every interval specified by the user.
 my $pname = $0;
 
 my $defaultpath = "/proc/fs/lustre";
 my $obdstats = "stats";
 
+# Subroutine for printing usages information
 sub usage()
 {
-    print STDERR "Usage: $pname <stats_file> [<interval>]\n";
+    print STDERR "Usage: $pname [-c] <stats_file> [<interval>]\n";
+    print STDERR "       <stats_file> : lustre stats file, full /proc path or substring search\n";
+    print STDERR "       <interval>   : Time in seconds to repeat statistics print cycle\n";
+    print STDERR "       -c           : zero stats first\n";
+    print STDERR "eg: $pname ost 1  --  monitors /proc/fs/lustre/ost/OSS/ost/stats\n";
+    print STDERR "Use CTRL + C to stop statistics printing\n";
     exit 1;
 }
 
 
 my $statspath = "None";
 my $interval = 0;
-
-if (($#ARGV < 0) || ($#ARGV > 1)) {
+my $argpos = 0;
+# check the number of arguments
+if (($#ARGV < 0) || ($#ARGV > 2)) {
     usage();
-} else {
+} else {   # Process arguments
     if ( $ARGV[0] =~ /help$/ ) {
        usage();
     }
-    if ( -f $ARGV[0] ) {
-       $statspath = $ARGV[0];
-    } elsif ( -f "$ARGV[0]/$obdstats" ) {
-       $statspath = "$ARGV[0]/$obdstats";
+    if ($#ARGV == 1) { 
+       if (($ARGV[0] eq "-c") || ($ARGV[0] eq "-C")) {
+           $argpos = 1;
+       } else {
+           $interval = $ARGV[1];
+       }
+    } 
+    if ( $#ARGV == 2 ) {
+       $interval = $ARGV[2];
+       $argpos = 1;
+    } 
+    if ( -f $ARGV[$argpos] ) {
+       $statspath = $ARGV[$argpos];
+    } elsif ( -f "$ARGV[$argpos]/$obdstats" ) {
+       $statspath = "$ARGV[$argpos]/$obdstats";
     } else {
-       my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`;
+       my $st = `ls $defaultpath/*/$ARGV[$argpos]/$obdstats 2> /dev/null`;
        chop $st;
        if ( -f "$st" ) {
            $statspath = $st;
        } else {
-           $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`;
+           $st = `ls $defaultpath/*/*/$ARGV[$argpos]/$obdstats 2> /dev/null`;
            chop $st;
            if ( -f "$st" ) {
                $statspath = $st;
@@ -39,11 +62,19 @@ if (($#ARGV < 0) || ($#ARGV > 1)) {
        }
     }
     if ( $statspath =~ /^None$/ ) {
-       die "Cannot locate stat file for: $ARGV[0]\n";
+       die "Cannot locate stat file for: $ARGV[$argpos]\n";
+    }
+    if ($#ARGV == 2) {
+       # Clears stats file before printing information in intervals
+       if ( ($ARGV[0] eq "-c") || ($ARGV[0] eq "-C" ) ) {
+           open ( STATS, "> $statspath") || die "Cannot clear $statspath: $!\n";
+           print STATS " ";
+           close STATS;
+           sleep($interval);       
+       } else {
+           usage();
+       }
     }
-    if ($#ARGV == 1) {
-       $interval = $ARGV[1];
-    } 
 }
 
 print "$pname on $statspath\n";
@@ -53,6 +84,7 @@ my %sumhash;
 my $anysum = 0;
 my $anysumsquare = 0;
 my $mhz = 0;
+my $flag = 0;	# set to 1 by readstat() once the column-header line has been printed
 
 sub get_cpumhz()
 {
@@ -74,6 +106,8 @@ sub get_cpumhz()
 get_cpumhz();
 print "Processor counters run at $mhz MHz\n";
 
+# The readstat subroutine reads and processes statistics from the stats file.
+# This subroutine gets called after every interval specified by user.
 sub readstat()
 {
     seek STATS, 0, 0;
@@ -87,21 +121,13 @@ sub readstat()
            $diff = $cumulcount - $prevcount;
            if ($name eq "snapshot_time") {
                $tdiff = $diff;
-               # printf "%-25s prev=$prevcount, cumul=$cumulcount diff=$diff, tdiff=$tdiff\n", $name;
-               printf "$statspath @ $cumulcount\n";
-               printf "%-25s %-10s %-10s %-10s", "Name", "Cur.Count", "Cur.Rate", "#Events";
-               if ($anysum) {
-                   printf "%-8s %10s %10s %12s %10s", "Unit", "last", "min", "avg", "max";
-               }
-               if ($anysumsquare) {
-                   printf "%10s", "stddev";
-               }
-                printf "\n";
+                printf "\n%-10.0f", $cumulcount;
                $| = 1;
            }
            elsif ($cumulcount!=0) {
-               printf "%-25s %-10lu %-10lu %-10lu",
-                      $name, $diff, ($diff/$tdiff), $cumulcount;
+               
+               printf "   %s %lu %lu",
+                       $name,  ($diff/$tdiff), $cumulcount;
                
                if (defined($sum)) {
                    my $sum_orig = $sum;
@@ -118,7 +144,7 @@ sub readstat()
                        $sum_diff = $sum_diff/$mhz;
                        $max = $max/$mhz;
                    }
-                   printf "%-8s %10.2f %10lu %12.2f %10lu", $unit, ($sum_diff/$diff), $min,($sum/$cumulcount),$max;
+                   printf " %lu %.2f %lu", $min,($sum/$cumulcount),$max;
                    if (defined($sumsquare)) {
                        my $s = $sumsquare - (($sum_orig*$sum_orig)/$cumulcount);
                        if ($s >= 0) {
@@ -127,17 +153,16 @@ sub readstat()
                            if (($unit eq "[usecs]") && ($mhz != 1)) {
                                $stddev = $stddev/$mhz;
                            }
-                           printf " %10.2f", $stddev;
+                           printf " %.2f ", $stddev;
                        }
                    }
                }
-               printf "\n";
                $| = 1;
            }
        }
        else {
            if ($cumulcount!=0) {
-               printf "%-25s $cumulcount\n", $name     
+               printf "%-25s $cumulcount\n", $name     # print info when interval is not specified.
            }
            if (defined($sum)) {
                $anysum = 1;
@@ -149,6 +174,18 @@ sub readstat()
        %cumulhash->{$name} = $cumulcount;
        %sumhash->{$name} = $sum;
     }
+    if ( !$flag && $interval) {
+       printf "Timestamp [Name Rate Total";
+       if ($anysum) {
+           printf " min avg max";
+       }
+       if ($anysumsquare) {
+           printf " stddev";
+       }
+       printf " ]...";
+       printf "\n--------------------------------------------------------------------";
+       $flag = 1;
+    }
 }
 
 open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
index 86e8503..7a15034 100644 (file)
@@ -87,12 +87,13 @@ void usage(FILE *out)
                 "\t\t--failnode=<nid>[,<...>] : NID(s) of a failover partner\n"
                 "\t\t--param <key>=<value> : set a permanent parameter\n"
                 "\t\t\te.g. --param sys.timeout=40\n"
-                "\t\t\t     --param lov.stripe.size=4194304\n"
+                "\t\t\t     --param lov.stripesize=2M\n"
                 "\t\t--index=#N : target index (i.e. ost index within the lov)\n"
                 /* FIXME implement 1.6.x
                 "\t\t--configdev=<altdevice|file>: store configuration info\n"
                 "\t\t\tfor this device on an alternate device\n"
                 */
+                "\t\t--comment=<user comment>: arbitrary user string (%d bytes)\n"
                 "\t\t--mountfsoptions=<opts> : permanent mount options\n"
 #ifndef TUNEFS
                 "\t\t--backfstype=<fstype> : backing fs type (ext3, ldiskfs)\n"
@@ -108,7 +109,8 @@ void usage(FILE *out)
                 "\t\t--noformat: just report what we would do; "
                 "don't write to disk\n"
                 "\t\t--verbose\n"
-                "\t\t--quiet\n");
+                "\t\t--quiet\n",
+                sizeof(((struct lustre_disk_data *)0)->ldd_userdata));
         return;
 }
 
@@ -577,7 +579,8 @@ void print_ldd(char *str, struct lustre_disk_data *ldd)
                 printf("Index:      unassigned\n");
         else
                 printf("Index:      %d\n", ldd->ldd_svindex);
-        printf("UUID:       %s\n", (char *)ldd->ldd_uuid);
+        if (ldd->ldd_uuid[0])
+                printf("UUID:       %s\n", (char *)ldd->ldd_uuid);
         printf("Lustre FS:  %s\n", ldd->ldd_fsname);
         printf("Mount type: %s\n", MT_STR(ldd));
         printf("Flags:      %#x\n", ldd->ldd_flags);
@@ -592,6 +595,8 @@ void print_ldd(char *str, struct lustre_disk_data *ldd)
                ldd->ldd_flags & LDD_F_UPGRADE14  ? "upgrade1.4 ":"");
         printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts);
         printf("Parameters:%s\n", ldd->ldd_params);
+        if (ldd->ldd_userdata[0])
+                printf("Comment: %s\n", ldd->ldd_userdata);
         printf("\n");
 }
 
@@ -653,8 +658,8 @@ int write_local_files(struct mkfs_opts *mop)
         sprintf(filepnm, "%s/%s", mntpt, MOUNT_DATA_FILE);
         filep = fopen(filepnm, "w");
         if (!filep) {
-                fprintf(stderr, "%s: Unable to create %s file\n",
-                        progname, filepnm);
+                fprintf(stderr, "%s: Unable to create %s file: %s\n",
+                        progname, filepnm, strerror(errno));
                 goto out_umnt;
         }
         fwrite(&mop->mo_ldd, sizeof(mop->mo_ldd), 1, filep);
@@ -957,6 +962,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
         static struct option long_opt[] = {
                 {"backfstype", 1, 0, 'b'},
                 {"stripe-count-hint", 1, 0, 'c'},
+                {"comment", 1, 0, 'u'},
                 {"configdev", 1, 0, 'C'},
                 {"device-size", 1, 0, 'd'},
                 {"erase-params", 0, 0, 'e'},
@@ -982,7 +988,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
                 {"writeconf", 0, 0, 'w'},
                 {0, 0, 0, 0}
         };
-        char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrvw";
+        char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw";
         char opt;
         int rc, longidx;
 
@@ -1122,6 +1128,12 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
                 case 'r':
                         mop->mo_flags |= MO_FORCEFORMAT;
                         break;
+                case 'u':
+                        strncpy(mop->mo_ldd.ldd_userdata, optarg,
+                                sizeof(mop->mo_ldd.ldd_userdata));
+                        mop->mo_ldd.ldd_userdata[
+                                sizeof(mop->mo_ldd.ldd_userdata) - 1] = 0;
+                        break;
                 case 'v':
                         verbose++;
                         break;
@@ -1363,9 +1375,6 @@ int main(int argc, char *argv[])
         char default_mountopts[512] = "";
         int ret = 0;
 
-        //printf("pad %d\n", offsetof(struct lustre_disk_data, ldd_padding));
-        assert(offsetof(struct lustre_disk_data, ldd_padding) == 200);
-
         if ((progname = strrchr(argv[0], '/')) != NULL)
                 progname++;
         else
diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh
deleted file mode 100755 (executable)
index be1ff5f..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-
-SRCDIR=`dirname $0`
-PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
-
-rmmod quotacheck_test quotactl_test quotafmt_test pingsrv pingcli
-lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
-# do it again, in case we tried to unload ksocklnd too early
-lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod
index c103a45..189d133 100644 (file)
@@ -165,6 +165,7 @@ static void check_obd_connect_data(void)
         CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT);
         CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
         CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE);
+        CHECK_CDEFINE(OBD_CONNECT_QUOTA64);
 }
 
 static void
@@ -919,9 +920,19 @@ check_qunit_data(void)
         BLANK_LINE();
         CHECK_STRUCT(qunit_data);
         CHECK_MEMBER(qunit_data, qd_id);
-        CHECK_MEMBER(qunit_data, qd_type);
+        CHECK_MEMBER(qunit_data, qd_flags);
         CHECK_MEMBER(qunit_data, qd_count);
-        CHECK_MEMBER(qunit_data, qd_isblk);
+}
+
+static void
+check_qunit_data_old(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(qunit_data_old);
+        CHECK_MEMBER(qunit_data_old, qd_id);
+        CHECK_MEMBER(qunit_data_old, qd_type);
+        CHECK_MEMBER(qunit_data_old, qd_count);
+        CHECK_MEMBER(qunit_data_old, qd_isblk);
 }
 
 static void
@@ -957,6 +968,7 @@ check_lustre_disk_data(void)
         CHECK_MEMBER(lustre_disk_data, ldd_fsname);
         CHECK_MEMBER(lustre_disk_data, ldd_svname);
         CHECK_MEMBER(lustre_disk_data, ldd_uuid);
+        CHECK_MEMBER(lustre_disk_data, ldd_userdata);
         CHECK_MEMBER(lustre_disk_data, ldd_mount_opts);
         CHECK_MEMBER(lustre_disk_data, ldd_params);
 }
@@ -1062,8 +1074,6 @@ main(int argc, char **argv)
         CHECK_VALUE(OST_OPEN);
         CHECK_VALUE(OST_CLOSE);
         CHECK_VALUE(OST_STATFS);
-        CHECK_VALUE(OST_SAN_READ);
-        CHECK_VALUE(OST_SAN_WRITE);
         CHECK_VALUE(OST_SYNC);
         CHECK_VALUE(OST_QUOTACHECK);
         CHECK_VALUE(OST_QUOTACTL);
@@ -1206,6 +1216,7 @@ main(int argc, char **argv)
         check_llog_array_rec();
         check_mds_extent_desc();
         check_qunit_data();
+        check_qunit_data_old();
         check_mgs_target_info();
         check_lustre_disk_data();
 
index abcffad..30da96b 100644 (file)
@@ -28,5 +28,3 @@ int main()
 void lustre_assert_wire_constants(void)
 {
 }
-
-